1 /*
2  * Copyright (c) 2006, Intel Corporation.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms and conditions of the GNU General Public License,
6  * version 2, as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope it will be useful, but WITHOUT
9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
11  * more details.
12  *
13  * You should have received a copy of the GNU General Public License along with
14  * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15  * Place - Suite 330, Boston, MA 02111-1307 USA.
16  *
17  * Copyright (C) 2006-2008 Intel Corporation
18  * Author: Ashok Raj <ashok.raj@intel.com>
19  * Author: Shaohua Li <shaohua.li@intel.com>
20  * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
21  *
22  * This file implements early detection/parsing of Remapping Devices
23  * reported to the OS by the BIOS via DMA remapping reporting (DMAR) ACPI
24  * tables.
25  *
26  * These routines are used by both DMA-remapping and Interrupt-remapping
27  */
28
29 #include <linux/pci.h>
30 #include <linux/dmar.h>
31 #include <linux/iova.h>
32 #include <linux/intel-iommu.h>
33 #include <linux/timer.h>
34 #include <linux/irq.h>
35 #include <linux/interrupt.h>
36
37 #define PREFIX "DMAR: "
38
39 /* No locks are needed as the DMA remapping hardware unit
40  * list is constructed at boot time and hotplug of
41  * these units is not supported by the architecture.
42  */
43 LIST_HEAD(dmar_drhd_units);
44
45 static struct acpi_table_header * __initdata dmar_tbl;
46 static acpi_size dmar_tbl_size;
47
48 static void __init dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)
49 {
50         /*
51          * add INCLUDE_ALL at the tail, so a scan of the list will find it at
52          * the very end.
53          */
54         if (drhd->include_all)
55                 list_add_tail(&drhd->list, &dmar_drhd_units);
56         else
57                 list_add(&drhd->list, &dmar_drhd_units);
58 }
59
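/*
 * Resolve one DMAR device-scope entry to a pci_dev.  Walk the ACPI path
 * (device/function pairs) from the entry's start bus on @segment, taking a
 * reference on the matching device.  A device the BIOS reports but which
 * does not exist is not an error: *dev is simply set to NULL.
 */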
60 static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope,
61                                            struct pci_dev **dev, u16 segment)
62 {
63         struct pci_bus *bus;
64         struct pci_dev *pdev = NULL;
65         struct acpi_dmar_pci_path *path;
66         int count;
67
68         bus = pci_find_bus(segment, scope->bus);
69         path = (struct acpi_dmar_pci_path *)(scope + 1);
70         count = (scope->length - sizeof(struct acpi_dmar_device_scope))
71                 / sizeof(struct acpi_dmar_pci_path);
72
73         while (count) {
74                 if (pdev)
75                         pci_dev_put(pdev);
76                 /*
77                  * Some BIOSes list non-existent devices in the DMAR table;
78                  * just ignore them.
79                  */
80                 if (!bus) {
81                         printk(KERN_WARNING
82                         PREFIX "Device scope bus [%d] not found\n",
83                         scope->bus);
84                         break;
85                 }
86                 pdev = pci_get_slot(bus, PCI_DEVFN(path->dev, path->fn));
87                 if (!pdev) {
88                         printk(KERN_WARNING PREFIX
89                         "Device scope device [%04x:%02x:%02x.%02x] not found\n",
90                                 segment, bus->number, path->dev, path->fn);
91                         break;
92                 }
93                 path++;
94                 count--;
95                 bus = pdev->subordinate;
96         }
97         if (!pdev) {
98                 printk(KERN_WARNING PREFIX
99                 "Device scope device [%04x:%02x:%02x.%02x] not found\n",
100                 segment, scope->bus, path->dev, path->fn);
101                 *dev = NULL;
102                 return 0;
103         }
104         if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT &&
105                         pdev->subordinate) || (scope->entry_type ==
106                         ACPI_DMAR_SCOPE_TYPE_BRIDGE && !pdev->subordinate)) {
107                 printk(KERN_WARNING PREFIX
108                         "Device scope type does not match for %s\n",
109                         pci_name(pdev));
110                 pci_dev_put(pdev);
111                 return -EINVAL;
112         }
113         *dev = pdev;
114         return 0;
115 }
116
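/*
 * Parse a range of device-scope entries: count the endpoint and bridge
 * entries, allocate the *devices array and resolve each entry through
 * dmar_parse_one_dev_scope().
 */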
117 static int __init dmar_parse_dev_scope(void *start, void *end, int *cnt,
118                                        struct pci_dev ***devices, u16 segment)
119 {
120         struct acpi_dmar_device_scope *scope;
121         void * tmp = start;
122         int index;
123         int ret;
124
125         *cnt = 0;
126         while (start < end) {
127                 scope = start;
128                 if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
129                     scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE)
130                         (*cnt)++;
131                 else
132                         printk(KERN_WARNING PREFIX
133                                 "Unsupported device scope\n");
134                 start += scope->length;
135         }
136         if (*cnt == 0)
137                 return 0;
138
139         *devices = kcalloc(*cnt, sizeof(struct pci_dev *), GFP_KERNEL);
140         if (!*devices)
141                 return -ENOMEM;
142
143         start = tmp;
144         index = 0;
145         while (start < end) {
146                 scope = start;
147                 if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
148                     scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE) {
149                         ret = dmar_parse_one_dev_scope(scope,
150                                 &(*devices)[index], segment);
151                         if (ret) {
152                                 kfree(*devices);
153                                 return ret;
154                         }
155                         index++;
156                 }
157                 start += scope->length;
158         }
159
160         return 0;
161 }
162
163 /**
164  * dmar_parse_one_drhd - parses exactly one DMA remapping hardware definition
165  * structure which uniquely represents one DMA remapping hardware unit
166  * present in the platform
167  */
168 static int __init
169 dmar_parse_one_drhd(struct acpi_dmar_header *header)
170 {
171         struct acpi_dmar_hardware_unit *drhd;
172         struct dmar_drhd_unit *dmaru;
173         int ret = 0;
174
175         drhd = (struct acpi_dmar_hardware_unit *)header;
176         if (!drhd->address) {
177                 /* Promote an attitude of violence to a BIOS engineer today */
178                 WARN(1, "Your BIOS is broken; DMAR reported at address zero!\n"
179                      "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
180                      dmi_get_system_info(DMI_BIOS_VENDOR),
181                      dmi_get_system_info(DMI_BIOS_VERSION),
182                      dmi_get_system_info(DMI_PRODUCT_VERSION));
183                 return -ENODEV;
184         }
185         dmaru = kzalloc(sizeof(*dmaru), GFP_KERNEL);
186         if (!dmaru)
187                 return -ENOMEM;
188
189         dmaru->hdr = header;
190         dmaru->reg_base_addr = drhd->address;
191         dmaru->segment = drhd->segment;
192         dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */
193
194         ret = alloc_iommu(dmaru);
195         if (ret) {
196                 kfree(dmaru);
197                 return ret;
198         }
199         dmar_register_drhd_unit(dmaru);
200         return 0;
201 }
202
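/*
 * Resolve the device scope of one DRHD unit; called from
 * dmar_dev_scope_init().  INCLUDE_ALL units carry no scope.  A unit whose
 * scope cannot be resolved is unlinked from dmar_drhd_units and freed.
 */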
203 static int __init dmar_parse_dev(struct dmar_drhd_unit *dmaru)
204 {
205         struct acpi_dmar_hardware_unit *drhd;
206         int ret = 0;
207
208         drhd = (struct acpi_dmar_hardware_unit *) dmaru->hdr;
209
210         if (dmaru->include_all)
211                 return 0;
212
213         ret = dmar_parse_dev_scope((void *)(drhd + 1),
214                                 ((void *)drhd) + drhd->header.length,
215                                 &dmaru->devices_cnt, &dmaru->devices,
216                                 drhd->segment);
217         if (ret) {
218                 list_del(&dmaru->list);
219                 kfree(dmaru);
220         }
221         return ret;
222 }
223
224 #ifdef CONFIG_DMAR
225 LIST_HEAD(dmar_rmrr_units);
226
227 static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr)
228 {
229         list_add(&rmrr->list, &dmar_rmrr_units);
230 }
231
232
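/* Record one RMRR (reserved memory region reporting) structure. */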
233 static int __init
234 dmar_parse_one_rmrr(struct acpi_dmar_header *header)
235 {
236         struct acpi_dmar_reserved_memory *rmrr;
237         struct dmar_rmrr_unit *rmrru;
238
239         rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
240         if (!rmrru)
241                 return -ENOMEM;
242
243         rmrru->hdr = header;
244         rmrr = (struct acpi_dmar_reserved_memory *)header;
245         rmrru->base_address = rmrr->base_address;
246         rmrru->end_address = rmrr->end_address;
247
248         dmar_register_rmrr_unit(rmrru);
249         return 0;
250 }
251
252 static int __init
253 rmrr_parse_dev(struct dmar_rmrr_unit *rmrru)
254 {
255         struct acpi_dmar_reserved_memory *rmrr;
256         int ret;
257
258         rmrr = (struct acpi_dmar_reserved_memory *) rmrru->hdr;
259         ret = dmar_parse_dev_scope((void *)(rmrr + 1),
260                 ((void *)rmrr) + rmrr->header.length,
261                 &rmrru->devices_cnt, &rmrru->devices, rmrr->segment);
262
263         if (ret || (rmrru->devices_cnt == 0)) {
264                 list_del(&rmrru->list);
265                 kfree(rmrru);
266         }
267         return ret;
268 }
269
270 static LIST_HEAD(dmar_atsr_units);
271
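/* Record one ATSR structure, noting its INCLUDE_ALL flag. */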
272 static int __init dmar_parse_one_atsr(struct acpi_dmar_header *hdr)
273 {
274         struct acpi_dmar_atsr *atsr;
275         struct dmar_atsr_unit *atsru;
276
277         atsr = container_of(hdr, struct acpi_dmar_atsr, header);
278         atsru = kzalloc(sizeof(*atsru), GFP_KERNEL);
279         if (!atsru)
280                 return -ENOMEM;
281
282         atsru->hdr = hdr;
283         atsru->include_all = atsr->flags & 0x1;
284
285         list_add(&atsru->list, &dmar_atsr_units);
286
287         return 0;
288 }
289
290 static int __init atsr_parse_dev(struct dmar_atsr_unit *atsru)
291 {
292         int rc;
293         struct acpi_dmar_atsr *atsr;
294
295         if (atsru->include_all)
296                 return 0;
297
298         atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
299         rc = dmar_parse_dev_scope((void *)(atsr + 1),
300                                 (void *)atsr + atsr->header.length,
301                                 &atsru->devices_cnt, &atsru->devices,
302                                 atsr->segment);
303         if (rc || !atsru->devices_cnt) {
304                 list_del(&atsru->list);
305                 kfree(atsru);
306         }
307
308         return rc;
309 }
310
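/*
 * Return 1 if the PCIe root port above @dev is covered by an ATSR for the
 * device's segment, either explicitly or via INCLUDE_ALL; 0 otherwise.
 */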
311 int dmar_find_matched_atsr_unit(struct pci_dev *dev)
312 {
313         int i;
314         struct pci_bus *bus;
315         struct acpi_dmar_atsr *atsr;
316         struct dmar_atsr_unit *atsru;
317
318         list_for_each_entry(atsru, &dmar_atsr_units, list) {
319                 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
320                 if (atsr->segment == pci_domain_nr(dev->bus))
321                         goto found;
322         }
323
324         return 0;
325
326 found:
327         for (bus = dev->bus; bus; bus = bus->parent) {
328                 struct pci_dev *bridge = bus->self;
329
330                 if (!bridge || !bridge->is_pcie ||
331                     bridge->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE)
332                         return 0;
333
334                 if (bridge->pcie_type == PCI_EXP_TYPE_ROOT_PORT) {
335                         for (i = 0; i < atsru->devices_cnt; i++)
336                                 if (atsru->devices[i] == bridge)
337                                         return 1;
338                         break;
339                 }
340         }
341
342         if (atsru->include_all)
343                 return 1;
344
345         return 0;
346 }
347 #endif
348
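/* Log the key fields of one DMAR structure (DRHD, RMRR or ATSR). */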
349 static void __init
350 dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
351 {
352         struct acpi_dmar_hardware_unit *drhd;
353         struct acpi_dmar_reserved_memory *rmrr;
354         struct acpi_dmar_atsr *atsr;
355
356         switch (header->type) {
357         case ACPI_DMAR_TYPE_HARDWARE_UNIT:
358                 drhd = container_of(header, struct acpi_dmar_hardware_unit,
359                                     header);
360                 printk(KERN_INFO PREFIX
361                         "DRHD base: %#016Lx flags: %#x\n",
362                         (unsigned long long)drhd->address, drhd->flags);
363                 break;
364         case ACPI_DMAR_TYPE_RESERVED_MEMORY:
365                 rmrr = container_of(header, struct acpi_dmar_reserved_memory,
366                                     header);
367                 printk(KERN_INFO PREFIX
368                         "RMRR base: %#016Lx end: %#016Lx\n",
369                         (unsigned long long)rmrr->base_address,
370                         (unsigned long long)rmrr->end_address);
371                 break;
372         case ACPI_DMAR_TYPE_ATSR:
373                 atsr = container_of(header, struct acpi_dmar_atsr, header);
374                 printk(KERN_INFO PREFIX "ATSR flags: %#x\n", atsr->flags);
375                 break;
376         }
377 }
378
379 /**
380  * dmar_table_detect - checks to see if the platform supports DMAR devices
381  */
382 static int __init dmar_table_detect(void)
383 {
384         acpi_status status = AE_OK;
385
386         /* if we can find the DMAR table, then there are DMAR devices */
387         status = acpi_get_table_with_size(ACPI_SIG_DMAR, 0,
388                                 (struct acpi_table_header **)&dmar_tbl,
389                                 &dmar_tbl_size);
390
391         if (ACPI_SUCCESS(status) && !dmar_tbl) {
392                 printk(KERN_WARNING PREFIX "Unable to map DMAR\n");
393                 status = AE_NOT_FOUND;
394         }
395
396         return (ACPI_SUCCESS(status) ? 1 : 0);
397 }
398
399 /**
400  * parse_dmar_table - parses the DMA reporting table
401  */
402 static int __init
403 parse_dmar_table(void)
404 {
405         struct acpi_table_dmar *dmar;
406         struct acpi_dmar_header *entry_header;
407         int ret = 0;
408
409         /*
410          * Do it again; the earlier dmar_tbl mapping could have been done
411          * with the fixed map.
412          */
413         dmar_table_detect();
414
415         dmar = (struct acpi_table_dmar *)dmar_tbl;
416         if (!dmar)
417                 return -ENODEV;
418
419         if (dmar->width < PAGE_SHIFT - 1) {
420                 printk(KERN_WARNING PREFIX "Invalid DMAR haw\n");
421                 return -EINVAL;
422         }
423
424         printk(KERN_INFO PREFIX "Host address width %d\n",
425                 dmar->width + 1);
426
427         entry_header = (struct acpi_dmar_header *)(dmar + 1);
428         while (((unsigned long)entry_header) <
429                         (((unsigned long)dmar) + dmar_tbl->length)) {
430                 /* Avoid looping forever on bad ACPI tables */
431                 if (entry_header->length == 0) {
432                         printk(KERN_WARNING PREFIX
433                                 "Invalid 0-length structure\n");
434                         ret = -EINVAL;
435                         break;
436                 }
437
438                 dmar_table_print_dmar_entry(entry_header);
439
440                 switch (entry_header->type) {
441                 case ACPI_DMAR_TYPE_HARDWARE_UNIT:
442                         ret = dmar_parse_one_drhd(entry_header);
443                         break;
444                 case ACPI_DMAR_TYPE_RESERVED_MEMORY:
445 #ifdef CONFIG_DMAR
446                         ret = dmar_parse_one_rmrr(entry_header);
447 #endif
448                         break;
449                 case ACPI_DMAR_TYPE_ATSR:
450 #ifdef CONFIG_DMAR
451                         ret = dmar_parse_one_atsr(entry_header);
452 #endif
453                         break;
454                 default:
455                         printk(KERN_WARNING PREFIX
456                                 "Unknown DMAR structure type\n");
457                         ret = 0; /* for forward compatibility */
458                         break;
459                 }
460                 if (ret)
461                         break;
462
463                 entry_header = ((void *)entry_header + entry_header->length);
464         }
465         return ret;
466 }
467
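/*
 * Return 1 if @dev or one of its parent bridges appears in the devices[]
 * array of a device scope, 0 otherwise.
 */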
468 int dmar_pci_device_match(struct pci_dev *devices[], int cnt,
469                           struct pci_dev *dev)
470 {
471         int index;
472
473         while (dev) {
474                 for (index = 0; index < cnt; index++)
475                         if (dev == devices[index])
476                                 return 1;
477
478                 /* Check our parent */
479                 dev = dev->bus->self;
480         }
481
482         return 0;
483 }
484
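/*
 * Return the DRHD unit whose device scope contains @dev.  INCLUDE_ALL
 * units sit at the list tail and act as the fallback for their segment.
 */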
485 struct dmar_drhd_unit *
486 dmar_find_matched_drhd_unit(struct pci_dev *dev)
487 {
488         struct dmar_drhd_unit *dmaru = NULL;
489         struct acpi_dmar_hardware_unit *drhd;
490
491         list_for_each_entry(dmaru, &dmar_drhd_units, list) {
492                 drhd = container_of(dmaru->hdr,
493                                     struct acpi_dmar_hardware_unit,
494                                     header);
495
496                 if (dmaru->include_all &&
497                     drhd->segment == pci_domain_nr(dev->bus))
498                         return dmaru;
499
500                 if (dmar_pci_device_match(dmaru->devices,
501                                           dmaru->devices_cnt, dev))
502                         return dmaru;
503         }
504
505         return NULL;
506 }
507
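/*
 * Second pass over the structures collected by parse_dmar_table(): resolve
 * the PCI device scopes of all DRHD, RMRR and ATSR units.  This needs the
 * PCI core (pci_find_bus()/pci_get_slot()), so it runs after the early
 * table parse.
 */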
508 int __init dmar_dev_scope_init(void)
509 {
510         struct dmar_drhd_unit *drhd, *drhd_n;
511         int ret = -ENODEV;
512
513         list_for_each_entry_safe(drhd, drhd_n, &dmar_drhd_units, list) {
514                 ret = dmar_parse_dev(drhd);
515                 if (ret)
516                         return ret;
517         }
518
519 #ifdef CONFIG_DMAR
520         {
521                 struct dmar_rmrr_unit *rmrr, *rmrr_n;
522                 struct dmar_atsr_unit *atsr, *atsr_n;
523
524                 list_for_each_entry_safe(rmrr, rmrr_n, &dmar_rmrr_units, list) {
525                         ret = rmrr_parse_dev(rmrr);
526                         if (ret)
527                                 return ret;
528                 }
529
530                 list_for_each_entry_safe(atsr, atsr_n, &dmar_atsr_units, list) {
531                         ret = atsr_parse_dev(atsr);
532                         if (ret)
533                                 return ret;
534                 }
535         }
536 #endif
537
538         return ret;
539 }
540
541
542 int __init dmar_table_init(void)
543 {
544         static int dmar_table_initialized;
545         int ret;
546
547         if (dmar_table_initialized)
548                 return 0;
549
550         dmar_table_initialized = 1;
551
552         ret = parse_dmar_table();
553         if (ret) {
554                 if (ret != -ENODEV)
555                         printk(KERN_INFO PREFIX "parse DMAR table failure.\n");
556                 return ret;
557         }
558
559         if (list_empty(&dmar_drhd_units)) {
560                 printk(KERN_INFO PREFIX "No DMAR devices found\n");
561                 return -ENODEV;
562         }
563
564 #ifdef CONFIG_DMAR
565         if (list_empty(&dmar_rmrr_units))
566                 printk(KERN_INFO PREFIX "No RMRR found\n");
567
568         if (list_empty(&dmar_atsr_units))
569                 printk(KERN_INFO PREFIX "No ATSR found\n");
570 #endif
571
572 #ifdef CONFIG_INTR_REMAP
573         parse_ioapics_under_ir();
574 #endif
575         return 0;
576 }
577
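/*
 * Early detection hook: probe for the ACPI DMAR table and, when DMA
 * remapping is built in and not disabled, set iommu_detected.  The early
 * table mapping is dropped again here; parse_dmar_table() re-maps it later
 * through dmar_table_detect().
 */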
578 void __init detect_intel_iommu(void)
579 {
580         int ret;
581
582         ret = dmar_table_detect();
583
584         {
585 #ifdef CONFIG_INTR_REMAP
586                 struct acpi_table_dmar *dmar;
587                 /*
588                  * for now we will disable dma-remapping when interrupt
589                  * remapping is enabled.
590                  * When support for queued invalidation for IOTLB invalidation
591                  * is added, we will not need this any more.
592                  */
593                 dmar = (struct acpi_table_dmar *) dmar_tbl;
594                 if (ret && cpu_has_x2apic && dmar->flags & 0x1)
595                         printk(KERN_INFO
596                                "Queued invalidation will be enabled to support "
597                                "x2apic and Intr-remapping.\n");
598 #endif
599 #ifdef CONFIG_DMAR
600                 if (ret && !no_iommu && !iommu_detected && !swiotlb &&
601                     !dmar_disabled)
602                         iommu_detected = 1;
603 #endif
604         }
605         early_acpi_os_unmap_memory(dmar_tbl, dmar_tbl_size);
606         dmar_tbl = NULL;
607 }
608
609
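/*
 * Allocate and initialize the intel_iommu structure for one DRHD: map its
 * register window, read the CAP/ECAP registers, compute the address-width
 * values and grow the mapping if the fault and IOTLB registers extend past
 * the first page.  Called from dmar_parse_one_drhd().
 */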
610 int alloc_iommu(struct dmar_drhd_unit *drhd)
611 {
612         struct intel_iommu *iommu;
613         int map_size;
614         u32 ver;
615         static int iommu_allocated = 0;
616         int agaw = 0;
617         int msagaw = 0;
618
619         iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
620         if (!iommu)
621                 return -ENOMEM;
622
623         iommu->seq_id = iommu_allocated++;
624         sprintf(iommu->name, "dmar%d", iommu->seq_id);
625
626         iommu->reg = ioremap(drhd->reg_base_addr, VTD_PAGE_SIZE);
627         if (!iommu->reg) {
628                 printk(KERN_ERR "IOMMU: can't map the region\n");
629                 goto error;
630         }
631         iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
632         iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
633
634 #ifdef CONFIG_DMAR
635         agaw = iommu_calculate_agaw(iommu);
636         if (agaw < 0) {
637                 printk(KERN_ERR
638                        "Cannot get a valid agaw for iommu (seq_id = %d)\n",
639                        iommu->seq_id);
640                 goto error;
641         }
642         msagaw = iommu_calculate_max_sagaw(iommu);
643         if (msagaw < 0) {
644                 printk(KERN_ERR
645                         "Cannot get a valid max agaw for iommu (seq_id = %d)\n",
646                         iommu->seq_id);
647                 goto error;
648         }
649 #endif
650         iommu->agaw = agaw;
651         iommu->msagaw = msagaw;
652
653         /* the registers might be more than one page */
654         map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
655                 cap_max_fault_reg_offset(iommu->cap));
656         map_size = VTD_PAGE_ALIGN(map_size);
657         if (map_size > VTD_PAGE_SIZE) {
658                 iounmap(iommu->reg);
659                 iommu->reg = ioremap(drhd->reg_base_addr, map_size);
660                 if (!iommu->reg) {
661                         printk(KERN_ERR "IOMMU: can't map the region\n");
662                         goto error;
663                 }
664         }
665
666         ver = readl(iommu->reg + DMAR_VER_REG);
667         pr_debug("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n",
668                 (unsigned long long)drhd->reg_base_addr,
669                 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
670                 (unsigned long long)iommu->cap,
671                 (unsigned long long)iommu->ecap);
672
673         spin_lock_init(&iommu->register_lock);
674
675         drhd->iommu = iommu;
676         return 0;
677 error:
678         kfree(iommu);
679         return -1;
680 }
681
682 void free_iommu(struct intel_iommu *iommu)
683 {
684         if (!iommu)
685                 return;
686
687 #ifdef CONFIG_DMAR
688         free_dmar_iommu(iommu);
689 #endif
690
691         if (iommu->reg)
692                 iounmap(iommu->reg);
693         kfree(iommu);
694 }
695
696 /*
697  * Reclaim all the submitted descriptors which have completed their work.
698  */
699 static inline void reclaim_free_desc(struct q_inval *qi)
700 {
701         while (qi->desc_status[qi->free_tail] == QI_DONE ||
702                qi->desc_status[qi->free_tail] == QI_ABORT) {
703                 qi->desc_status[qi->free_tail] = QI_FREE;
704                 qi->free_tail = (qi->free_tail + 1) % QI_LENGTH;
705                 qi->free_cnt++;
706         }
707 }
708
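/*
 * Check the fault status register for queued-invalidation errors affecting
 * the descriptor at @index.  Returns -EINVAL for an invalidation queue
 * error (IQE) pointing at this descriptor, -EAGAIN if the wait descriptor
 * was aborted by an invalidation time-out (ITE), and 0 otherwise.
 */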
709 static int qi_check_fault(struct intel_iommu *iommu, int index)
710 {
711         u32 fault;
712         int head, tail;
713         struct q_inval *qi = iommu->qi;
714         int wait_index = (index + 1) % QI_LENGTH;
715
716         if (qi->desc_status[wait_index] == QI_ABORT)
717                 return -EAGAIN;
718
719         fault = readl(iommu->reg + DMAR_FSTS_REG);
720
721         /*
722          * If IQE happens, the head points to the descriptor associated
723          * with the error. No new descriptors are fetched until the IQE
724          * is cleared.
725          */
726         if (fault & DMA_FSTS_IQE) {
727                 head = readl(iommu->reg + DMAR_IQH_REG);
728                 if ((head >> DMAR_IQ_SHIFT) == index) {
729                         printk(KERN_ERR "VT-d detected invalid descriptor: "
730                                 "low=%llx, high=%llx\n",
731                                 (unsigned long long)qi->desc[index].low,
732                                 (unsigned long long)qi->desc[index].high);
733                         memcpy(&qi->desc[index], &qi->desc[wait_index],
734                                         sizeof(struct qi_desc));
735                         __iommu_flush_cache(iommu, &qi->desc[index],
736                                         sizeof(struct qi_desc));
737                         writel(DMA_FSTS_IQE, iommu->reg + DMAR_FSTS_REG);
738                         return -EINVAL;
739                 }
740         }
741
742         /*
743          * If ITE happens, all pending wait_desc commands are aborted.
744          * No new descriptors are fetched until the ITE is cleared.
745          */
746         if (fault & DMA_FSTS_ITE) {
747                 head = readl(iommu->reg + DMAR_IQH_REG);
748                 head = ((head >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH;
749                 head |= 1;
750                 tail = readl(iommu->reg + DMAR_IQT_REG);
751                 tail = ((tail >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH;
752
753                 writel(DMA_FSTS_ITE, iommu->reg + DMAR_FSTS_REG);
754
755                 do {
756                         if (qi->desc_status[head] == QI_IN_USE)
757                                 qi->desc_status[head] = QI_ABORT;
758                         head = (head - 2 + QI_LENGTH) % QI_LENGTH;
759                 } while (head != tail);
760
761                 if (qi->desc_status[wait_index] == QI_ABORT)
762                         return -EAGAIN;
763         }
764
765         if (fault & DMA_FSTS_ICE)
766                 writel(DMA_FSTS_ICE, iommu->reg + DMAR_FSTS_REG);
767
768         return 0;
769 }
770
771 /*
772  * Submit the queued invalidation descriptor to the remapping
773  * hardware unit and wait for its completion.
774  */
775 int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu)
776 {
777         int rc;
778         struct q_inval *qi = iommu->qi;
779         struct qi_desc *hw, wait_desc;
780         int wait_index, index;
781         unsigned long flags;
782
783         if (!qi)
784                 return 0;
785
786         hw = qi->desc;
787
788 restart:
789         rc = 0;
790
791         spin_lock_irqsave(&qi->q_lock, flags);
792         while (qi->free_cnt < 3) {
793                 spin_unlock_irqrestore(&qi->q_lock, flags);
794                 cpu_relax();
795                 spin_lock_irqsave(&qi->q_lock, flags);
796         }
797
798         index = qi->free_head;
799         wait_index = (index + 1) % QI_LENGTH;
800
801         qi->desc_status[index] = qi->desc_status[wait_index] = QI_IN_USE;
802
803         hw[index] = *desc;
804
805         wait_desc.low = QI_IWD_STATUS_DATA(QI_DONE) |
806                         QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
807         wait_desc.high = virt_to_phys(&qi->desc_status[wait_index]);
808
809         hw[wait_index] = wait_desc;
810
811         __iommu_flush_cache(iommu, &hw[index], sizeof(struct qi_desc));
812         __iommu_flush_cache(iommu, &hw[wait_index], sizeof(struct qi_desc));
813
814         qi->free_head = (qi->free_head + 2) % QI_LENGTH;
815         qi->free_cnt -= 2;
816
817         /*
818          * update the HW tail register indicating the presence of
819          * new descriptors.
820          */
821         writel(qi->free_head << DMAR_IQ_SHIFT, iommu->reg + DMAR_IQT_REG);
822
823         while (qi->desc_status[wait_index] != QI_DONE) {
824                 /*
825                  * We leave interrupts disabled to prevent the interrupt
826                  * context from queueing another cmd while a cmd is already
827                  * submitted and waiting for completion on this CPU. This is to avoid
828                  * a deadlock where the interrupt context can wait indefinitely
829                  * for free slots in the queue.
830                  */
831                 rc = qi_check_fault(iommu, index);
832                 if (rc)
833                         break;
834
835                 spin_unlock(&qi->q_lock);
836                 cpu_relax();
837                 spin_lock(&qi->q_lock);
838         }
839
840         qi->desc_status[index] = QI_DONE;
841
842         reclaim_free_desc(qi);
843         spin_unlock_irqrestore(&qi->q_lock, flags);
844
845         if (rc == -EAGAIN)
846                 goto restart;
847
848         return rc;
849 }
850
851 /*
852  * Flush the global interrupt entry cache.
853  */
854 void qi_global_iec(struct intel_iommu *iommu)
855 {
856         struct qi_desc desc;
857
858         desc.low = QI_IEC_TYPE;
859         desc.high = 0;
860
861         /* should never fail */
862         qi_submit_sync(&desc, iommu);
863 }
864
865 void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm,
866                       u64 type)
867 {
868         struct qi_desc desc;
869
870         desc.low = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did)
871                         | QI_CC_GRAN(type) | QI_CC_TYPE;
872         desc.high = 0;
873
874         qi_submit_sync(&desc, iommu);
875 }
876
877 void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
878                     unsigned int size_order, u64 type)
879 {
880         u8 dw = 0, dr = 0;
881
882         struct qi_desc desc;
883         int ih = 0;
884
885         if (cap_write_drain(iommu->cap))
886                 dw = 1;
887
888         if (cap_read_drain(iommu->cap))
889                 dr = 1;
890
891         desc.low = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw)
892                 | QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE;
893         desc.high = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
894                 | QI_IOTLB_AM(size_order);
895
896         qi_submit_sync(&desc, iommu);
897 }
898
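/*
 * Queue a device-IOTLB (ATS) invalidation for source-id @sid.  A non-zero
 * @mask requests a 2^mask-page range starting at @addr; @qdep is the
 * device's invalidate queue depth (values >= QI_DEV_IOTLB_MAX_INVS are
 * encoded as 0).
 */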
899 void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 qdep,
900                         u64 addr, unsigned mask)
901 {
902         struct qi_desc desc;
903
904         if (mask) {
905                 BUG_ON(addr & ((1 << (VTD_PAGE_SHIFT + mask)) - 1));
906                 addr |= (1 << (VTD_PAGE_SHIFT + mask - 1)) - 1;
907                 desc.high = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE;
908         } else
909                 desc.high = QI_DEV_IOTLB_ADDR(addr);
910
911         if (qdep >= QI_DEV_IOTLB_MAX_INVS)
912                 qdep = 0;
913
914         desc.low = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) |
915                    QI_DIOTLB_TYPE;
916
917         qi_submit_sync(&desc, iommu);
918 }
919
920 /*
921  * Disable Queued Invalidation interface.
922  */
923 void dmar_disable_qi(struct intel_iommu *iommu)
924 {
925         unsigned long flags;
926         u32 sts;
927         cycles_t start_time = get_cycles();
928
929         if (!ecap_qis(iommu->ecap))
930                 return;
931
932         spin_lock_irqsave(&iommu->register_lock, flags);
933
934         sts =  dmar_readq(iommu->reg + DMAR_GSTS_REG);
935         if (!(sts & DMA_GSTS_QIES))
936                 goto end;
937
938         /*
939          * Give the hardware a chance to complete the pending invalidation requests.
940          */
941         while ((readl(iommu->reg + DMAR_IQT_REG) !=
942                 readl(iommu->reg + DMAR_IQH_REG)) &&
943                 (DMAR_OPERATION_TIMEOUT > (get_cycles() - start_time)))
944                 cpu_relax();
945
946         iommu->gcmd &= ~DMA_GCMD_QIE;
947         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
948
949         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl,
950                       !(sts & DMA_GSTS_QIES), sts);
951 end:
952         spin_unlock_irqrestore(&iommu->register_lock, flags);
953 }
954
955 /*
956  * Enable queued invalidation.
957  */
958 static void __dmar_enable_qi(struct intel_iommu *iommu)
959 {
960         u32 sts;
961         unsigned long flags;
962         struct q_inval *qi = iommu->qi;
963
964         qi->free_head = qi->free_tail = 0;
965         qi->free_cnt = QI_LENGTH;
966
967         spin_lock_irqsave(&iommu->register_lock, flags);
968
969         /* write zero to the tail reg */
970         writel(0, iommu->reg + DMAR_IQT_REG);
971
972         dmar_writeq(iommu->reg + DMAR_IQA_REG, virt_to_phys(qi->desc));
973
974         iommu->gcmd |= DMA_GCMD_QIE;
975         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
976
977         /* Make sure the hardware completes it */
978         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts);
979
980         spin_unlock_irqrestore(&iommu->register_lock, flags);
981 }
982
983 /*
984  * Enable Queued Invalidation interface. This is a must to support
985  * interrupt-remapping. Also used by DMA-remapping, which replaces
986  * register based IOTLB invalidation.
987  */
988 int dmar_enable_qi(struct intel_iommu *iommu)
989 {
990         struct q_inval *qi;
991
992         if (!ecap_qis(iommu->ecap))
993                 return -ENOENT;
994
995         /*
996          * Queued invalidation is already set up and enabled.
997          */
998         if (iommu->qi)
999                 return 0;
1000
1001         iommu->qi = kmalloc(sizeof(*qi), GFP_ATOMIC);
1002         if (!iommu->qi)
1003                 return -ENOMEM;
1004
1005         qi = iommu->qi;
1006
1007         qi->desc = (void *)(get_zeroed_page(GFP_ATOMIC));
1008         if (!qi->desc) {
1009                 kfree(qi);
1010                 iommu->qi = NULL;
1011                 return -ENOMEM;
1012         }
1013
1014         qi->desc_status = kmalloc(QI_LENGTH * sizeof(int), GFP_ATOMIC);
1015         if (!qi->desc_status) {
1016                 free_page((unsigned long) qi->desc);
1017                 kfree(qi);
1018                 iommu->qi = NULL;
1019                 return -ENOMEM;
1020         }
1021
1022         qi->free_head = qi->free_tail = 0;
1023         qi->free_cnt = QI_LENGTH;
1024
1025         spin_lock_init(&qi->q_lock);
1026
1027         __dmar_enable_qi(iommu);
1028
1029         return 0;
1030 }
1031
1032 /* iommu interrupt handling. Most of it is MSI-like. */
1033
1034 enum faulttype {
1035         DMA_REMAP,
1036         INTR_REMAP,
1037         UNKNOWN,
1038 };
1039
1040 static const char *dma_remap_fault_reasons[] =
1041 {
1042         "Software",
1043         "Present bit in root entry is clear",
1044         "Present bit in context entry is clear",
1045         "Invalid context entry",
1046         "Access beyond MGAW",
1047         "PTE Write access is not set",
1048         "PTE Read access is not set",
1049         "Next page table ptr is invalid",
1050         "Root table address invalid",
1051         "Context table ptr is invalid",
1052         "non-zero reserved fields in RTP",
1053         "non-zero reserved fields in CTP",
1054         "non-zero reserved fields in PTE",
1055 };
1056
1057 static const char *intr_remap_fault_reasons[] =
1058 {
1059         "Detected reserved fields in the decoded interrupt-remapped request",
1060         "Interrupt index exceeded the interrupt-remapping table size",
1061         "Present field in the IRTE entry is clear",
1062         "Error accessing interrupt-remapping table pointed by IRTA_REG",
1063         "Detected reserved fields in the IRTE entry",
1064         "Blocked a compatibility format interrupt request",
1065         "Blocked an interrupt request due to source-id verification failure",
1066 };
1067
1068 #define MAX_FAULT_REASON_IDX    (ARRAY_SIZE(fault_reason_strings) - 1)
1069
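/*
 * Translate a hardware fault reason code into a string and classify it as
 * a DMA-remapping or interrupt-remapping fault; interrupt-remapping
 * reasons start at code 0x20.
 */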
1070 const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type)
1071 {
1072         if (fault_reason >= 0x20 && (fault_reason < 0x20 +
1073                                      ARRAY_SIZE(intr_remap_fault_reasons))) {
1074                 *fault_type = INTR_REMAP;
1075                 return intr_remap_fault_reasons[fault_reason - 0x20];
1076         } else if (fault_reason < ARRAY_SIZE(dma_remap_fault_reasons)) {
1077                 *fault_type = DMA_REMAP;
1078                 return dma_remap_fault_reasons[fault_reason];
1079         } else {
1080                 *fault_type = UNKNOWN;
1081                 return "Unknown";
1082         }
1083 }
1084
1085 void dmar_msi_unmask(unsigned int irq)
1086 {
1087         struct intel_iommu *iommu = get_irq_data(irq);
1088         unsigned long flag;
1089
1090         /* unmask it */
1091         spin_lock_irqsave(&iommu->register_lock, flag);
1092         writel(0, iommu->reg + DMAR_FECTL_REG);
1093         /* Read a reg to force flush the post write */
1094         readl(iommu->reg + DMAR_FECTL_REG);
1095         spin_unlock_irqrestore(&iommu->register_lock, flag);
1096 }
1097
1098 void dmar_msi_mask(unsigned int irq)
1099 {
1100         unsigned long flag;
1101         struct intel_iommu *iommu = get_irq_data(irq);
1102
1103         /* mask it */
1104         spin_lock_irqsave(&iommu->register_lock, flag);
1105         writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
1106         /* Read a reg to force flush the post write */
1107         readl(iommu->reg + DMAR_FECTL_REG);
1108         spin_unlock_irqrestore(&iommu->register_lock, flag);
1109 }
1110
1111 void dmar_msi_write(int irq, struct msi_msg *msg)
1112 {
1113         struct intel_iommu *iommu = get_irq_data(irq);
1114         unsigned long flag;
1115
1116         spin_lock_irqsave(&iommu->register_lock, flag);
1117         writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
1118         writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
1119         writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
1120         spin_unlock_irqrestore(&iommu->register_lock, flag);
1121 }
1122
1123 void dmar_msi_read(int irq, struct msi_msg *msg)
1124 {
1125         struct intel_iommu *iommu = get_irq_data(irq);
1126         unsigned long flag;
1127
1128         spin_lock_irqsave(&iommu->register_lock, flag);
1129         msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
1130         msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
1131         msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
1132         spin_unlock_irqrestore(&iommu->register_lock, flag);
1133 }
1134
1135 static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
1136                 u8 fault_reason, u16 source_id, unsigned long long addr)
1137 {
1138         const char *reason;
1139         int fault_type;
1140
1141         reason = dmar_get_fault_reason(fault_reason, &fault_type);
1142
1143         if (fault_type == INTR_REMAP)
1144                 printk(KERN_ERR "INTR-REMAP: Request device [%02x:%02x.%d] "
1145                        "fault index %llx\n"
1146                         "INTR-REMAP:[fault reason %02d] %s\n",
1147                         (source_id >> 8), PCI_SLOT(source_id & 0xFF),
1148                         PCI_FUNC(source_id & 0xFF), addr >> 48,
1149                         fault_reason, reason);
1150         else
1151                 printk(KERN_ERR
1152                        "DMAR:[%s] Request device [%02x:%02x.%d] "
1153                        "fault addr %llx\n"
1154                        "DMAR:[fault reason %02d] %s\n",
1155                        (type ? "DMA Read" : "DMA Write"),
1156                        (source_id >> 8), PCI_SLOT(source_id & 0xFF),
1157                        PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
1158         return 0;
1159 }
1160
1161 #define PRIMARY_FAULT_REG_LEN (16)
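/*
 * Fault event interrupt handler: walk the primary fault recording
 * registers starting at the index reported in the fault status register,
 * log and clear each recorded fault, then clear the remaining status bits.
 */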
1162 irqreturn_t dmar_fault(int irq, void *dev_id)
1163 {
1164         struct intel_iommu *iommu = dev_id;
1165         int reg, fault_index;
1166         u32 fault_status;
1167         unsigned long flag;
1168
1169         spin_lock_irqsave(&iommu->register_lock, flag);
1170         fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1171         if (fault_status)
1172                 printk(KERN_ERR "DRHD: handling fault status reg %x\n",
1173                        fault_status);
1174
1175         /* TBD: ignore advanced fault log currently */
1176         if (!(fault_status & DMA_FSTS_PPF))
1177                 goto clear_rest;
1178
1179         fault_index = dma_fsts_fault_record_index(fault_status);
1180         reg = cap_fault_reg_offset(iommu->cap);
1181         while (1) {
1182                 u8 fault_reason;
1183                 u16 source_id;
1184                 u64 guest_addr;
1185                 int type;
1186                 u32 data;
1187
1188                 /* highest 32 bits */
1189                 data = readl(iommu->reg + reg +
1190                                 fault_index * PRIMARY_FAULT_REG_LEN + 12);
1191                 if (!(data & DMA_FRCD_F))
1192                         break;
1193
1194                 fault_reason = dma_frcd_fault_reason(data);
1195                 type = dma_frcd_type(data);
1196
1197                 data = readl(iommu->reg + reg +
1198                                 fault_index * PRIMARY_FAULT_REG_LEN + 8);
1199                 source_id = dma_frcd_source_id(data);
1200
1201                 guest_addr = dmar_readq(iommu->reg + reg +
1202                                 fault_index * PRIMARY_FAULT_REG_LEN);
1203                 guest_addr = dma_frcd_page_addr(guest_addr);
1204                 /* clear the fault */
1205                 writel(DMA_FRCD_F, iommu->reg + reg +
1206                         fault_index * PRIMARY_FAULT_REG_LEN + 12);
1207
1208                 spin_unlock_irqrestore(&iommu->register_lock, flag);
1209
1210                 dmar_fault_do_one(iommu, type, fault_reason,
1211                                 source_id, guest_addr);
1212
1213                 fault_index++;
1214                 if (fault_index >= cap_num_fault_regs(iommu->cap))
1215                         fault_index = 0;
1216                 spin_lock_irqsave(&iommu->register_lock, flag);
1217         }
1218 clear_rest:
1219         /* clear all the other faults */
1220         fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1221         writel(fault_status, iommu->reg + DMAR_FSTS_REG);
1222
1223         spin_unlock_irqrestore(&iommu->register_lock, flag);
1224         return IRQ_HANDLED;
1225 }
1226
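/*
 * Allocate an interrupt for fault reporting, program the fault-event MSI
 * registers through arch_setup_dmar_msi() and install dmar_fault() as the
 * handler.
 */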
1227 int dmar_set_interrupt(struct intel_iommu *iommu)
1228 {
1229         int irq, ret;
1230
1231         /*
1232          * Check if the fault interrupt is already initialized.
1233          */
1234         if (iommu->irq)
1235                 return 0;
1236
1237         irq = create_irq();
1238         if (!irq) {
1239                 printk(KERN_ERR "IOMMU: no free vectors\n");
1240                 return -EINVAL;
1241         }
1242
1243         set_irq_data(irq, iommu);
1244         iommu->irq = irq;
1245
1246         ret = arch_setup_dmar_msi(irq);
1247         if (ret) {
1248                 set_irq_data(irq, NULL);
1249                 iommu->irq = 0;
1250                 destroy_irq(irq);
1251                 return ret;
1252         }
1253
1254         ret = request_irq(irq, dmar_fault, 0, iommu->name, iommu);
1255         if (ret)
1256                 printk(KERN_ERR "IOMMU: can't request irq\n");
1257         return ret;
1258 }
1259
1260 int __init enable_drhd_fault_handling(void)
1261 {
1262         struct dmar_drhd_unit *drhd;
1263
1264         /*
1265          * Enable fault control interrupt.
1266          */
1267         for_each_drhd_unit(drhd) {
1268                 int ret;
1269                 struct intel_iommu *iommu = drhd->iommu;
1270                 ret = dmar_set_interrupt(iommu);
1271
1272                 if (ret) {
1273                         printk(KERN_ERR "DRHD %Lx: failed to enable fault "
1274                                "interrupt, ret %d\n",
1275                                (unsigned long long)drhd->reg_base_addr, ret);
1276                         return -1;
1277                 }
1278         }
1279
1280         return 0;
1281 }
1282
1283 /*
1284  * Re-enable Queued Invalidation interface.
1285  */
1286 int dmar_reenable_qi(struct intel_iommu *iommu)
1287 {
1288         if (!ecap_qis(iommu->ecap))
1289                 return -ENOENT;
1290
1291         if (!iommu->qi)
1292                 return -ENOENT;
1293
1294         /*
1295          * First disable queued invalidation.
1296          */
1297         dmar_disable_qi(iommu);
1298         /*
1299          * Then enable queued invalidation again. Since there is no pending
1300          * invalidation requests now, it's safe to re-enable queued
1301          * invalidation.
1302          */
1303         __dmar_enable_qi(iommu);
1304
1305         return 0;
1306 }