drivers/iommu/dmar.c
1 /*
2  * Copyright (c) 2006, Intel Corporation.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms and conditions of the GNU General Public License,
6  * version 2, as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope it will be useful, but WITHOUT
9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
11  * more details.
12  *
13  * You should have received a copy of the GNU General Public License along with
14  * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15  * Place - Suite 330, Boston, MA 02111-1307 USA.
16  *
17  * Copyright (C) 2006-2008 Intel Corporation
18  * Author: Ashok Raj <ashok.raj@intel.com>
19  * Author: Shaohua Li <shaohua.li@intel.com>
20  * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
21  *
22  * This file implements early detection/parsing of Remapping Devices
23  * reported to OS through BIOS via DMA remapping reporting (DMAR) ACPI
24  * tables.
25  *
26  * These routines are used by both DMA-remapping and Interrupt-remapping
27  */
28
29 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt /* has to precede printk.h */
30
31 #include <linux/pci.h>
32 #include <linux/dmar.h>
33 #include <linux/iova.h>
34 #include <linux/intel-iommu.h>
35 #include <linux/timer.h>
36 #include <linux/irq.h>
37 #include <linux/interrupt.h>
38 #include <linux/tboot.h>
39 #include <linux/dmi.h>
40 #include <linux/slab.h>
41 #include <asm/irq_remapping.h>
42 #include <asm/iommu_table.h>
43
44 #include "irq_remapping.h"
45
46 /* No locks are needed as the DMA remapping hardware unit
47  * list is constructed at boot time and hotplug of
48  * these units is not supported by the architecture.
49  */
50 LIST_HEAD(dmar_drhd_units);
51
52 struct acpi_table_header * __initdata dmar_tbl;
53 static acpi_size dmar_tbl_size;
54
55 static void __init dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)
56 {
57         /*
58          * Add the INCLUDE_ALL unit at the tail, so a scan of the list
59          * will find it at the very end.
60          */
61         if (drhd->include_all)
62                 list_add_tail(&drhd->list, &dmar_drhd_units);
63         else
64                 list_add(&drhd->list, &dmar_drhd_units);
65 }
66
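/*
 * Resolve a single ACPI device-scope entry to a struct pci_dev.  The entry
 * describes a path of (device, function) steps starting at scope->bus on
 * the given PCI segment; each step is looked up with pci_get_slot() and the
 * walk descends through subordinate buses.  On success *dev holds a
 * referenced pci_dev.  Devices listed by the BIOS but not actually present
 * are tolerated: *dev is set to NULL and 0 is returned.
 */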
67 static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope,
68                                            struct pci_dev **dev, u16 segment)
69 {
70         struct pci_bus *bus;
71         struct pci_dev *pdev = NULL;
72         struct acpi_dmar_pci_path *path;
73         int count;
74
75         bus = pci_find_bus(segment, scope->bus);
76         path = (struct acpi_dmar_pci_path *)(scope + 1);
77         count = (scope->length - sizeof(struct acpi_dmar_device_scope))
78                 / sizeof(struct acpi_dmar_pci_path);
79
80         while (count) {
81                 if (pdev)
82                         pci_dev_put(pdev);
83                 /*
84                  * Some BIOSes list non-existent devices in the DMAR table;
85                  * just ignore them.
86                  */
87                 if (!bus) {
88                         pr_warn("Device scope bus [%d] not found\n", scope->bus);
89                         break;
90                 }
91                 pdev = pci_get_slot(bus, PCI_DEVFN(path->dev, path->fn));
92                 if (!pdev) {
93                         /* warning will be printed below */
94                         break;
95                 }
96                 path++;
97                 count--;
98                 bus = pdev->subordinate;
99         }
100         if (!pdev) {
101                 pr_warn("Device scope device [%04x:%02x:%02x.%02x] not found\n",
102                         segment, scope->bus, path->dev, path->fn);
103                 *dev = NULL;
104                 return 0;
105         }
106         if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT &&
107                         pdev->subordinate) || (scope->entry_type ==
108                         ACPI_DMAR_SCOPE_TYPE_BRIDGE && !pdev->subordinate)) {
109                 pr_warn("Device scope type does not match for %s\n",
110                         pci_name(pdev));
111                 pci_dev_put(pdev);
112                 return -EINVAL;
113         }
114         *dev = pdev;
115         return 0;
116 }
117
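/*
 * Parse all device-scope entries between start and end.  This is a two-pass
 * walk: the first pass counts ENDPOINT and BRIDGE entries so a pci_dev
 * pointer array of the right size can be allocated, the second pass resolves
 * each counted entry via dmar_parse_one_dev_scope().
 */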
118 int __init dmar_parse_dev_scope(void *start, void *end, int *cnt,
119                                 struct pci_dev ***devices, u16 segment)
120 {
121         struct acpi_dmar_device_scope *scope;
122         void *tmp = start;
123         int index;
124         int ret;
125
126         *cnt = 0;
127         while (start < end) {
128                 scope = start;
129                 if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
130                     scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE)
131                         (*cnt)++;
132                 else if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_IOAPIC) {
133                         pr_warn("Unsupported device scope\n");
134                 }
135                 start += scope->length;
136         }
137         if (*cnt == 0)
138                 return 0;
139
140         *devices = kcalloc(*cnt, sizeof(struct pci_dev *), GFP_KERNEL);
141         if (!*devices)
142                 return -ENOMEM;
143
144         start = tmp;
145         index = 0;
146         while (start < end) {
147                 scope = start;
148                 if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
149                     scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE) {
150                         ret = dmar_parse_one_dev_scope(scope,
151                                 &(*devices)[index], segment);
152                         if (ret) {
153                                 kfree(*devices);
154                                 return ret;
155                         }
156                         index++;
157                 }
158                 start += scope->length;
159         }
160
161         return 0;
162 }
163
164 /**
165  * dmar_parse_one_drhd - parses exactly one DMA remapping hardware definition
166  * structure, which uniquely represents one DMA remapping hardware unit
167  * present in the platform.
168  */
169 static int __init
170 dmar_parse_one_drhd(struct acpi_dmar_header *header)
171 {
172         struct acpi_dmar_hardware_unit *drhd;
173         struct dmar_drhd_unit *dmaru;
174         int ret = 0;
175
176         drhd = (struct acpi_dmar_hardware_unit *)header;
177         dmaru = kzalloc(sizeof(*dmaru), GFP_KERNEL);
178         if (!dmaru)
179                 return -ENOMEM;
180
181         dmaru->hdr = header;
182         dmaru->reg_base_addr = drhd->address;
183         dmaru->segment = drhd->segment;
184         dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */
185
186         ret = alloc_iommu(dmaru);
187         if (ret) {
188                 kfree(dmaru);
189                 return ret;
190         }
191         dmar_register_drhd_unit(dmaru);
192         return 0;
193 }
194
195 static int __init dmar_parse_dev(struct dmar_drhd_unit *dmaru)
196 {
197         struct acpi_dmar_hardware_unit *drhd;
198         int ret = 0;
199
200         drhd = (struct acpi_dmar_hardware_unit *) dmaru->hdr;
201
202         if (dmaru->include_all)
203                 return 0;
204
205         ret = dmar_parse_dev_scope((void *)(drhd + 1),
206                                 ((void *)drhd) + drhd->header.length,
207                                 &dmaru->devices_cnt, &dmaru->devices,
208                                 drhd->segment);
209         if (ret) {
210                 list_del(&dmaru->list);
211                 kfree(dmaru);
212         }
213         return ret;
214 }
215
216 #ifdef CONFIG_ACPI_NUMA
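/*
 * An RHSA structure associates a DMA remapping hardware unit with the NUMA
 * node (ACPI proximity domain) it belongs to.  Find the DRHD with the
 * matching register base address and record the node in its iommu, falling
 * back to -1 (no node) if the mapped node is not online.
 */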
217 static int __init
218 dmar_parse_one_rhsa(struct acpi_dmar_header *header)
219 {
220         struct acpi_dmar_rhsa *rhsa;
221         struct dmar_drhd_unit *drhd;
222
223         rhsa = (struct acpi_dmar_rhsa *)header;
224         for_each_drhd_unit(drhd) {
225                 if (drhd->reg_base_addr == rhsa->base_address) {
226                         int node = acpi_map_pxm_to_node(rhsa->proximity_domain);
227
228                         if (!node_online(node))
229                                 node = -1;
230                         drhd->iommu->node = node;
231                         return 0;
232                 }
233         }
234         WARN_TAINT(
235                 1, TAINT_FIRMWARE_WORKAROUND,
236                 "Your BIOS is broken; RHSA refers to non-existent DMAR unit at %llx\n"
237                 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
238                 rhsa->base_address,
239                 dmi_get_system_info(DMI_BIOS_VENDOR),
240                 dmi_get_system_info(DMI_BIOS_VERSION),
241                 dmi_get_system_info(DMI_PRODUCT_VERSION));
242
243         return 0;
244 }
245 #endif
246
247 static void __init
248 dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
249 {
250         struct acpi_dmar_hardware_unit *drhd;
251         struct acpi_dmar_reserved_memory *rmrr;
252         struct acpi_dmar_atsr *atsr;
253         struct acpi_dmar_rhsa *rhsa;
254
255         switch (header->type) {
256         case ACPI_DMAR_TYPE_HARDWARE_UNIT:
257                 drhd = container_of(header, struct acpi_dmar_hardware_unit,
258                                     header);
259                 pr_info("DRHD base: %#016Lx flags: %#x\n",
260                         (unsigned long long)drhd->address, drhd->flags);
261                 break;
262         case ACPI_DMAR_TYPE_RESERVED_MEMORY:
263                 rmrr = container_of(header, struct acpi_dmar_reserved_memory,
264                                     header);
265                 pr_info("RMRR base: %#016Lx end: %#016Lx\n",
266                         (unsigned long long)rmrr->base_address,
267                         (unsigned long long)rmrr->end_address);
268                 break;
269         case ACPI_DMAR_TYPE_ATSR:
270                 atsr = container_of(header, struct acpi_dmar_atsr, header);
271                 pr_info("ATSR flags: %#x\n", atsr->flags);
272                 break;
273         case ACPI_DMAR_HARDWARE_AFFINITY:
274                 rhsa = container_of(header, struct acpi_dmar_rhsa, header);
275                 pr_info("RHSA base: %#016Lx proximity domain: %#x\n",
276                        (unsigned long long)rhsa->base_address,
277                        rhsa->proximity_domain);
278                 break;
279         }
280 }
281
282 /**
283  * dmar_table_detect - checks to see if the platform supports DMAR devices
284  */
285 static int __init dmar_table_detect(void)
286 {
287         acpi_status status = AE_OK;
288
289         /* if we can find the DMAR table, then there are DMAR devices */
290         status = acpi_get_table_with_size(ACPI_SIG_DMAR, 0,
291                                 (struct acpi_table_header **)&dmar_tbl,
292                                 &dmar_tbl_size);
293
294         if (ACPI_SUCCESS(status) && !dmar_tbl) {
295                 pr_warn("Unable to map DMAR\n");
296                 status = AE_NOT_FOUND;
297         }
298
299         return (ACPI_SUCCESS(status) ? 1 : 0);
300 }
301
302 /**
303  * parse_dmar_table - parses the DMA reporting table
304  */
305 static int __init
306 parse_dmar_table(void)
307 {
308         struct acpi_table_dmar *dmar;
309         struct acpi_dmar_header *entry_header;
310         int ret = 0;
311
312         /*
313          * Do it again; the earlier dmar_tbl mapping could have been done
314          * with the fixed map.
315          */
316         dmar_table_detect();
317
318         /*
319          * ACPI tables may not be DMA protected by tboot; use the DMAR copy
320          * that SINIT saved in SinitMleData in the TXT heap (DMA protected).
321          */
322         dmar_tbl = tboot_get_dmar_table(dmar_tbl);
323
324         dmar = (struct acpi_table_dmar *)dmar_tbl;
325         if (!dmar)
326                 return -ENODEV;
327
328         if (dmar->width < PAGE_SHIFT - 1) {
329                 pr_warn("Invalid DMAR haw\n");
330                 return -EINVAL;
331         }
332
333         pr_info("Host address width %d\n", dmar->width + 1);
334
335         entry_header = (struct acpi_dmar_header *)(dmar + 1);
336         while (((unsigned long)entry_header) <
337                         (((unsigned long)dmar) + dmar_tbl->length)) {
338                 /* Avoid looping forever on bad ACPI tables */
339                 if (entry_header->length == 0) {
340                         pr_warn("Invalid 0-length structure\n");
341                         ret = -EINVAL;
342                         break;
343                 }
344
345                 dmar_table_print_dmar_entry(entry_header);
346
347                 switch (entry_header->type) {
348                 case ACPI_DMAR_TYPE_HARDWARE_UNIT:
349                         ret = dmar_parse_one_drhd(entry_header);
350                         break;
351                 case ACPI_DMAR_TYPE_RESERVED_MEMORY:
352                         ret = dmar_parse_one_rmrr(entry_header);
353                         break;
354                 case ACPI_DMAR_TYPE_ATSR:
355                         ret = dmar_parse_one_atsr(entry_header);
356                         break;
357                 case ACPI_DMAR_HARDWARE_AFFINITY:
358 #ifdef CONFIG_ACPI_NUMA
359                         ret = dmar_parse_one_rhsa(entry_header);
360 #endif
361                         break;
362                 default:
363                         pr_warn("Unknown DMAR structure type %d\n",
364                                 entry_header->type);
365                         ret = 0; /* for forward compatibility */
366                         break;
367                 }
368                 if (ret)
369                         break;
370
371                 entry_header = ((void *)entry_header + entry_header->length);
372         }
373         return ret;
374 }
375
376 static int dmar_pci_device_match(struct pci_dev *devices[], int cnt,
377                           struct pci_dev *dev)
378 {
379         int index;
380
381         while (dev) {
382                 for (index = 0; index < cnt; index++)
383                         if (dev == devices[index])
384                                 return 1;
385
386                 /* Check our parent */
387                 dev = dev->bus->self;
388         }
389
390         return 0;
391 }
392
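/*
 * Find the DRHD unit responsible for a device: scan the DRHD list for a
 * unit whose device scope contains the device or one of its parent bridges.
 * The INCLUDE_ALL unit, kept at the tail of the list, acts as the fallback
 * for its PCI segment.  pci_physfn() lets SR-IOV virtual functions match
 * through their physical function.
 */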
393 struct dmar_drhd_unit *
394 dmar_find_matched_drhd_unit(struct pci_dev *dev)
395 {
396         struct dmar_drhd_unit *dmaru = NULL;
397         struct acpi_dmar_hardware_unit *drhd;
398
399         dev = pci_physfn(dev);
400
401         list_for_each_entry(dmaru, &dmar_drhd_units, list) {
402                 drhd = container_of(dmaru->hdr,
403                                     struct acpi_dmar_hardware_unit,
404                                     header);
405
406                 if (dmaru->include_all &&
407                     drhd->segment == pci_domain_nr(dev->bus))
408                         return dmaru;
409
410                 if (dmar_pci_device_match(dmaru->devices,
411                                           dmaru->devices_cnt, dev))
412                         return dmaru;
413         }
414
415         return NULL;
416 }
417
418 int __init dmar_dev_scope_init(void)
419 {
420         static int dmar_dev_scope_initialized;
421         struct dmar_drhd_unit *drhd, *drhd_n;
422         int ret = -ENODEV;
423
424         if (dmar_dev_scope_initialized)
425                 return dmar_dev_scope_initialized;
426
427         if (list_empty(&dmar_drhd_units))
428                 goto fail;
429
430         list_for_each_entry_safe(drhd, drhd_n, &dmar_drhd_units, list) {
431                 ret = dmar_parse_dev(drhd);
432                 if (ret)
433                         goto fail;
434         }
435
436         ret = dmar_parse_rmrr_atsr_dev();
437         if (ret)
438                 goto fail;
439
440         dmar_dev_scope_initialized = 1;
441         return 0;
442
443 fail:
444         dmar_dev_scope_initialized = ret;
445         return ret;
446 }
447
448
449 int __init dmar_table_init(void)
450 {
451         static int dmar_table_initialized;
452         int ret;
453
454         if (dmar_table_initialized)
455                 return 0;
456
457         dmar_table_initialized = 1;
458
459         ret = parse_dmar_table();
460         if (ret) {
461                 if (ret != -ENODEV)
462                         pr_info("Failed to parse DMAR table\n");
463                 return ret;
464         }
465
466         if (list_empty(&dmar_drhd_units)) {
467                 pr_info("No DMAR devices found\n");
468                 return -ENODEV;
469         }
470
471         return 0;
472 }
473
474 static void warn_invalid_dmar(u64 addr, const char *message)
475 {
476         WARN_TAINT_ONCE(
477                 1, TAINT_FIRMWARE_WORKAROUND,
478                 "Your BIOS is broken; DMAR reported at address %llx%s!\n"
479                 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
480                 addr, message,
481                 dmi_get_system_info(DMI_BIOS_VENDOR),
482                 dmi_get_system_info(DMI_BIOS_VERSION),
483                 dmi_get_system_info(DMI_PRODUCT_VERSION));
484 }
485
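/*
 * Sanity-check the DRHD entries in the DMAR table before committing to use
 * them: a register base address of zero, or CAP/ECAP registers that read
 * back as all ones, indicate broken firmware.  Returns 1 if the table looks
 * usable, 0 otherwise.
 */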
486 int __init check_zero_address(void)
487 {
488         struct acpi_table_dmar *dmar;
489         struct acpi_dmar_header *entry_header;
490         struct acpi_dmar_hardware_unit *drhd;
491
492         dmar = (struct acpi_table_dmar *)dmar_tbl;
493         entry_header = (struct acpi_dmar_header *)(dmar + 1);
494
495         while (((unsigned long)entry_header) <
496                         (((unsigned long)dmar) + dmar_tbl->length)) {
497                 /* Avoid looping forever on bad ACPI tables */
498                 if (entry_header->length == 0) {
499                         pr_warn("Invalid 0-length structure\n");
500                         return 0;
501                 }
502
503                 if (entry_header->type == ACPI_DMAR_TYPE_HARDWARE_UNIT) {
504                         void __iomem *addr;
505                         u64 cap, ecap;
506
507                         drhd = (void *)entry_header;
508                         if (!drhd->address) {
509                                 warn_invalid_dmar(0, "");
510                                 goto failed;
511                         }
512
513                         addr = early_ioremap(drhd->address, VTD_PAGE_SIZE);
514                         if (!addr) {
515                                 pr_warn("IOMMU: can't validate: %llx\n", drhd->address);
516                                 goto failed;
517                         }
518                         cap = dmar_readq(addr + DMAR_CAP_REG);
519                         ecap = dmar_readq(addr + DMAR_ECAP_REG);
520                         early_iounmap(addr, VTD_PAGE_SIZE);
521                         if (cap == (uint64_t)-1 && ecap == (uint64_t)-1) {
522                                 warn_invalid_dmar(drhd->address,
523                                                   " returns all ones");
524                                 goto failed;
525                         }
526                 }
527
528                 entry_header = ((void *)entry_header + entry_header->length);
529         }
530         return 1;
531
532 failed:
533         return 0;
534 }
535
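/*
 * Early detection entry point, run via IOMMU_INIT_POST(detect_intel_iommu)
 * at the bottom of this file.  It looks for the ACPI DMAR table, validates
 * the DRHD register ranges, optionally requests PCI ACS, and on x86 hooks
 * intel_iommu_init() into x86_init.iommu.iommu_init.  The table mapping is
 * dropped again at the end; parse_dmar_table() re-detects it later.
 */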
536 int __init detect_intel_iommu(void)
537 {
538         int ret;
539
540         ret = dmar_table_detect();
541         if (ret)
542                 ret = check_zero_address();
543         {
544                 struct acpi_table_dmar *dmar;
545
546                 dmar = (struct acpi_table_dmar *) dmar_tbl;
547
548                 if (ret && irq_remapping_enabled && cpu_has_x2apic &&
549                     dmar->flags & 0x1)
550                         pr_info("Queued invalidation will be enabled to support x2apic and Intr-remapping.\n");
551
552                 if (ret && !no_iommu && !iommu_detected && !dmar_disabled) {
553                         iommu_detected = 1;
554                         /* Make sure ACS will be enabled */
555                         pci_request_acs();
556                 }
557
558 #ifdef CONFIG_X86
559                 if (ret)
560                         x86_init.iommu.iommu_init = intel_iommu_init;
561 #endif
562         }
563         early_acpi_os_unmap_memory(dmar_tbl, dmar_tbl_size);
564         dmar_tbl = NULL;
565
566         return ret ? 1 : -ENODEV;
567 }
568
569
570 static void unmap_iommu(struct intel_iommu *iommu)
571 {
572         iounmap(iommu->reg);
573         release_mem_region(iommu->reg_phys, iommu->reg_size);
574 }
575
576 /**
577  * map_iommu: map the iommu's registers
578  * @iommu: the iommu to map
579  * @phys_addr: the physical address of the base register
580  *
581  * Memory map the iommu's registers.  Start with a single page, and
582  * possibly expand if that turns out to be insufficient.
583  */
584 static int map_iommu(struct intel_iommu *iommu, u64 phys_addr)
585 {
586         int map_size, err = 0;
587
588         iommu->reg_phys = phys_addr;
589         iommu->reg_size = VTD_PAGE_SIZE;
590
591         if (!request_mem_region(iommu->reg_phys, iommu->reg_size, iommu->name)) {
592                 pr_err("IOMMU: can't reserve memory\n");
593                 err = -EBUSY;
594                 goto out;
595         }
596
597         iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size);
598         if (!iommu->reg) {
599                 pr_err("IOMMU: can't map the region\n");
600                 err = -ENOMEM;
601                 goto release;
602         }
603
604         iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
605         iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
606
607         if (iommu->cap == (uint64_t)-1 && iommu->ecap == (uint64_t)-1) {
608                 err = -EINVAL;
609                 warn_invalid_dmar(phys_addr, " returns all ones");
610                 goto unmap;
611         }
612
613         /* the registers might be more than one page */
614         map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
615                          cap_max_fault_reg_offset(iommu->cap));
616         map_size = VTD_PAGE_ALIGN(map_size);
617         if (map_size > iommu->reg_size) {
618                 iounmap(iommu->reg);
619                 release_mem_region(iommu->reg_phys, iommu->reg_size);
620                 iommu->reg_size = map_size;
621                 if (!request_mem_region(iommu->reg_phys, iommu->reg_size,
622                                         iommu->name)) {
623                         pr_err("IOMMU: can't reserve memory\n");
624                         err = -EBUSY;
625                         goto out;
626                 }
627                 iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size);
628                 if (!iommu->reg) {
629                         pr_err("IOMMU: can't map the region\n");
630                         err = -ENOMEM;
631                         goto release;
632                 }
633         }
634         err = 0;
635         goto out;
636
637 unmap:
638         iounmap(iommu->reg);
639 release:
640         release_mem_region(iommu->reg_phys, iommu->reg_size);
641 out:
642         return err;
643 }
644
645 int alloc_iommu(struct dmar_drhd_unit *drhd)
646 {
647         struct intel_iommu *iommu;
648         u32 ver;
649         static int iommu_allocated = 0;
650         int agaw = 0;
651         int msagaw = 0;
652         int err;
653
654         if (!drhd->reg_base_addr) {
655                 warn_invalid_dmar(0, "");
656                 return -EINVAL;
657         }
658
659         iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
660         if (!iommu)
661                 return -ENOMEM;
662
663         iommu->seq_id = iommu_allocated++;
664         sprintf(iommu->name, "dmar%d", iommu->seq_id);
665
666         err = map_iommu(iommu, drhd->reg_base_addr);
667         if (err) {
668                 pr_err("IOMMU: failed to map %s\n", iommu->name);
669                 goto error;
670         }
671
672         err = -EINVAL;
673         agaw = iommu_calculate_agaw(iommu);
674         if (agaw < 0) {
675                 pr_err("Cannot get a valid agaw for iommu (seq_id = %d)\n",
676                         iommu->seq_id);
677                 goto err_unmap;
678         }
679         msagaw = iommu_calculate_max_sagaw(iommu);
680         if (msagaw < 0) {
681                 pr_err("Cannot get a valid max agaw for iommu (seq_id = %d)\n",
682                         iommu->seq_id);
683                 goto err_unmap;
684         }
685         iommu->agaw = agaw;
686         iommu->msagaw = msagaw;
687
688         iommu->node = -1;
689
690         ver = readl(iommu->reg + DMAR_VER_REG);
691         pr_info("IOMMU %d: reg_base_addr %llx ver %d:%d cap %llx ecap %llx\n",
692                 iommu->seq_id,
693                 (unsigned long long)drhd->reg_base_addr,
694                 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
695                 (unsigned long long)iommu->cap,
696                 (unsigned long long)iommu->ecap);
697
698         raw_spin_lock_init(&iommu->register_lock);
699
700         drhd->iommu = iommu;
701         return 0;
702
703  err_unmap:
704         unmap_iommu(iommu);
705  error:
706         kfree(iommu);
707         return err;
708 }
709
710 void free_iommu(struct intel_iommu *iommu)
711 {
712         if (!iommu)
713                 return;
714
715         free_dmar_iommu(iommu);
716
717         if (iommu->reg)
718                 unmap_iommu(iommu);
719
720         kfree(iommu);
721 }
722
723 /*
724  * Reclaim all the submitted descriptors that have completed their work.
725  */
726 static inline void reclaim_free_desc(struct q_inval *qi)
727 {
728         while (qi->desc_status[qi->free_tail] == QI_DONE ||
729                qi->desc_status[qi->free_tail] == QI_ABORT) {
730                 qi->desc_status[qi->free_tail] = QI_FREE;
731                 qi->free_tail = (qi->free_tail + 1) % QI_LENGTH;
732                 qi->free_cnt++;
733         }
734 }
735
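/*
 * Check the Fault Status register for queued-invalidation errors while a
 * submission is being waited on.  An Invalidation Queue Error (IQE) at our
 * slot means the descriptor itself was rejected; it is overwritten with the
 * wait descriptor so the queue can make progress, and -EINVAL is returned.
 * An Invalidation Time-out Error (ITE) aborts all pending wait descriptors;
 * affected waiters get -EAGAIN and resubmit.  A completion error (ICE) is
 * simply cleared.
 */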
736 static int qi_check_fault(struct intel_iommu *iommu, int index)
737 {
738         u32 fault;
739         int head, tail;
740         struct q_inval *qi = iommu->qi;
741         int wait_index = (index + 1) % QI_LENGTH;
742
743         if (qi->desc_status[wait_index] == QI_ABORT)
744                 return -EAGAIN;
745
746         fault = readl(iommu->reg + DMAR_FSTS_REG);
747
748         /*
749          * If IQE happens, the head points to the descriptor associated
750          * with the error. No new descriptors are fetched until the IQE
751          * is cleared.
752          */
753         if (fault & DMA_FSTS_IQE) {
754                 head = readl(iommu->reg + DMAR_IQH_REG);
755                 if ((head >> DMAR_IQ_SHIFT) == index) {
756                         pr_err("VT-d detected invalid descriptor: "
757                                 "low=%llx, high=%llx\n",
758                                 (unsigned long long)qi->desc[index].low,
759                                 (unsigned long long)qi->desc[index].high);
760                         memcpy(&qi->desc[index], &qi->desc[wait_index],
761                                         sizeof(struct qi_desc));
762                         __iommu_flush_cache(iommu, &qi->desc[index],
763                                         sizeof(struct qi_desc));
764                         writel(DMA_FSTS_IQE, iommu->reg + DMAR_FSTS_REG);
765                         return -EINVAL;
766                 }
767         }
768
769         /*
770          * If ITE happens, all pending wait_desc commands are aborted.
771          * No new descriptors are fetched until the ITE is cleared.
772          */
773         if (fault & DMA_FSTS_ITE) {
774                 head = readl(iommu->reg + DMAR_IQH_REG);
775                 head = ((head >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH;
776                 head |= 1;
777                 tail = readl(iommu->reg + DMAR_IQT_REG);
778                 tail = ((tail >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH;
779
780                 writel(DMA_FSTS_ITE, iommu->reg + DMAR_FSTS_REG);
781
782                 do {
783                         if (qi->desc_status[head] == QI_IN_USE)
784                                 qi->desc_status[head] = QI_ABORT;
785                         head = (head - 2 + QI_LENGTH) % QI_LENGTH;
786                 } while (head != tail);
787
788                 if (qi->desc_status[wait_index] == QI_ABORT)
789                         return -EAGAIN;
790         }
791
792         if (fault & DMA_FSTS_ICE)
793                 writel(DMA_FSTS_ICE, iommu->reg + DMAR_FSTS_REG);
794
795         return 0;
796 }
797
798 /*
799  * Submit the queued invalidation descriptor to the remapping
800  * hardware unit and wait for its completion.
801  */
802 int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu)
803 {
804         int rc;
805         struct q_inval *qi = iommu->qi;
806         struct qi_desc *hw, wait_desc;
807         int wait_index, index;
808         unsigned long flags;
809
810         if (!qi)
811                 return 0;
812
813         hw = qi->desc;
814
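        /*
         * Each submission consumes two consecutive slots in the ring: the
         * caller's descriptor at free_head and a wait descriptor right
         * behind it.  The wait descriptor asks the hardware to write
         * QI_DONE into its status slot, which is what the polling loop
         * below waits for.
         */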
815 restart:
816         rc = 0;
817
818         raw_spin_lock_irqsave(&qi->q_lock, flags);
819         while (qi->free_cnt < 3) {
820                 raw_spin_unlock_irqrestore(&qi->q_lock, flags);
821                 cpu_relax();
822                 raw_spin_lock_irqsave(&qi->q_lock, flags);
823         }
824
825         index = qi->free_head;
826         wait_index = (index + 1) % QI_LENGTH;
827
828         qi->desc_status[index] = qi->desc_status[wait_index] = QI_IN_USE;
829
830         hw[index] = *desc;
831
832         wait_desc.low = QI_IWD_STATUS_DATA(QI_DONE) |
833                         QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
834         wait_desc.high = virt_to_phys(&qi->desc_status[wait_index]);
835
836         hw[wait_index] = wait_desc;
837
838         __iommu_flush_cache(iommu, &hw[index], sizeof(struct qi_desc));
839         __iommu_flush_cache(iommu, &hw[wait_index], sizeof(struct qi_desc));
840
841         qi->free_head = (qi->free_head + 2) % QI_LENGTH;
842         qi->free_cnt -= 2;
843
844         /*
845          * update the HW tail register indicating the presence of
846          * new descriptors.
847          */
848         writel(qi->free_head << DMAR_IQ_SHIFT, iommu->reg + DMAR_IQT_REG);
849
850         while (qi->desc_status[wait_index] != QI_DONE) {
851                 /*
852                  * We leave interrupts disabled to prevent interrupt context
853                  * from queueing another cmd while a cmd is already submitted
854                  * and waiting for completion on this CPU. This avoids a
855                  * deadlock where the interrupt context could wait indefinitely
856                  * for free slots in the queue.
857                  */
858                 rc = qi_check_fault(iommu, index);
859                 if (rc)
860                         break;
861
862                 raw_spin_unlock(&qi->q_lock);
863                 cpu_relax();
864                 raw_spin_lock(&qi->q_lock);
865         }
866
867         qi->desc_status[index] = QI_DONE;
868
869         reclaim_free_desc(qi);
870         raw_spin_unlock_irqrestore(&qi->q_lock, flags);
871
872         if (rc == -EAGAIN)
873                 goto restart;
874
875         return rc;
876 }
877
878 /*
879  * Flush the global interrupt entry cache.
880  */
881 void qi_global_iec(struct intel_iommu *iommu)
882 {
883         struct qi_desc desc;
884
885         desc.low = QI_IEC_TYPE;
886         desc.high = 0;
887
888         /* should never fail */
889         qi_submit_sync(&desc, iommu);
890 }
891
892 void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm,
893                       u64 type)
894 {
895         struct qi_desc desc;
896
897         desc.low = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did)
898                         | QI_CC_GRAN(type) | QI_CC_TYPE;
899         desc.high = 0;
900
901         qi_submit_sync(&desc, iommu);
902 }
903
904 void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
905                     unsigned int size_order, u64 type)
906 {
907         u8 dw = 0, dr = 0;
908
909         struct qi_desc desc;
910         int ih = 0;
911
912         if (cap_write_drain(iommu->cap))
913                 dw = 1;
914
915         if (cap_read_drain(iommu->cap))
916                 dr = 1;
917
918         desc.low = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw)
919                 | QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE;
920         desc.high = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
921                 | QI_IOTLB_AM(size_order);
922
923         qi_submit_sync(&desc, iommu);
924 }
925
926 void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 qdep,
927                         u64 addr, unsigned mask)
928 {
929         struct qi_desc desc;
930
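        /*
         * A non-zero mask requests invalidation of 2^mask pages.  The base
         * address must be aligned to that size (hence the BUG_ON); the size
         * is then encoded in the descriptor by setting the low-order address
         * bits below bit (VTD_PAGE_SHIFT + mask - 1) together with
         * QI_DEV_IOTLB_SIZE.
         */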
931         if (mask) {
932                 BUG_ON(addr & ((1 << (VTD_PAGE_SHIFT + mask)) - 1));
933                 addr |= (1 << (VTD_PAGE_SHIFT + mask - 1)) - 1;
934                 desc.high = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE;
935         } else
936                 desc.high = QI_DEV_IOTLB_ADDR(addr);
937
938         if (qdep >= QI_DEV_IOTLB_MAX_INVS)
939                 qdep = 0;
940
941         desc.low = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) |
942                    QI_DIOTLB_TYPE;
943
944         qi_submit_sync(&desc, iommu);
945 }
946
947 /*
948  * Disable Queued Invalidation interface.
949  */
950 void dmar_disable_qi(struct intel_iommu *iommu)
951 {
952         unsigned long flags;
953         u32 sts;
954         cycles_t start_time = get_cycles();
955
956         if (!ecap_qis(iommu->ecap))
957                 return;
958
959         raw_spin_lock_irqsave(&iommu->register_lock, flags);
960
961         sts =  dmar_readq(iommu->reg + DMAR_GSTS_REG);
962         if (!(sts & DMA_GSTS_QIES))
963                 goto end;
964
965         /*
966          * Give the hardware a chance to complete any pending invalidation requests.
967          */
968         while ((readl(iommu->reg + DMAR_IQT_REG) !=
969                 readl(iommu->reg + DMAR_IQH_REG)) &&
970                 (DMAR_OPERATION_TIMEOUT > (get_cycles() - start_time)))
971                 cpu_relax();
972
973         iommu->gcmd &= ~DMA_GCMD_QIE;
974         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
975
976         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl,
977                       !(sts & DMA_GSTS_QIES), sts);
978 end:
979         raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
980 }
981
982 /*
983  * Enable queued invalidation.
984  */
985 static void __dmar_enable_qi(struct intel_iommu *iommu)
986 {
987         u32 sts;
988         unsigned long flags;
989         struct q_inval *qi = iommu->qi;
990
991         qi->free_head = qi->free_tail = 0;
992         qi->free_cnt = QI_LENGTH;
993
994         raw_spin_lock_irqsave(&iommu->register_lock, flags);
995
996         /* write zero to the tail reg */
997         writel(0, iommu->reg + DMAR_IQT_REG);
998
999         dmar_writeq(iommu->reg + DMAR_IQA_REG, virt_to_phys(qi->desc));
1000
1001         iommu->gcmd |= DMA_GCMD_QIE;
1002         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1003
1004         /* Make sure the hardware completes it */
1005         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts);
1006
1007         raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1008 }
1009
1010 /*
1011  * Enable Queued Invalidation interface. This is a must to support
1012  * interrupt-remapping. Also used by DMA-remapping, which replaces
1013  * register based IOTLB invalidation.
1014  */
1015 int dmar_enable_qi(struct intel_iommu *iommu)
1016 {
1017         struct q_inval *qi;
1018         struct page *desc_page;
1019
1020         if (!ecap_qis(iommu->ecap))
1021                 return -ENOENT;
1022
1023         /*
1024          * Queued invalidation is already set up and enabled.
1025          */
1026         if (iommu->qi)
1027                 return 0;
1028
1029         iommu->qi = kmalloc(sizeof(*qi), GFP_ATOMIC);
1030         if (!iommu->qi)
1031                 return -ENOMEM;
1032
1033         qi = iommu->qi;
1034
1035
1036         desc_page = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO, 0);
1037         if (!desc_page) {
1038                 kfree(qi);
1039                 iommu->qi = NULL;
1040                 return -ENOMEM;
1041         }
1042
1043         qi->desc = page_address(desc_page);
1044
1045         qi->desc_status = kmalloc(QI_LENGTH * sizeof(int), GFP_ATOMIC);
1046         if (!qi->desc_status) {
1047                 free_page((unsigned long) qi->desc);
1048                 kfree(qi);
1049                 iommu->qi = NULL;
1050                 return -ENOMEM;
1051         }
1052
1053         qi->free_head = qi->free_tail = 0;
1054         qi->free_cnt = QI_LENGTH;
1055
1056         raw_spin_lock_init(&qi->q_lock);
1057
1058         __dmar_enable_qi(iommu);
1059
1060         return 0;
1061 }
1062
1063 /* iommu interrupt handling. Most of it is MSI-like. */
1064
1065 enum faulttype {
1066         DMA_REMAP,
1067         INTR_REMAP,
1068         UNKNOWN,
1069 };
1070
1071 static const char *dma_remap_fault_reasons[] =
1072 {
1073         "Software",
1074         "Present bit in root entry is clear",
1075         "Present bit in context entry is clear",
1076         "Invalid context entry",
1077         "Access beyond MGAW",
1078         "PTE Write access is not set",
1079         "PTE Read access is not set",
1080         "Next page table ptr is invalid",
1081         "Root table address invalid",
1082         "Context table ptr is invalid",
1083         "non-zero reserved fields in RTP",
1084         "non-zero reserved fields in CTP",
1085         "non-zero reserved fields in PTE",
1086 };
1087
1088 static const char *irq_remap_fault_reasons[] =
1089 {
1090         "Detected reserved fields in the decoded interrupt-remapped request",
1091         "Interrupt index exceeded the interrupt-remapping table size",
1092         "Present field in the IRTE entry is clear",
1093         "Error accessing interrupt-remapping table pointed by IRTA_REG",
1094         "Detected reserved fields in the IRTE entry",
1095         "Blocked a compatibility format interrupt request",
1096         "Blocked an interrupt request due to source-id verification failure",
1097 };
1098
1099 #define MAX_FAULT_REASON_IDX    (ARRAY_SIZE(dma_remap_fault_reasons) - 1)
1100
1101 const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type)
1102 {
1103         if (fault_reason >= 0x20 && (fault_reason - 0x20 <
1104                                         ARRAY_SIZE(irq_remap_fault_reasons))) {
1105                 *fault_type = INTR_REMAP;
1106                 return irq_remap_fault_reasons[fault_reason - 0x20];
1107         } else if (fault_reason < ARRAY_SIZE(dma_remap_fault_reasons)) {
1108                 *fault_type = DMA_REMAP;
1109                 return dma_remap_fault_reasons[fault_reason];
1110         } else {
1111                 *fault_type = UNKNOWN;
1112                 return "Unknown";
1113         }
1114 }
1115
1116 void dmar_msi_unmask(struct irq_data *data)
1117 {
1118         struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);
1119         unsigned long flag;
1120
1121         /* unmask it */
1122         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1123         writel(0, iommu->reg + DMAR_FECTL_REG);
1124         /* Read a reg to force flush the post write */
1125         readl(iommu->reg + DMAR_FECTL_REG);
1126         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1127 }
1128
1129 void dmar_msi_mask(struct irq_data *data)
1130 {
1131         unsigned long flag;
1132         struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);
1133
1134         /* mask it */
1135         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1136         writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
1137         /* Read a reg to force flush the post write */
1138         readl(iommu->reg + DMAR_FECTL_REG);
1139         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1140 }
1141
1142 void dmar_msi_write(int irq, struct msi_msg *msg)
1143 {
1144         struct intel_iommu *iommu = irq_get_handler_data(irq);
1145         unsigned long flag;
1146
1147         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1148         writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
1149         writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
1150         writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
1151         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1152 }
1153
1154 void dmar_msi_read(int irq, struct msi_msg *msg)
1155 {
1156         struct intel_iommu *iommu = irq_get_handler_data(irq);
1157         unsigned long flag;
1158
1159         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1160         msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
1161         msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
1162         msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
1163         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1164 }
1165
1166 static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
1167                 u8 fault_reason, u16 source_id, unsigned long long addr)
1168 {
1169         const char *reason;
1170         int fault_type;
1171
1172         reason = dmar_get_fault_reason(fault_reason, &fault_type);
1173
1174         if (fault_type == INTR_REMAP)
1175                 pr_err("INTR-REMAP: Request device [%02x:%02x.%d] "
1176                        "fault index %llx\n"
1177                        "INTR-REMAP:[fault reason %02d] %s\n",
1178                         (source_id >> 8), PCI_SLOT(source_id & 0xFF),
1179                         PCI_FUNC(source_id & 0xFF), addr >> 48,
1180                         fault_reason, reason);
1181         else
1182                 pr_err("DMAR:[%s] Request device [%02x:%02x.%d] "
1183                        "fault addr %llx\n"
1184                        "DMAR:[fault reason %02d] %s\n",
1185                        (type ? "DMA Read" : "DMA Write"),
1186                        (source_id >> 8), PCI_SLOT(source_id & 0xFF),
1187                        PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
1188         return 0;
1189 }
1190
1191 #define PRIMARY_FAULT_REG_LEN (16)
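/*
 * Primary fault handler.  Each fault recording register is a 16-byte record:
 * the upper dword (offset 12) carries the Fault bit, fault reason and request
 * type, offset 8 carries the source-id of the faulting device, and the first
 * qword holds the faulting address.  Records are processed and cleared one by
 * one starting at the index reported in the Fault Status register.
 */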
1192 irqreturn_t dmar_fault(int irq, void *dev_id)
1193 {
1194         struct intel_iommu *iommu = dev_id;
1195         int reg, fault_index;
1196         u32 fault_status;
1197         unsigned long flag;
1198
1199         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1200         fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1201         if (fault_status)
1202                 pr_err("DRHD: handling fault status reg %x\n", fault_status);
1203
1204         /* TBD: ignore advanced fault log currently */
1205         if (!(fault_status & DMA_FSTS_PPF))
1206                 goto clear_rest;
1207
1208         fault_index = dma_fsts_fault_record_index(fault_status);
1209         reg = cap_fault_reg_offset(iommu->cap);
1210         while (1) {
1211                 u8 fault_reason;
1212                 u16 source_id;
1213                 u64 guest_addr;
1214                 int type;
1215                 u32 data;
1216
1217                 /* highest 32 bits */
1218                 data = readl(iommu->reg + reg +
1219                                 fault_index * PRIMARY_FAULT_REG_LEN + 12);
1220                 if (!(data & DMA_FRCD_F))
1221                         break;
1222
1223                 fault_reason = dma_frcd_fault_reason(data);
1224                 type = dma_frcd_type(data);
1225
1226                 data = readl(iommu->reg + reg +
1227                                 fault_index * PRIMARY_FAULT_REG_LEN + 8);
1228                 source_id = dma_frcd_source_id(data);
1229
1230                 guest_addr = dmar_readq(iommu->reg + reg +
1231                                 fault_index * PRIMARY_FAULT_REG_LEN);
1232                 guest_addr = dma_frcd_page_addr(guest_addr);
1233                 /* clear the fault */
1234                 writel(DMA_FRCD_F, iommu->reg + reg +
1235                         fault_index * PRIMARY_FAULT_REG_LEN + 12);
1236
1237                 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1238
1239                 dmar_fault_do_one(iommu, type, fault_reason,
1240                                 source_id, guest_addr);
1241
1242                 fault_index++;
1243                 if (fault_index >= cap_num_fault_regs(iommu->cap))
1244                         fault_index = 0;
1245                 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1246         }
1247 clear_rest:
1248         /* clear all the other faults */
1249         fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1250         writel(fault_status, iommu->reg + DMAR_FSTS_REG);
1251
1252         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1253         return IRQ_HANDLED;
1254 }
1255
1256 int dmar_set_interrupt(struct intel_iommu *iommu)
1257 {
1258         int irq, ret;
1259
1260         /*
1261          * Check if the fault interrupt is already initialized.
1262          */
1263         if (iommu->irq)
1264                 return 0;
1265
1266         irq = create_irq();
1267         if (!irq) {
1268                 pr_err("IOMMU: no free vectors\n");
1269                 return -EINVAL;
1270         }
1271
1272         irq_set_handler_data(irq, iommu);
1273         iommu->irq = irq;
1274
1275         ret = arch_setup_dmar_msi(irq);
1276         if (ret) {
1277                 irq_set_handler_data(irq, NULL);
1278                 iommu->irq = 0;
1279                 destroy_irq(irq);
1280                 return ret;
1281         }
1282
1283         ret = request_irq(irq, dmar_fault, IRQF_NO_THREAD, iommu->name, iommu);
1284         if (ret)
1285                 pr_err("IOMMU: can't request irq\n");
1286         return ret;
1287 }
1288
1289 int __init enable_drhd_fault_handling(void)
1290 {
1291         struct dmar_drhd_unit *drhd;
1292
1293         /*
1294          * Enable fault control interrupt.
1295          */
1296         for_each_drhd_unit(drhd) {
1297                 int ret;
1298                 struct intel_iommu *iommu = drhd->iommu;
1299                 ret = dmar_set_interrupt(iommu);
1300
1301                 if (ret) {
1302                         pr_err("DRHD %Lx: failed to enable fault interrupt, ret %d\n",
1303                                (unsigned long long)drhd->reg_base_addr, ret);
1304                         return -1;
1305                 }
1306
1307                 /*
1308                  * Clear any previous faults.
1309                  */
1310                 dmar_fault(iommu->irq, iommu);
1311         }
1312
1313         return 0;
1314 }
1315
1316 /*
1317  * Re-enable Queued Invalidation interface.
1318  */
1319 int dmar_reenable_qi(struct intel_iommu *iommu)
1320 {
1321         if (!ecap_qis(iommu->ecap))
1322                 return -ENOENT;
1323
1324         if (!iommu->qi)
1325                 return -ENOENT;
1326
1327         /*
1328          * First disable queued invalidation.
1329          */
1330         dmar_disable_qi(iommu);
1331         /*
1332          * Then enable queued invalidation again. Since there is no pending
1333          * invalidation requests now, it's safe to re-enable queued
1334          * invalidation.
1335          */
1336         __dmar_enable_qi(iommu);
1337
1338         return 0;
1339 }
1340
1341 /*
1342  * Check interrupt remapping support in DMAR table description.
1343  */
1344 int __init dmar_ir_support(void)
1345 {
1346         struct acpi_table_dmar *dmar;
1347         dmar = (struct acpi_table_dmar *)dmar_tbl;
1348         if (!dmar)
1349                 return 0;
1350         return dmar->flags & 0x1;
1351 }
1352 IOMMU_INIT_POST(detect_intel_iommu);