]> Pileus Git - ~andy/linux/blobdiff - drivers/edac/amd64_edac.c
Merge branch 'for-2.6.31' of git://git.linux-nfs.org/projects/trondmy/nfs-2.6
[~andy/linux] / drivers / edac / amd64_edac.c
index 5a6e714b115edaf89e602849d85bae027d60be3f..c36bf40568cf8e7de0aacaa5ce7219650cec5a26 100644 (file)
@@ -1,4 +1,5 @@
 #include "amd64_edac.h"
+#include <asm/k8.h>
 
 static struct edac_pci_ctl_info *amd64_ctl_pci;
 
@@ -2771,3 +2772,583 @@ static int amd64_init_csrows(struct mem_ctl_info *mci)
        return empty;
 }
 
+/*
+ * Only if 'ecc_enable_override' is set AND BIOS had ECC disabled, do "we"
+ * enable it.
+ */
+static void amd64_enable_ecc_error_reporting(struct mem_ctl_info *mci)
+{
+       struct amd64_pvt *pvt = mci->pvt_info;
+       const cpumask_t *cpumask = cpumask_of_node(pvt->mc_node_id);
+       int cpu, idx = 0, err = 0;
+       struct msr msrs[cpumask_weight(cpumask)];
+       u32 value;
+       u32 mask = K8_NBCTL_CECCEn | K8_NBCTL_UECCEn;
+
+       if (!ecc_enable_override)
+               return;
+
+       memset(msrs, 0, sizeof(msrs));
+
+       amd64_printk(KERN_WARNING,
+               "'ecc_enable_override' parameter is active, "
+               "Enabling AMD ECC hardware now: CAUTION\n");
+
+       err = pci_read_config_dword(pvt->misc_f3_ctl, K8_NBCTL, &value);
+       if (err)
+               debugf0("Reading K8_NBCTL failed\n");
+
+       /* turn on UECCn and CECCEn bits */
+       pvt->old_nbctl = value & mask;
+       pvt->nbctl_mcgctl_saved = 1;
+
+       value |= mask;
+       pci_write_config_dword(pvt->misc_f3_ctl, K8_NBCTL, value);
+
+       rdmsr_on_cpus(cpumask, K8_MSR_MCGCTL, msrs);
+
+       for_each_cpu(cpu, cpumask) {
+               if (msrs[idx].l & K8_MSR_MCGCTL_NBE)
+                       set_bit(idx, &pvt->old_mcgctl);
+
+               msrs[idx].l |= K8_MSR_MCGCTL_NBE;
+               idx++;
+       }
+       wrmsr_on_cpus(cpumask, K8_MSR_MCGCTL, msrs);
+
+       err = pci_read_config_dword(pvt->misc_f3_ctl, K8_NBCFG, &value);
+       if (err)
+               debugf0("Reading K8_NBCFG failed\n");
+
+       debugf0("NBCFG(1)= 0x%x  CHIPKILL= %s ECC_ENABLE= %s\n", value,
+               (value & K8_NBCFG_CHIPKILL) ? "Enabled" : "Disabled",
+               (value & K8_NBCFG_ECC_ENABLE) ? "Enabled" : "Disabled");
+
+       if (!(value & K8_NBCFG_ECC_ENABLE)) {
+               amd64_printk(KERN_WARNING,
+                       "This node reports that DRAM ECC is "
+                       "currently Disabled; ENABLING now\n");
+
+               /* Attempt to turn on DRAM ECC Enable */
+               value |= K8_NBCFG_ECC_ENABLE;
+               pci_write_config_dword(pvt->misc_f3_ctl, K8_NBCFG, value);
+
+               err = pci_read_config_dword(pvt->misc_f3_ctl, K8_NBCFG, &value);
+               if (err)
+                       debugf0("Reading K8_NBCFG failed\n");
+
+               if (!(value & K8_NBCFG_ECC_ENABLE)) {
+                       amd64_printk(KERN_WARNING,
+                               "Hardware rejects Enabling DRAM ECC checking\n"
+                               "Check memory DIMM configuration\n");
+               } else {
+                       amd64_printk(KERN_DEBUG,
+                               "Hardware accepted DRAM ECC Enable\n");
+               }
+       }
+       debugf0("NBCFG(2)= 0x%x  CHIPKILL= %s ECC_ENABLE= %s\n", value,
+               (value & K8_NBCFG_CHIPKILL) ? "Enabled" : "Disabled",
+               (value & K8_NBCFG_ECC_ENABLE) ? "Enabled" : "Disabled");
+
+       pvt->ctl_error_info.nbcfg = value;
+}
+
+static void amd64_restore_ecc_error_reporting(struct amd64_pvt *pvt)
+{
+       const cpumask_t *cpumask = cpumask_of_node(pvt->mc_node_id);
+       int cpu, idx = 0, err = 0;
+       struct msr msrs[cpumask_weight(cpumask)];
+       u32 value;
+       u32 mask = K8_NBCTL_CECCEn | K8_NBCTL_UECCEn;
+
+       if (!pvt->nbctl_mcgctl_saved)
+               return;
+
+       memset(msrs, 0, sizeof(msrs));
+
+       err = pci_read_config_dword(pvt->misc_f3_ctl, K8_NBCTL, &value);
+       if (err)
+               debugf0("Reading K8_NBCTL failed\n");
+       value &= ~mask;
+       value |= pvt->old_nbctl;
+
+       /* restore the NB Enable MCGCTL bit */
+       pci_write_config_dword(pvt->misc_f3_ctl, K8_NBCTL, value);
+
+       rdmsr_on_cpus(cpumask, K8_MSR_MCGCTL, msrs);
+
+       for_each_cpu(cpu, cpumask) {
+               msrs[idx].l &= ~K8_MSR_MCGCTL_NBE;
+               msrs[idx].l |=
+                       test_bit(idx, &pvt->old_mcgctl) << K8_MSR_MCGCTL_NBE;
+               idx++;
+       }
+
+       wrmsr_on_cpus(cpumask, K8_MSR_MCGCTL, msrs);
+}
+
+static void check_mcg_ctl(void *ret)
+{
+       u64 msr_val = 0;
+       u8 nbe;
+
+       rdmsrl(MSR_IA32_MCG_CTL, msr_val);
+       nbe = msr_val & K8_MSR_MCGCTL_NBE;
+
+       debugf0("core: %u, MCG_CTL: 0x%llx, NB MSR is %s\n",
+               raw_smp_processor_id(), msr_val,
+               (nbe ? "enabled" : "disabled"));
+
+       if (!nbe)
+               *(int *)ret = 0;
+}
+
+/* check MCG_CTL on all the cpus on this node */
+static int amd64_mcg_ctl_enabled_on_cpus(const cpumask_t *mask)
+{
+       int ret = 1;
+       preempt_disable();
+       smp_call_function_many(mask, check_mcg_ctl, &ret, 1);
+       preempt_enable();
+
+       return ret;
+}
+
+/*
+ * EDAC requires that the BIOS have ECC enabled before taking over the
+ * processing of ECC errors. This is because the BIOS can properly initialize
+ * the memory system completely. A command line option allows to force-enable
+ * hardware ECC later in amd64_enable_ecc_error_reporting().
+ */
+static int amd64_check_ecc_enabled(struct amd64_pvt *pvt)
+{
+       u32 value;
+       int err = 0, ret = 0;
+       u8 ecc_enabled = 0;
+
+       err = pci_read_config_dword(pvt->misc_f3_ctl, K8_NBCFG, &value);
+       if (err)
+               debugf0("Reading K8_NBCTL failed\n");
+
+       ecc_enabled = !!(value & K8_NBCFG_ECC_ENABLE);
+
+       ret = amd64_mcg_ctl_enabled_on_cpus(cpumask_of_node(pvt->mc_node_id));
+
+       debugf0("K8_NBCFG=0x%x,  DRAM ECC is %s\n", value,
+                       (value & K8_NBCFG_ECC_ENABLE ? "enabled" : "disabled"));
+
+       if (!ecc_enabled || !ret) {
+               if (!ecc_enabled) {
+                       amd64_printk(KERN_WARNING, "This node reports that "
+                                                  "Memory ECC is currently "
+                                                  "disabled.\n");
+
+                       amd64_printk(KERN_WARNING, "bit 0x%lx in register "
+                               "F3x%x of the MISC_CONTROL device (%s) "
+                               "should be enabled\n", K8_NBCFG_ECC_ENABLE,
+                               K8_NBCFG, pci_name(pvt->misc_f3_ctl));
+               }
+               if (!ret) {
+                       amd64_printk(KERN_WARNING, "bit 0x%016lx in MSR 0x%08x "
+                                       "of node %d should be enabled\n",
+                                       K8_MSR_MCGCTL_NBE, MSR_IA32_MCG_CTL,
+                                       pvt->mc_node_id);
+               }
+               if (!ecc_enable_override) {
+                       amd64_printk(KERN_WARNING, "WARNING: ECC is NOT "
+                               "currently enabled by the BIOS. Module "
+                               "will NOT be loaded.\n"
+                               "    Either Enable ECC in the BIOS, "
+                               "or use the 'ecc_enable_override' "
+                               "parameter.\n"
+                               "    Might be a BIOS bug, if BIOS says "
+                               "ECC is enabled\n"
+                               "    Use of the override can cause "
+                               "unknown side effects.\n");
+                       ret = -ENODEV;
+               }
+       } else {
+               amd64_printk(KERN_INFO,
+                       "ECC is enabled by BIOS, Proceeding "
+                       "with EDAC module initialization\n");
+
+               /* CLEAR the override, since BIOS controlled it */
+               ecc_enable_override = 0;
+       }
+
+       return ret;
+}
+
+struct mcidev_sysfs_attribute sysfs_attrs[ARRAY_SIZE(amd64_dbg_attrs) +
+                                         ARRAY_SIZE(amd64_inj_attrs) +
+                                         1];
+
+struct mcidev_sysfs_attribute terminator = { .attr = { .name = NULL } };
+
+static void amd64_set_mc_sysfs_attributes(struct mem_ctl_info *mci)
+{
+       unsigned int i = 0, j = 0;
+
+       for (; i < ARRAY_SIZE(amd64_dbg_attrs); i++)
+               sysfs_attrs[i] = amd64_dbg_attrs[i];
+
+       for (j = 0; j < ARRAY_SIZE(amd64_inj_attrs); j++, i++)
+               sysfs_attrs[i] = amd64_inj_attrs[j];
+
+       sysfs_attrs[i] = terminator;
+
+       mci->mc_driver_sysfs_attributes = sysfs_attrs;
+}
+
+static void amd64_setup_mci_misc_attributes(struct mem_ctl_info *mci)
+{
+       struct amd64_pvt *pvt = mci->pvt_info;
+
+       mci->mtype_cap          = MEM_FLAG_DDR2 | MEM_FLAG_RDDR2;
+       mci->edac_ctl_cap       = EDAC_FLAG_NONE;
+       mci->edac_cap           = EDAC_FLAG_NONE;
+
+       if (pvt->nbcap & K8_NBCAP_SECDED)
+               mci->edac_ctl_cap |= EDAC_FLAG_SECDED;
+
+       if (pvt->nbcap & K8_NBCAP_CHIPKILL)
+               mci->edac_ctl_cap |= EDAC_FLAG_S4ECD4ED;
+
+       mci->edac_cap           = amd64_determine_edac_cap(pvt);
+       mci->mod_name           = EDAC_MOD_STR;
+       mci->mod_ver            = EDAC_AMD64_VERSION;
+       mci->ctl_name           = get_amd_family_name(pvt->mc_type_index);
+       mci->dev_name           = pci_name(pvt->dram_f2_ctl);
+       mci->ctl_page_to_phys   = NULL;
+
+       /* IMPORTANT: Set the polling 'check' function in this module */
+       mci->edac_check         = amd64_check;
+
+       /* memory scrubber interface */
+       mci->set_sdram_scrub_rate = amd64_set_scrub_rate;
+       mci->get_sdram_scrub_rate = amd64_get_scrub_rate;
+}
+
+/*
+ * Init stuff for this DRAM Controller device.
+ *
+ * Due to a hardware feature on Fam10h CPUs, the Enable Extended Configuration
+ * Space feature MUST be enabled on ALL Processors prior to actually reading
+ * from the ECS registers. Since the loading of the module can occur on any
+ * 'core', and cores don't 'see' all the other processors ECS data when the
+ * others are NOT enabled. Our solution is to first enable ECS access in this
+ * routine on all processors, gather some data in a amd64_pvt structure and
+ * later come back in a finish-setup function to perform that final
+ * initialization. See also amd64_init_2nd_stage() for that.
+ */
+static int amd64_probe_one_instance(struct pci_dev *dram_f2_ctl,
+                                   int mc_type_index)
+{
+       struct amd64_pvt *pvt = NULL;
+       int err = 0, ret;
+
+       ret = -ENOMEM;
+       pvt = kzalloc(sizeof(struct amd64_pvt), GFP_KERNEL);
+       if (!pvt)
+               goto err_exit;
+
+       pvt->mc_node_id = get_mc_node_id_from_pdev(dram_f2_ctl);
+
+       pvt->dram_f2_ctl        = dram_f2_ctl;
+       pvt->ext_model          = boot_cpu_data.x86_model >> 4;
+       pvt->mc_type_index      = mc_type_index;
+       pvt->ops                = family_ops(mc_type_index);
+       pvt->old_mcgctl         = 0;
+
+       /*
+        * We have the dram_f2_ctl device as an argument, now go reserve its
+        * sibling devices from the PCI system.
+        */
+       ret = -ENODEV;
+       err = amd64_reserve_mc_sibling_devices(pvt, mc_type_index);
+       if (err)
+               goto err_free;
+
+       ret = -EINVAL;
+       err = amd64_check_ecc_enabled(pvt);
+       if (err)
+               goto err_put;
+
+       /*
+        * Key operation here: setup of HW prior to performing ops on it. Some
+        * setup is required to access ECS data. After this is performed, the
+        * 'teardown' function must be called upon error and normal exit paths.
+        */
+       if (boot_cpu_data.x86 >= 0x10)
+               amd64_setup(pvt);
+
+       /*
+        * Save the pointer to the private data for use in 2nd initialization
+        * stage
+        */
+       pvt_lookup[pvt->mc_node_id] = pvt;
+
+       return 0;
+
+err_put:
+       amd64_free_mc_sibling_devices(pvt);
+
+err_free:
+       kfree(pvt);
+
+err_exit:
+       return ret;
+}
+
+/*
+ * This is the finishing stage of the init code. Needs to be performed after all
+ * MCs' hardware have been prepped for accessing extended config space.
+ */
+static int amd64_init_2nd_stage(struct amd64_pvt *pvt)
+{
+       int node_id = pvt->mc_node_id;
+       struct mem_ctl_info *mci;
+       int ret, err = 0;
+
+       amd64_read_mc_registers(pvt);
+
+       ret = -ENODEV;
+       if (pvt->ops->probe_valid_hardware) {
+               err = pvt->ops->probe_valid_hardware(pvt);
+               if (err)
+                       goto err_exit;
+       }
+
+       /*
+        * We need to determine how many memory channels there are. Then use
+        * that information for calculating the size of the dynamic instance
+        * tables in the 'mci' structure
+        */
+       pvt->channel_count = pvt->ops->early_channel_count(pvt);
+       if (pvt->channel_count < 0)
+               goto err_exit;
+
+       ret = -ENOMEM;
+       mci = edac_mc_alloc(0, CHIPSELECT_COUNT, pvt->channel_count, node_id);
+       if (!mci)
+               goto err_exit;
+
+       mci->pvt_info = pvt;
+
+       mci->dev = &pvt->dram_f2_ctl->dev;
+       amd64_setup_mci_misc_attributes(mci);
+
+       if (amd64_init_csrows(mci))
+               mci->edac_cap = EDAC_FLAG_NONE;
+
+       amd64_enable_ecc_error_reporting(mci);
+       amd64_set_mc_sysfs_attributes(mci);
+
+       ret = -ENODEV;
+       if (edac_mc_add_mc(mci)) {
+               debugf1("failed edac_mc_add_mc()\n");
+               goto err_add_mc;
+       }
+
+       mci_lookup[node_id] = mci;
+       pvt_lookup[node_id] = NULL;
+       return 0;
+
+err_add_mc:
+       edac_mc_free(mci);
+
+err_exit:
+       debugf0("failure to init 2nd stage: ret=%d\n", ret);
+
+       amd64_restore_ecc_error_reporting(pvt);
+
+       if (boot_cpu_data.x86 > 0xf)
+               amd64_teardown(pvt);
+
+       amd64_free_mc_sibling_devices(pvt);
+
+       kfree(pvt_lookup[pvt->mc_node_id]);
+       pvt_lookup[node_id] = NULL;
+
+       return ret;
+}
+
+
+static int __devinit amd64_init_one_instance(struct pci_dev *pdev,
+                                const struct pci_device_id *mc_type)
+{
+       int ret = 0;
+
+       debugf0("(MC node=%d,mc_type='%s')\n",
+               get_mc_node_id_from_pdev(pdev),
+               get_amd_family_name(mc_type->driver_data));
+
+       ret = pci_enable_device(pdev);
+       if (ret < 0)
+               ret = -EIO;
+       else
+               ret = amd64_probe_one_instance(pdev, mc_type->driver_data);
+
+       if (ret < 0)
+               debugf0("ret=%d\n", ret);
+
+       return ret;
+}
+
+static void __devexit amd64_remove_one_instance(struct pci_dev *pdev)
+{
+       struct mem_ctl_info *mci;
+       struct amd64_pvt *pvt;
+
+       /* Remove from EDAC CORE tracking list */
+       mci = edac_mc_del_mc(&pdev->dev);
+       if (!mci)
+               return;
+
+       pvt = mci->pvt_info;
+
+       amd64_restore_ecc_error_reporting(pvt);
+
+       if (boot_cpu_data.x86 > 0xf)
+               amd64_teardown(pvt);
+
+       amd64_free_mc_sibling_devices(pvt);
+
+       kfree(pvt);
+       mci->pvt_info = NULL;
+
+       mci_lookup[pvt->mc_node_id] = NULL;
+
+       /* Free the EDAC CORE resources */
+       edac_mc_free(mci);
+}
+
+/*
+ * This table is part of the interface for loading drivers for PCI devices. The
+ * PCI core identifies what devices are on a system during boot, and then
+ * inquiry this table to see if this driver is for a given device found.
+ */
+static const struct pci_device_id amd64_pci_table[] __devinitdata = {
+       {
+               .vendor         = PCI_VENDOR_ID_AMD,
+               .device         = PCI_DEVICE_ID_AMD_K8_NB_MEMCTL,
+               .subvendor      = PCI_ANY_ID,
+               .subdevice      = PCI_ANY_ID,
+               .class          = 0,
+               .class_mask     = 0,
+               .driver_data    = K8_CPUS
+       },
+       {
+               .vendor         = PCI_VENDOR_ID_AMD,
+               .device         = PCI_DEVICE_ID_AMD_10H_NB_DRAM,
+               .subvendor      = PCI_ANY_ID,
+               .subdevice      = PCI_ANY_ID,
+               .class          = 0,
+               .class_mask     = 0,
+               .driver_data    = F10_CPUS
+       },
+       {
+               .vendor         = PCI_VENDOR_ID_AMD,
+               .device         = PCI_DEVICE_ID_AMD_11H_NB_DRAM,
+               .subvendor      = PCI_ANY_ID,
+               .subdevice      = PCI_ANY_ID,
+               .class          = 0,
+               .class_mask     = 0,
+               .driver_data    = F11_CPUS
+       },
+       {0, }
+};
+MODULE_DEVICE_TABLE(pci, amd64_pci_table);
+
+static struct pci_driver amd64_pci_driver = {
+       .name           = EDAC_MOD_STR,
+       .probe          = amd64_init_one_instance,
+       .remove         = __devexit_p(amd64_remove_one_instance),
+       .id_table       = amd64_pci_table,
+};
+
+static void amd64_setup_pci_device(void)
+{
+       struct mem_ctl_info *mci;
+       struct amd64_pvt *pvt;
+
+       if (amd64_ctl_pci)
+               return;
+
+       mci = mci_lookup[0];
+       if (mci) {
+
+               pvt = mci->pvt_info;
+               amd64_ctl_pci =
+                       edac_pci_create_generic_ctl(&pvt->dram_f2_ctl->dev,
+                                                   EDAC_MOD_STR);
+
+               if (!amd64_ctl_pci) {
+                       pr_warning("%s(): Unable to create PCI control\n",
+                                  __func__);
+
+                       pr_warning("%s(): PCI error report via EDAC not set\n",
+                                  __func__);
+                       }
+       }
+}
+
+static int __init amd64_edac_init(void)
+{
+       int nb, err = -ENODEV;
+
+       edac_printk(KERN_INFO, EDAC_MOD_STR, EDAC_AMD64_VERSION "\n");
+
+       opstate_init();
+
+       if (cache_k8_northbridges() < 0)
+               goto err_exit;
+
+       err = pci_register_driver(&amd64_pci_driver);
+       if (err)
+               return err;
+
+       /*
+        * At this point, the array 'pvt_lookup[]' contains pointers to alloc'd
+        * amd64_pvt structs. These will be used in the 2nd stage init function
+        * to finish initialization of the MC instances.
+        */
+       for (nb = 0; nb < num_k8_northbridges; nb++) {
+               if (!pvt_lookup[nb])
+                       continue;
+
+               err = amd64_init_2nd_stage(pvt_lookup[nb]);
+               if (err)
+                       goto err_exit;
+       }
+
+       amd64_setup_pci_device();
+
+       return 0;
+
+err_exit:
+       debugf0("'finish_setup' stage failed\n");
+       pci_unregister_driver(&amd64_pci_driver);
+
+       return err;
+}
+
+static void __exit amd64_edac_exit(void)
+{
+       if (amd64_ctl_pci)
+               edac_pci_release_generic_ctl(amd64_ctl_pci);
+
+       pci_unregister_driver(&amd64_pci_driver);
+}
+
+module_init(amd64_edac_init);
+module_exit(amd64_edac_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("SoftwareBitMaker: Doug Thompson, "
+               "Dave Peterson, Thayne Harbaugh");
+MODULE_DESCRIPTION("MC support for AMD64 memory controllers - "
+               EDAC_AMD64_VERSION);
+
+module_param(edac_op_state, int, 0444);
+MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");