]> Pileus Git - ~andy/linux/blob - drivers/edac/i7core_edac.c
Merge branch 'drm-fixes' of git://people.freedesktop.org/~airlied/linux
[~andy/linux] / drivers / edac / i7core_edac.c
1 /* Intel i7 core/Nehalem Memory Controller kernel module
2  *
3  * This driver supports the memory controllers found on the Intel
4  * processor families i7core, i7core 7xx/8xx, i5core, Xeon 35xx,
5  * Xeon 55xx and Xeon 56xx also known as Nehalem, Nehalem-EP, Lynnfield
6  * and Westmere-EP.
7  *
8  * This file may be distributed under the terms of the
9  * GNU General Public License version 2 only.
10  *
11  * Copyright (c) 2009-2010 by:
12  *       Mauro Carvalho Chehab <mchehab@redhat.com>
13  *
14  * Red Hat Inc. http://www.redhat.com
15  *
16  * Forked and adapted from the i5400_edac driver
17  *
18  * Based on the following public Intel datasheets:
19  * Intel Core i7 Processor Extreme Edition and Intel Core i7 Processor
20  * Datasheet, Volume 2:
21  *      http://download.intel.com/design/processor/datashts/320835.pdf
22  * Intel Xeon Processor 5500 Series Datasheet Volume 2
23  *      http://www.intel.com/Assets/PDF/datasheet/321322.pdf
24  * also available at:
25  *      http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
26  */
27
28 #include <linux/module.h>
29 #include <linux/init.h>
30 #include <linux/pci.h>
31 #include <linux/pci_ids.h>
32 #include <linux/slab.h>
33 #include <linux/delay.h>
34 #include <linux/dmi.h>
35 #include <linux/edac.h>
36 #include <linux/mmzone.h>
37 #include <linux/smp.h>
38 #include <asm/mce.h>
39 #include <asm/processor.h>
40 #include <asm/div64.h>
41
42 #include "edac_core.h"
43
44 /* Static vars */
45 static LIST_HEAD(i7core_edac_list);
46 static DEFINE_MUTEX(i7core_edac_lock);
47 static int probed;
48
49 static int use_pci_fixup;
50 module_param(use_pci_fixup, int, 0444);
51 MODULE_PARM_DESC(use_pci_fixup, "Enable PCI fixup to seek for hidden devices");
/*
 * Used for Nehalem-EP and Nehalem-EX devices, where the non-core
 * registers start at bus 255 and are not reported by the BIOS.
 * So far only machines with 2 sockets have been seen. To support systems
 * with more QPI (Quick Path Interconnect) sockets, just increase this
 * number.
 */
#define MAX_SOCKET_BUSES	2

/*
 * Module identification strings.
 * Change the revision whenever modifications are made to the module.
 */
#define EDAC_MOD_STR		"i7core_edac"
#define I7CORE_REVISION		" Ver: 1.0.0"
66
/*
 * Logging helpers: thin wrappers around the EDAC core print macros that
 * always pass "i7core" as the driver tag.
 */
#define i7core_printk(level, fmt, ...)				\
	edac_printk(level, "i7core", fmt, ##__VA_ARGS__)

#define i7core_mc_printk(mci, level, fmt, ...)			\
	edac_mc_chipset_printk(mci, level, "i7core", fmt, ##__VA_ARGS__)
75
/*
 * i7core Memory Controller Registers
 */

/* Device 0 Function 0: register offset and the values written to it */
#define MC_CFG_CONTROL		0x90
#define   MC_CFG_UNLOCK		0x02
#define   MC_CFG_LOCK		0x00

/* Device 3 Function 0: register offsets */
#define MC_CONTROL		0x48
#define MC_STATUS		0x4c
#define MC_MAX_DOD		0x64
/*
 * Device 3 Function 4 register offsets, as indicated on the Xeon 5500
 * datasheet:
 * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
 *
 * Each DIMMx_COR_ERR() helper extracts one 15-bit field from the value
 * read from the register it is listed under.
 */

#define MC_TEST_ERR_RCV1	0x60
#define   DIMM2_COR_ERR(r)	((r) & 0x7fff)			/* bits 14:0 */

#define MC_TEST_ERR_RCV0	0x64
#define   DIMM1_COR_ERR(r)	(((r) >> 16) & 0x7fff)		/* bits 30:16 */
#define   DIMM0_COR_ERR(r)	((r) & 0x7fff)			/* bits 14:0 */
/* Device 3 Function 2 register offsets, as indicated on Xeon 5500 datasheet */

#define MC_SSRCONTROL		0x48
#define   SSR_MODE_DISABLE	0x00
#define   SSR_MODE_ENABLE	0x01
#define   SSR_MODE_MASK		0x03

#define MC_SCRUB_CONTROL	0x4c
#define   STARTSCRUB		(1 << 24)
#define   SCRUBINTERVAL_MASK	0xffffff	/* bits 23:0 */

/* Corrected ECC error counter registers, 4 bytes apart */
#define MC_COR_ECC_CNT_0	0x80
#define MC_COR_ECC_CNT_1	0x84
#define MC_COR_ECC_CNT_2	0x88
#define MC_COR_ECC_CNT_3	0x8c
#define MC_COR_ECC_CNT_4	0x90
#define MC_COR_ECC_CNT_5	0x94

/* Two 15-bit fields are extracted from a 32-bit value: 30:16 and 14:0 */
#define DIMM_TOP_COR_ERR(r)	(((r) >> 16) & 0x7fff)
#define DIMM_BOT_COR_ERR(r)	((r) & 0x7fff)
123
124
125         /* OFFSETS for Devices 4,5 and 6 Function 0 */
126
#define MC_CHANNEL_DIMM_INIT_PARAMS 0x58
  #define THREE_DIMMS_PRESENT		(1 << 24)
  #define SINGLE_QUAD_RANK_PRESENT	(1 << 23)
  #define QUAD_RANK_PRESENT		(1 << 22)
  #define REGISTERED_DIMM		(1 << 15)

/*
 * The channel mapper value packs, for each channel, two 3-bit fields in a
 * 6-bit slot: the write field in the low 3 bits and the read field in the
 * high 3 bits. Both helpers return the field value minus one.
 * The 'ch' argument is fully parenthesized so that expressions such as
 * "i + 1" can be passed safely.
 */
#define MC_CHANNEL_MAPPER	0x60
  #define RDLCH(r, ch)		((((r) >> (3 + ((ch) * 6))) & 0x07) - 1)
  #define WRLCH(r, ch)		((((r) >> ((ch) * 6)) & 0x07) - 1)

#define MC_CHANNEL_RANK_PRESENT 0x7c
  #define RANK_PRESENT_MASK		0xffff

#define MC_CHANNEL_ADDR_MATCH	0xf0
#define MC_CHANNEL_ERROR_MASK	0xf8
#define MC_CHANNEL_ERROR_INJECT	0xfc
  #define INJECT_ADDR_PARITY	0x10
  #define INJECT_ECC		0x08
  #define MASK_CACHELINE	0x06
  #define MASK_FULL_CACHELINE	0x06
  #define MASK_MSB32_CACHELINE	0x04
  #define MASK_LSB32_CACHELINE	0x02
  #define NO_MASK_CACHELINE	0x00
  #define REPEAT_EN		0x01
151
152         /* OFFSETS for Devices 4,5 and 6 Function 1 */
153
/*
 * Per-DIMM DOD registers. The helpers below extract the individual fields
 * from a value read from one of them:
 *   bits 12:10 rank offset, bit 9 DIMM present, bits 8:7 number of banks,
 *   bits 6:5 number of ranks, bits 4:2 number of rows, bits 1:0 number of
 *   columns.
 * All macro arguments are parenthesized so expressions can be passed.
 */
#define MC_DOD_CH_DIMM0		0x48
#define MC_DOD_CH_DIMM1		0x4c
#define MC_DOD_CH_DIMM2		0x50
  #define RANKOFFSET_MASK	((1 << 12) | (1 << 11) | (1 << 10))
  #define RANKOFFSET(x)		(((x) & RANKOFFSET_MASK) >> 10)
  #define DIMM_PRESENT_MASK	(1 << 9)
  #define DIMM_PRESENT(x)	(((x) & DIMM_PRESENT_MASK) >> 9)
  #define MC_DOD_NUMBANK_MASK		((1 << 8) | (1 << 7))
  #define MC_DOD_NUMBANK(x)		(((x) & MC_DOD_NUMBANK_MASK) >> 7)
  #define MC_DOD_NUMRANK_MASK		((1 << 6) | (1 << 5))
  #define MC_DOD_NUMRANK(x)		(((x) & MC_DOD_NUMRANK_MASK) >> 5)
  #define MC_DOD_NUMROW_MASK		((1 << 4) | (1 << 3) | (1 << 2))
  #define MC_DOD_NUMROW(x)		(((x) & MC_DOD_NUMROW_MASK) >> 2)
  #define MC_DOD_NUMCOL_MASK		3
  #define MC_DOD_NUMCOL(x)		((x) & MC_DOD_NUMCOL_MASK)

#define MC_RANK_PRESENT		0x7c

#define MC_SAG_CH_0	0x80
#define MC_SAG_CH_1	0x84
#define MC_SAG_CH_2	0x88
#define MC_SAG_CH_3	0x8c
#define MC_SAG_CH_4	0x90
#define MC_SAG_CH_5	0x94
#define MC_SAG_CH_6	0x98
#define MC_SAG_CH_7	0x9c

#define MC_RIR_LIMIT_CH_0	0x40
#define MC_RIR_LIMIT_CH_1	0x44
#define MC_RIR_LIMIT_CH_2	0x48
#define MC_RIR_LIMIT_CH_3	0x4C
#define MC_RIR_LIMIT_CH_4	0x50
#define MC_RIR_LIMIT_CH_5	0x54
#define MC_RIR_LIMIT_CH_6	0x58
#define MC_RIR_LIMIT_CH_7	0x5C
#define MC_RIR_LIMIT_MASK	((1 << 10) - 1)

#define MC_RIR_WAY_CH		0x80
  #define MC_RIR_WAY_OFFSET_MASK	(((1 << 14) - 1) & ~0x7)
  #define MC_RIR_WAY_RANK_MASK		0x7
194
/*
 * i7core structs and the array dimensions they rely on
 */

#define NUM_CHANS	3	/* Memory channels handled per controller */
#define MAX_DIMMS	3	/* Max DIMMS per channel */
#define MAX_MCR_FUNC	4	/* Sizes i7core_pvt.pci_mcr[] (plus one) */
#define MAX_CHAN_FUNC	3	/* Sizes i7core_pvt.pci_ch[][] (plus one) */
203
204 struct i7core_info {
205         u32     mc_control;
206         u32     mc_status;
207         u32     max_dod;
208         u32     ch_map;
209 };
210
211
212 struct i7core_inject {
213         int     enable;
214
215         u32     section;
216         u32     type;
217         u32     eccmask;
218
219         /* Error address mask */
220         int channel, dimm, rank, bank, page, col;
221 };
222
223 struct i7core_channel {
224         bool            is_3dimms_present;
225         bool            is_single_4rank;
226         bool            has_4rank;
227         u32             dimms;
228 };
229
/* Describes one PCI function the driver looks for */
struct pci_id_descr {
	int dev;		/* PCI device (slot) number */
	int func;		/* PCI function number */
	int dev_id;		/* PCI device ID */
	int optional;		/* Non-zero for entries marked .optional in the tables */
};
236
/* A descriptor array together with its number of entries */
struct pci_id_table {
	const struct pci_id_descr *descr;	/* First descriptor */
	int n_devs;				/* Entries in descr[] */
};
241
242 struct i7core_dev {
243         struct list_head        list;
244         u8                      socket;
245         struct pci_dev          **pdev;
246         int                     n_devs;
247         struct mem_ctl_info     *mci;
248 };
249
250 struct i7core_pvt {
251         struct pci_dev  *pci_noncore;
252         struct pci_dev  *pci_mcr[MAX_MCR_FUNC + 1];
253         struct pci_dev  *pci_ch[NUM_CHANS][MAX_CHAN_FUNC + 1];
254
255         struct i7core_dev *i7core_dev;
256
257         struct i7core_info      info;
258         struct i7core_inject    inject;
259         struct i7core_channel   channel[NUM_CHANS];
260
261         int             ce_count_available;
262
263                         /* ECC corrected errors counts per udimm */
264         unsigned long   udimm_ce_count[MAX_DIMMS];
265         int             udimm_last_ce_count[MAX_DIMMS];
266                         /* ECC corrected errors counts per rdimm */
267         unsigned long   rdimm_ce_count[NUM_CHANS][MAX_DIMMS];
268         int             rdimm_last_ce_count[NUM_CHANS][MAX_DIMMS];
269
270         bool            is_registered, enable_scrub;
271
272         /* Fifo double buffers */
273         struct mce              mce_entry[MCE_LOG_LEN];
274         struct mce              mce_outentry[MCE_LOG_LEN];
275
276         /* Fifo in/out counters */
277         unsigned                mce_in, mce_out;
278
279         /* Count indicator to show errors not got */
280         unsigned                mce_overrun;
281
282         /* DCLK Frequency used for computing scrub rate */
283         int                     dclk_freq;
284
285         /* Struct to control EDAC polling */
286         struct edac_pci_ctl_info *i7core_pci;
287 };
288
/*
 * Designated initializers for the dev/func/dev_id members of a
 * struct pci_id_descr; meant to be used inside a brace-enclosed initializer.
 */
#define PCI_DESCR(device, function, device_id)				\
	.dev = (device), .func = (function), .dev_id = (device_id)
293
294 static const struct pci_id_descr pci_dev_descr_i7core_nehalem[] = {
295                 /* Memory controller */
296         { PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_I7_MCR)     },
297         { PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_I7_MC_TAD)  },
298                         /* Exists only for RDIMM */
299         { PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_I7_MC_RAS), .optional = 1  },
300         { PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_I7_MC_TEST) },
301
302                 /* Channel 0 */
303         { PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH0_CTRL) },
304         { PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH0_ADDR) },
305         { PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH0_RANK) },
306         { PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH0_TC)   },
307
308                 /* Channel 1 */
309         { PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH1_CTRL) },
310         { PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH1_ADDR) },
311         { PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH1_RANK) },
312         { PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH1_TC)   },
313
314                 /* Channel 2 */
315         { PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH2_CTRL) },
316         { PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH2_ADDR) },
317         { PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH2_RANK) },
318         { PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH2_TC)   },
319
320                 /* Generic Non-core registers */
321         /*
322          * This is the PCI device on i7core and on Xeon 35xx (8086:2c41)
323          * On Xeon 55xx, however, it has a different id (8086:2c40). So,
324          * the probing code needs to test for the other address in case of
325          * failure of this one
326          */
327         { PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_I7_NONCORE)  },
328
329 };
330
331 static const struct pci_id_descr pci_dev_descr_lynnfield[] = {
332         { PCI_DESCR( 3, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MCR)         },
333         { PCI_DESCR( 3, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TAD)      },
334         { PCI_DESCR( 3, 4, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TEST)     },
335
336         { PCI_DESCR( 4, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_CTRL) },
337         { PCI_DESCR( 4, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_ADDR) },
338         { PCI_DESCR( 4, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_RANK) },
339         { PCI_DESCR( 4, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_TC)   },
340
341         { PCI_DESCR( 5, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_CTRL) },
342         { PCI_DESCR( 5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR) },
343         { PCI_DESCR( 5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK) },
344         { PCI_DESCR( 5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC)   },
345
346         /*
347          * This is the PCI device has an alternate address on some
348          * processors like Core i7 860
349          */
350         { PCI_DESCR( 0, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE)     },
351 };
352
353 static const struct pci_id_descr pci_dev_descr_i7core_westmere[] = {
354                 /* Memory controller */
355         { PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MCR_REV2)     },
356         { PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TAD_REV2)  },
357                         /* Exists only for RDIMM */
358         { PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_RAS_REV2), .optional = 1  },
359         { PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TEST_REV2) },
360
361                 /* Channel 0 */
362         { PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_CTRL_REV2) },
363         { PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_ADDR_REV2) },
364         { PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_RANK_REV2) },
365         { PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_TC_REV2)   },
366
367                 /* Channel 1 */
368         { PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_CTRL_REV2) },
369         { PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR_REV2) },
370         { PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK_REV2) },
371         { PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC_REV2)   },
372
373                 /* Channel 2 */
374         { PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_CTRL_REV2) },
375         { PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_ADDR_REV2) },
376         { PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_RANK_REV2) },
377         { PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_TC_REV2)   },
378
379                 /* Generic Non-core registers */
380         { PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_REV2)  },
381
382 };
383
384 #define PCI_ID_TABLE_ENTRY(A) { .descr=A, .n_devs = ARRAY_SIZE(A) }
385 static const struct pci_id_table pci_dev_table[] = {
386         PCI_ID_TABLE_ENTRY(pci_dev_descr_i7core_nehalem),
387         PCI_ID_TABLE_ENTRY(pci_dev_descr_lynnfield),
388         PCI_ID_TABLE_ENTRY(pci_dev_descr_i7core_westmere),
389         {0,}                    /* 0 terminated list. */
390 };
391
392 /*
393  *      pci_device_id   table for which devices we are looking for
394  */
395 static DEFINE_PCI_DEVICE_TABLE(i7core_pci_tbl) = {
396         {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_X58_HUB_MGMT)},
397         {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_LYNNFIELD_QPI_LINK0)},
398         {0,}                    /* 0 terminated list. */
399 };
400
/****************************************************************************
			Ancillary status routines
 ****************************************************************************/

/*
 * All helpers below take a pointer to a struct i7core_pvt and test bits of
 * the register values cached in pvt->info. The channel argument is fully
 * parenthesized so that an expression can be passed as 'ch'.
 */

	/* MC_CONTROL bits: bit (8 + ch) = channel active, bit 1 = x8 ECC */
#define CH_ACTIVE(pvt, ch)	((pvt)->info.mc_control & (1 << (8 + (ch))))
#define ECCx8(pvt)		((pvt)->info.mc_control & (1 << 1))

	/* MC_STATUS bits: bit 4 = ECC enabled, bit ch = channel disabled */
#define ECC_ENABLED(pvt)	((pvt)->info.mc_status & (1 << 4))
#define CH_DISABLED(pvt, ch)	((pvt)->info.mc_status & (1 << (ch)))
412
413         /* MC_MAX_DOD read functions */
414 static inline int numdimms(u32 dimms)
415 {
416         return (dimms & 0x3) + 1;
417 }
418
419 static inline int numrank(u32 rank)
420 {
421         static int ranks[4] = { 1, 2, 4, -EINVAL };
422
423         return ranks[rank & 0x3];
424 }
425
426 static inline int numbank(u32 bank)
427 {
428         static int banks[4] = { 4, 8, 16, -EINVAL };
429
430         return banks[bank & 0x3];
431 }
432
433 static inline int numrow(u32 row)
434 {
435         static int rows[8] = {
436                 1 << 12, 1 << 13, 1 << 14, 1 << 15,
437                 1 << 16, -EINVAL, -EINVAL, -EINVAL,
438         };
439
440         return rows[row & 0x7];
441 }
442
443 static inline int numcol(u32 col)
444 {
445         static int cols[8] = {
446                 1 << 10, 1 << 11, 1 << 12, -EINVAL,
447         };
448         return cols[col & 0x3];
449 }
450
451 static struct i7core_dev *get_i7core_dev(u8 socket)
452 {
453         struct i7core_dev *i7core_dev;
454
455         list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
456                 if (i7core_dev->socket == socket)
457                         return i7core_dev;
458         }
459
460         return NULL;
461 }
462
463 static struct i7core_dev *alloc_i7core_dev(u8 socket,
464                                            const struct pci_id_table *table)
465 {
466         struct i7core_dev *i7core_dev;
467
468         i7core_dev = kzalloc(sizeof(*i7core_dev), GFP_KERNEL);
469         if (!i7core_dev)
470                 return NULL;
471
472         i7core_dev->pdev = kzalloc(sizeof(*i7core_dev->pdev) * table->n_devs,
473                                    GFP_KERNEL);
474         if (!i7core_dev->pdev) {
475                 kfree(i7core_dev);
476                 return NULL;
477         }
478
479         i7core_dev->socket = socket;
480         i7core_dev->n_devs = table->n_devs;
481         list_add_tail(&i7core_dev->list, &i7core_edac_list);
482
483         return i7core_dev;
484 }
485
486 static void free_i7core_dev(struct i7core_dev *i7core_dev)
487 {
488         list_del(&i7core_dev->list);
489         kfree(i7core_dev->pdev);
490         kfree(i7core_dev);
491 }
492
493 /****************************************************************************
494                         Memory check routines
495  ****************************************************************************/
496
497 static int get_dimm_config(struct mem_ctl_info *mci)
498 {
499         struct i7core_pvt *pvt = mci->pvt_info;
500         struct pci_dev *pdev;
501         int i, j;
502         enum edac_type mode;
503         enum mem_type mtype;
504         struct dimm_info *dimm;
505
506         /* Get data from the MC register, function 0 */
507         pdev = pvt->pci_mcr[0];
508         if (!pdev)
509                 return -ENODEV;
510
511         /* Device 3 function 0 reads */
512         pci_read_config_dword(pdev, MC_CONTROL, &pvt->info.mc_control);
513         pci_read_config_dword(pdev, MC_STATUS, &pvt->info.mc_status);
514         pci_read_config_dword(pdev, MC_MAX_DOD, &pvt->info.max_dod);
515         pci_read_config_dword(pdev, MC_CHANNEL_MAPPER, &pvt->info.ch_map);
516
517         debugf0("QPI %d control=0x%08x status=0x%08x dod=0x%08x map=0x%08x\n",
518                 pvt->i7core_dev->socket, pvt->info.mc_control, pvt->info.mc_status,
519                 pvt->info.max_dod, pvt->info.ch_map);
520
521         if (ECC_ENABLED(pvt)) {
522                 debugf0("ECC enabled with x%d SDCC\n", ECCx8(pvt) ? 8 : 4);
523                 if (ECCx8(pvt))
524                         mode = EDAC_S8ECD8ED;
525                 else
526                         mode = EDAC_S4ECD4ED;
527         } else {
528                 debugf0("ECC disabled\n");
529                 mode = EDAC_NONE;
530         }
531
532         /* FIXME: need to handle the error codes */
533         debugf0("DOD Max limits: DIMMS: %d, %d-ranked, %d-banked "
534                 "x%x x 0x%x\n",
535                 numdimms(pvt->info.max_dod),
536                 numrank(pvt->info.max_dod >> 2),
537                 numbank(pvt->info.max_dod >> 4),
538                 numrow(pvt->info.max_dod >> 6),
539                 numcol(pvt->info.max_dod >> 9));
540
541         for (i = 0; i < NUM_CHANS; i++) {
542                 u32 data, dimm_dod[3], value[8];
543
544                 if (!pvt->pci_ch[i][0])
545                         continue;
546
547                 if (!CH_ACTIVE(pvt, i)) {
548                         debugf0("Channel %i is not active\n", i);
549                         continue;
550                 }
551                 if (CH_DISABLED(pvt, i)) {
552                         debugf0("Channel %i is disabled\n", i);
553                         continue;
554                 }
555
556                 /* Devices 4-6 function 0 */
557                 pci_read_config_dword(pvt->pci_ch[i][0],
558                                 MC_CHANNEL_DIMM_INIT_PARAMS, &data);
559
560
561                 if (data & THREE_DIMMS_PRESENT)
562                         pvt->channel[i].is_3dimms_present = true;
563
564                 if (data & SINGLE_QUAD_RANK_PRESENT)
565                         pvt->channel[i].is_single_4rank = true;
566
567                 if (data & QUAD_RANK_PRESENT)
568                         pvt->channel[i].has_4rank = true;
569
570                 if (data & REGISTERED_DIMM)
571                         mtype = MEM_RDDR3;
572                 else
573                         mtype = MEM_DDR3;
574
575                 /* Devices 4-6 function 1 */
576                 pci_read_config_dword(pvt->pci_ch[i][1],
577                                 MC_DOD_CH_DIMM0, &dimm_dod[0]);
578                 pci_read_config_dword(pvt->pci_ch[i][1],
579                                 MC_DOD_CH_DIMM1, &dimm_dod[1]);
580                 pci_read_config_dword(pvt->pci_ch[i][1],
581                                 MC_DOD_CH_DIMM2, &dimm_dod[2]);
582
583                 debugf0("Ch%d phy rd%d, wr%d (0x%08x): "
584                         "%s%s%s%cDIMMs\n",
585                         i,
586                         RDLCH(pvt->info.ch_map, i), WRLCH(pvt->info.ch_map, i),
587                         data,
588                         pvt->channel[i].is_3dimms_present ? "3DIMMS " : "",
589                         pvt->channel[i].is_3dimms_present ? "SINGLE_4R " : "",
590                         pvt->channel[i].has_4rank ? "HAS_4R " : "",
591                         (data & REGISTERED_DIMM) ? 'R' : 'U');
592
593                 for (j = 0; j < 3; j++) {
594                         u32 banks, ranks, rows, cols;
595                         u32 size, npages;
596
597                         if (!DIMM_PRESENT(dimm_dod[j]))
598                                 continue;
599
600                         dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers,
601                                        i, j, 0);
602                         banks = numbank(MC_DOD_NUMBANK(dimm_dod[j]));
603                         ranks = numrank(MC_DOD_NUMRANK(dimm_dod[j]));
604                         rows = numrow(MC_DOD_NUMROW(dimm_dod[j]));
605                         cols = numcol(MC_DOD_NUMCOL(dimm_dod[j]));
606
607                         /* DDR3 has 8 I/O banks */
608                         size = (rows * cols * banks * ranks) >> (20 - 3);
609
610                         debugf0("\tdimm %d %d Mb offset: %x, "
611                                 "bank: %d, rank: %d, row: %#x, col: %#x\n",
612                                 j, size,
613                                 RANKOFFSET(dimm_dod[j]),
614                                 banks, ranks, rows, cols);
615
616                         npages = MiB_TO_PAGES(size);
617
618                         dimm->nr_pages = npages;
619
620                         switch (banks) {
621                         case 4:
622                                 dimm->dtype = DEV_X4;
623                                 break;
624                         case 8:
625                                 dimm->dtype = DEV_X8;
626                                 break;
627                         case 16:
628                                 dimm->dtype = DEV_X16;
629                                 break;
630                         default:
631                                 dimm->dtype = DEV_UNKNOWN;
632                         }
633
634                         snprintf(dimm->label, sizeof(dimm->label),
635                                  "CPU#%uChannel#%u_DIMM#%u",
636                                  pvt->i7core_dev->socket, i, j);
637                         dimm->grain = 8;
638                         dimm->edac_mode = mode;
639                         dimm->mtype = mtype;
640                 }
641
642                 pci_read_config_dword(pdev, MC_SAG_CH_0, &value[0]);
643                 pci_read_config_dword(pdev, MC_SAG_CH_1, &value[1]);
644                 pci_read_config_dword(pdev, MC_SAG_CH_2, &value[2]);
645                 pci_read_config_dword(pdev, MC_SAG_CH_3, &value[3]);
646                 pci_read_config_dword(pdev, MC_SAG_CH_4, &value[4]);
647                 pci_read_config_dword(pdev, MC_SAG_CH_5, &value[5]);
648                 pci_read_config_dword(pdev, MC_SAG_CH_6, &value[6]);
649                 pci_read_config_dword(pdev, MC_SAG_CH_7, &value[7]);
650                 debugf1("\t[%i] DIVBY3\tREMOVED\tOFFSET\n", i);
651                 for (j = 0; j < 8; j++)
652                         debugf1("\t\t%#x\t%#x\t%#x\n",
653                                 (value[j] >> 27) & 0x1,
654                                 (value[j] >> 24) & 0x7,
655                                 (value[j] & ((1 << 24) - 1)));
656         }
657
658         return 0;
659 }
660
661 /****************************************************************************
662                         Error insertion routines
663  ****************************************************************************/
664
665 /* The i7core has independent error injection features per channel.
666    However, to have a simpler code, we don't allow enabling error injection
667    on more than one channel.
668    Also, since a change at an inject parameter will be applied only at enable,
669    we're disabling error injection on all write calls to the sysfs nodes that
670    controls the error code injection.
671  */
672 static int disable_inject(const struct mem_ctl_info *mci)
673 {
674         struct i7core_pvt *pvt = mci->pvt_info;
675
676         pvt->inject.enable = 0;
677
678         if (!pvt->pci_ch[pvt->inject.channel][0])
679                 return -ENODEV;
680
681         pci_write_config_dword(pvt->pci_ch[pvt->inject.channel][0],
682                                 MC_CHANNEL_ERROR_INJECT, 0);
683
684         return 0;
685 }
686
687 /*
688  * i7core inject inject.section
689  *
690  *      accept and store error injection inject.section value
691  *      bit 0 - refers to the lower 32-byte half cacheline
692  *      bit 1 - refers to the upper 32-byte half cacheline
693  */
694 static ssize_t i7core_inject_section_store(struct mem_ctl_info *mci,
695                                            const char *data, size_t count)
696 {
697         struct i7core_pvt *pvt = mci->pvt_info;
698         unsigned long value;
699         int rc;
700
701         if (pvt->inject.enable)
702                 disable_inject(mci);
703
704         rc = strict_strtoul(data, 10, &value);
705         if ((rc < 0) || (value > 3))
706                 return -EIO;
707
708         pvt->inject.section = (u32) value;
709         return count;
710 }
711
712 static ssize_t i7core_inject_section_show(struct mem_ctl_info *mci,
713                                               char *data)
714 {
715         struct i7core_pvt *pvt = mci->pvt_info;
716         return sprintf(data, "0x%08x\n", pvt->inject.section);
717 }
718
719 /*
720  * i7core inject.type
721  *
722  *      accept and store error injection inject.section value
723  *      bit 0 - repeat enable - Enable error repetition
724  *      bit 1 - inject ECC error
725  *      bit 2 - inject parity error
726  */
727 static ssize_t i7core_inject_type_store(struct mem_ctl_info *mci,
728                                         const char *data, size_t count)
729 {
730         struct i7core_pvt *pvt = mci->pvt_info;
731         unsigned long value;
732         int rc;
733
734         if (pvt->inject.enable)
735                 disable_inject(mci);
736
737         rc = strict_strtoul(data, 10, &value);
738         if ((rc < 0) || (value > 7))
739                 return -EIO;
740
741         pvt->inject.type = (u32) value;
742         return count;
743 }
744
745 static ssize_t i7core_inject_type_show(struct mem_ctl_info *mci,
746                                               char *data)
747 {
748         struct i7core_pvt *pvt = mci->pvt_info;
749         return sprintf(data, "0x%08x\n", pvt->inject.type);
750 }
751
752 /*
753  * i7core_inject_inject.eccmask_store
754  *
755  * The type of error (UE/CE) will depend on the inject.eccmask value:
756  *   Any bits set to a 1 will flip the corresponding ECC bit
757  *   Correctable errors can be injected by flipping 1 bit or the bits within
758  *   a symbol pair (2 consecutive aligned 8-bit pairs - i.e. 7:0 and 15:8 or
759  *   23:16 and 31:24). Flipping bits in two symbol pairs will cause an
760  *   uncorrectable error to be injected.
761  */
762 static ssize_t i7core_inject_eccmask_store(struct mem_ctl_info *mci,
763                                         const char *data, size_t count)
764 {
765         struct i7core_pvt *pvt = mci->pvt_info;
766         unsigned long value;
767         int rc;
768
769         if (pvt->inject.enable)
770                 disable_inject(mci);
771
772         rc = strict_strtoul(data, 10, &value);
773         if (rc < 0)
774                 return -EIO;
775
776         pvt->inject.eccmask = (u32) value;
777         return count;
778 }
779
780 static ssize_t i7core_inject_eccmask_show(struct mem_ctl_info *mci,
781                                               char *data)
782 {
783         struct i7core_pvt *pvt = mci->pvt_info;
784         return sprintf(data, "0x%08x\n", pvt->inject.eccmask);
785 }
786
/*
 * i7core_addrmatch
 *
 * The type of error (UE/CE) will depend on the inject.eccmask value:
 *   Any bits set to a 1 will flip the corresponding ECC bit
 *   Correctable errors can be injected by flipping 1 bit or the bits within
 *   a symbol pair (2 consecutive aligned 8-bit pairs - i.e. 7:0 and 15:8 or
 *   23:16 and 31:24). Flipping bits in two symbol pairs will cause an
 *   uncorrectable error to be injected.
 */

/*
 * DECLARE_ADDR_MATCH(param, limit) generates the sysfs handlers for the
 * address-match criterion stored in pvt->inject.<param>:
 *
 *   i7core_inject_store_<param>() accepts either "any" (case-insensitive,
 *     with or without a trailing newline), stored as -1, or a decimal
 *     number in the range [0, limit). It disables a running injection
 *     before parsing. Returns @count on success and -EIO on a malformed or
 *     out-of-range number.
 *
 *   i7core_inject_show_<param>() prints "any" for a negative value and the
 *     decimal value otherwise.
 *
 * The number is parsed into an unsigned long, which is the type
 * strict_strtoul() writes to, and range-checked while still unsigned.
 * Parsing straight into a signed long let huge inputs wrap to a negative
 * value, pass the "< limit" test and be stored as the "any" wildcard.
 */
#define DECLARE_ADDR_MATCH(param, limit)			\
static ssize_t i7core_inject_store_##param(			\
		struct mem_ctl_info *mci,			\
		const char *data, size_t count)			\
{								\
	struct i7core_pvt *pvt;					\
	unsigned long parsed;					\
	long value;						\
	int rc;							\
								\
	debugf1("%s()\n", __func__);				\
	pvt = mci->pvt_info;					\
								\
	if (pvt->inject.enable)					\
		disable_inject(mci);				\
								\
	if (!strcasecmp(data, "any") || !strcasecmp(data, "any\n"))\
		value = -1;					\
	else {							\
		rc = strict_strtoul(data, 10, &parsed);		\
		if ((rc < 0) || (parsed >= (limit)))		\
			return -EIO;				\
		value = parsed;					\
	}							\
								\
	pvt->inject.param = value;				\
								\
	return count;						\
}								\
								\
static ssize_t i7core_inject_show_##param(			\
		struct mem_ctl_info *mci,			\
		char *data)					\
{								\
	struct i7core_pvt *pvt;					\
								\
	pvt = mci->pvt_info;					\
	debugf1("%s() pvt=%p\n", __func__, pvt);		\
	if (pvt->inject.param < 0)				\
		return sprintf(data, "any\n");			\
	else							\
		return sprintf(data, "%d\n", pvt->inject.param);\
}
839
/*
 * ATTR_ADDR_MATCH(param) expands to the sysfs attribute initializer for one
 * address-match criterion: a file named after the criterion, readable by
 * everyone and writable by its owner, served by the store/show handlers
 * that DECLARE_ADDR_MATCH(param, ...) generates.
 */
#define ATTR_ADDR_MATCH(param)					\
	{							\
		.attr  = {					\
			.name = #param,				\
			.mode = (S_IWUSR | S_IRUGO),		\
		},						\
		.store = i7core_inject_store_##param,		\
		.show  = i7core_inject_show_##param,		\
	}
849
850 DECLARE_ADDR_MATCH(channel, 3);
851 DECLARE_ADDR_MATCH(dimm, 3);
852 DECLARE_ADDR_MATCH(rank, 4);
853 DECLARE_ADDR_MATCH(bank, 32);
854 DECLARE_ADDR_MATCH(page, 0x10000);
855 DECLARE_ADDR_MATCH(col, 0x4000);
856
857 static int write_and_test(struct pci_dev *dev, const int where, const u32 val)
858 {
859         u32 read;
860         int count;
861
862         debugf0("setting pci %02x:%02x.%x reg=%02x value=%08x\n",
863                 dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
864                 where, val);
865
866         for (count = 0; count < 10; count++) {
867                 if (count)
868                         msleep(100);
869                 pci_write_config_dword(dev, where, val);
870                 pci_read_config_dword(dev, where, &read);
871
872                 if (read == val)
873                         return 0;
874         }
875
876         i7core_printk(KERN_ERR, "Error during set pci %02x:%02x.%x reg=%02x "
877                 "write=%08x. Read=%08x\n",
878                 dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
879                 where, val, read);
880
881         return -EINVAL;
882 }
883
884 /*
885  * This routine prepares the Memory Controller for error injection.
886  * The error will be injected when some process tries to write to the
887  * memory that matches the given criteria.
888  * The criteria can be set in terms of a mask where dimm, rank, bank, page
889  * and col can be specified.
890  * A -1 value for any of the mask items will make the MCU to ignore
891  * that matching criteria for error injection.
892  *
893  * It should be noticed that the error will only happen after a write operation
894  * on a memory that matches the condition. if REPEAT_EN is not enabled at
895  * inject mask, then it will produce just one error. Otherwise, it will repeat
896  * until the injectmask would be cleaned.
897  *
898  * FIXME: This routine assumes that MAXNUMDIMMS value of MC_MAX_DOD
899  *    is reliable enough to check if the MC is using the
900  *    three channels. However, this is not clear at the datasheet.
901  */
902 static ssize_t i7core_inject_enable_store(struct mem_ctl_info *mci,
903                                        const char *data, size_t count)
904 {
905         struct i7core_pvt *pvt = mci->pvt_info;
906         u32 injectmask;
907         u64 mask = 0;
908         int  rc;
909         long enable;
910
911         if (!pvt->pci_ch[pvt->inject.channel][0])
912                 return 0;
913
914         rc = strict_strtoul(data, 10, &enable);
915         if ((rc < 0))
916                 return 0;
917
918         if (enable) {
919                 pvt->inject.enable = 1;
920         } else {
921                 disable_inject(mci);
922                 return count;
923         }
924
925         /* Sets pvt->inject.dimm mask */
926         if (pvt->inject.dimm < 0)
927                 mask |= 1LL << 41;
928         else {
929                 if (pvt->channel[pvt->inject.channel].dimms > 2)
930                         mask |= (pvt->inject.dimm & 0x3LL) << 35;
931                 else
932                         mask |= (pvt->inject.dimm & 0x1LL) << 36;
933         }
934
935         /* Sets pvt->inject.rank mask */
936         if (pvt->inject.rank < 0)
937                 mask |= 1LL << 40;
938         else {
939                 if (pvt->channel[pvt->inject.channel].dimms > 2)
940                         mask |= (pvt->inject.rank & 0x1LL) << 34;
941                 else
942                         mask |= (pvt->inject.rank & 0x3LL) << 34;
943         }
944
945         /* Sets pvt->inject.bank mask */
946         if (pvt->inject.bank < 0)
947                 mask |= 1LL << 39;
948         else
949                 mask |= (pvt->inject.bank & 0x15LL) << 30;
950
951         /* Sets pvt->inject.page mask */
952         if (pvt->inject.page < 0)
953                 mask |= 1LL << 38;
954         else
955                 mask |= (pvt->inject.page & 0xffff) << 14;
956
957         /* Sets pvt->inject.column mask */
958         if (pvt->inject.col < 0)
959                 mask |= 1LL << 37;
960         else
961                 mask |= (pvt->inject.col & 0x3fff);
962
963         /*
964          * bit    0: REPEAT_EN
965          * bits 1-2: MASK_HALF_CACHELINE
966          * bit    3: INJECT_ECC
967          * bit    4: INJECT_ADDR_PARITY
968          */
969
970         injectmask = (pvt->inject.type & 1) |
971                      (pvt->inject.section & 0x3) << 1 |
972                      (pvt->inject.type & 0x6) << (3 - 1);
973
974         /* Unlock writes to registers - this register is write only */
975         pci_write_config_dword(pvt->pci_noncore,
976                                MC_CFG_CONTROL, 0x2);
977
978         write_and_test(pvt->pci_ch[pvt->inject.channel][0],
979                                MC_CHANNEL_ADDR_MATCH, mask);
980         write_and_test(pvt->pci_ch[pvt->inject.channel][0],
981                                MC_CHANNEL_ADDR_MATCH + 4, mask >> 32L);
982
983         write_and_test(pvt->pci_ch[pvt->inject.channel][0],
984                                MC_CHANNEL_ERROR_MASK, pvt->inject.eccmask);
985
986         write_and_test(pvt->pci_ch[pvt->inject.channel][0],
987                                MC_CHANNEL_ERROR_INJECT, injectmask);
988
989         /*
990          * This is something undocumented, based on my tests
991          * Without writing 8 to this register, errors aren't injected. Not sure
992          * why.
993          */
994         pci_write_config_dword(pvt->pci_noncore,
995                                MC_CFG_CONTROL, 8);
996
997         debugf0("Error inject addr match 0x%016llx, ecc 0x%08x,"
998                 " inject 0x%08x\n",
999                 mask, pvt->inject.eccmask, injectmask);
1000
1001
1002         return count;
1003 }
1004
1005 static ssize_t i7core_inject_enable_show(struct mem_ctl_info *mci,
1006                                         char *data)
1007 {
1008         struct i7core_pvt *pvt = mci->pvt_info;
1009         u32 injectmask;
1010
1011         if (!pvt->pci_ch[pvt->inject.channel][0])
1012                 return 0;
1013
1014         pci_read_config_dword(pvt->pci_ch[pvt->inject.channel][0],
1015                                MC_CHANNEL_ERROR_INJECT, &injectmask);
1016
1017         debugf0("Inject error read: 0x%018x\n", injectmask);
1018
1019         if (injectmask & 0x0c)
1020                 pvt->inject.enable = 1;
1021
1022         return sprintf(data, "%d\n", pvt->inject.enable);
1023 }
1024
/*
 * DECLARE_COUNTER(param) generates i7core_show_counter_<param>(), the sysfs
 * show handler that prints pvt->udimm_ce_count[param].
 *
 * "data unavailable" is printed instead while no counter snapshot has been
 * taken yet (ce_count_available is clear) or when the controller is flagged
 * as registered (is_registered is set).
 */
#define DECLARE_COUNTER(param)					\
static ssize_t i7core_show_counter_##param(			\
		struct mem_ctl_info *mci,			\
		char *data)					\
{								\
	struct i7core_pvt *pvt = mci->pvt_info;			\
								\
	debugf1("%s() \n", __func__);				\
	if (pvt->ce_count_available && !pvt->is_registered)	\
		return sprintf(data, "%lu\n",			\
				pvt->udimm_ce_count[param]);	\
								\
	return sprintf(data, "data unavailable\n");		\
}
1038
/*
 * ATTR_COUNTER(param) expands to the sysfs attribute initializer for the
 * counter file "udimm<param>", served by i7core_show_counter_<param>().
 *
 * The file is read-only: no store handler is provided, so it must not
 * advertise write permission.
 */
#define ATTR_COUNTER(param)					\
	{							\
		.attr = {					\
			.name = __stringify(udimm##param),	\
			.mode = S_IRUGO				\
		},						\
		.show  = i7core_show_counter_##param		\
	}
1047
1048 DECLARE_COUNTER(0);
1049 DECLARE_COUNTER(1);
1050 DECLARE_COUNTER(2);
1051
1052 /*
1053  * Sysfs struct
1054  */
1055
1056 static const struct mcidev_sysfs_attribute i7core_addrmatch_attrs[] = {
1057         ATTR_ADDR_MATCH(channel),
1058         ATTR_ADDR_MATCH(dimm),
1059         ATTR_ADDR_MATCH(rank),
1060         ATTR_ADDR_MATCH(bank),
1061         ATTR_ADDR_MATCH(page),
1062         ATTR_ADDR_MATCH(col),
1063         { } /* End of list */
1064 };
1065
1066 static const struct mcidev_sysfs_group i7core_inject_addrmatch = {
1067         .name  = "inject_addrmatch",
1068         .mcidev_attr = i7core_addrmatch_attrs,
1069 };
1070
1071 static const struct mcidev_sysfs_attribute i7core_udimm_counters_attrs[] = {
1072         ATTR_COUNTER(0),
1073         ATTR_COUNTER(1),
1074         ATTR_COUNTER(2),
1075         { .attr = { .name = NULL } }
1076 };
1077
1078 static const struct mcidev_sysfs_group i7core_udimm_counters = {
1079         .name  = "all_channel_counts",
1080         .mcidev_attr = i7core_udimm_counters_attrs,
1081 };
1082
1083 static const struct mcidev_sysfs_attribute i7core_sysfs_rdimm_attrs[] = {
1084         {
1085                 .attr = {
1086                         .name = "inject_section",
1087                         .mode = (S_IRUGO | S_IWUSR)
1088                 },
1089                 .show  = i7core_inject_section_show,
1090                 .store = i7core_inject_section_store,
1091         }, {
1092                 .attr = {
1093                         .name = "inject_type",
1094                         .mode = (S_IRUGO | S_IWUSR)
1095                 },
1096                 .show  = i7core_inject_type_show,
1097                 .store = i7core_inject_type_store,
1098         }, {
1099                 .attr = {
1100                         .name = "inject_eccmask",
1101                         .mode = (S_IRUGO | S_IWUSR)
1102                 },
1103                 .show  = i7core_inject_eccmask_show,
1104                 .store = i7core_inject_eccmask_store,
1105         }, {
1106                 .grp = &i7core_inject_addrmatch,
1107         }, {
1108                 .attr = {
1109                         .name = "inject_enable",
1110                         .mode = (S_IRUGO | S_IWUSR)
1111                 },
1112                 .show  = i7core_inject_enable_show,
1113                 .store = i7core_inject_enable_store,
1114         },
1115         { }     /* End of list */
1116 };
1117
1118 static const struct mcidev_sysfs_attribute i7core_sysfs_udimm_attrs[] = {
1119         {
1120                 .attr = {
1121                         .name = "inject_section",
1122                         .mode = (S_IRUGO | S_IWUSR)
1123                 },
1124                 .show  = i7core_inject_section_show,
1125                 .store = i7core_inject_section_store,
1126         }, {
1127                 .attr = {
1128                         .name = "inject_type",
1129                         .mode = (S_IRUGO | S_IWUSR)
1130                 },
1131                 .show  = i7core_inject_type_show,
1132                 .store = i7core_inject_type_store,
1133         }, {
1134                 .attr = {
1135                         .name = "inject_eccmask",
1136                         .mode = (S_IRUGO | S_IWUSR)
1137                 },
1138                 .show  = i7core_inject_eccmask_show,
1139                 .store = i7core_inject_eccmask_store,
1140         }, {
1141                 .grp = &i7core_inject_addrmatch,
1142         }, {
1143                 .attr = {
1144                         .name = "inject_enable",
1145                         .mode = (S_IRUGO | S_IWUSR)
1146                 },
1147                 .show  = i7core_inject_enable_show,
1148                 .store = i7core_inject_enable_store,
1149         }, {
1150                 .grp = &i7core_udimm_counters,
1151         },
1152         { }     /* End of list */
1153 };
1154
1155 /****************************************************************************
1156         Device initialization routines: put/get, init/exit
1157  ****************************************************************************/
1158
1159 /*
1160  *      i7core_put_all_devices  'put' all the devices that we have
1161  *                              reserved via 'get'
1162  */
1163 static void i7core_put_devices(struct i7core_dev *i7core_dev)
1164 {
1165         int i;
1166
1167         debugf0(__FILE__ ": %s()\n", __func__);
1168         for (i = 0; i < i7core_dev->n_devs; i++) {
1169                 struct pci_dev *pdev = i7core_dev->pdev[i];
1170                 if (!pdev)
1171                         continue;
1172                 debugf0("Removing dev %02x:%02x.%d\n",
1173                         pdev->bus->number,
1174                         PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1175                 pci_dev_put(pdev);
1176         }
1177 }
1178
1179 static void i7core_put_all_devices(void)
1180 {
1181         struct i7core_dev *i7core_dev, *tmp;
1182
1183         list_for_each_entry_safe(i7core_dev, tmp, &i7core_edac_list, list) {
1184                 i7core_put_devices(i7core_dev);
1185                 free_i7core_dev(i7core_dev);
1186         }
1187 }
1188
1189 static void __init i7core_xeon_pci_fixup(const struct pci_id_table *table)
1190 {
1191         struct pci_dev *pdev = NULL;
1192         int i;
1193
1194         /*
1195          * On Xeon 55xx, the Intel Quick Path Arch Generic Non-core pci buses
1196          * aren't announced by acpi. So, we need to use a legacy scan probing
1197          * to detect them
1198          */
1199         while (table && table->descr) {
1200                 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, table->descr[0].dev_id, NULL);
1201                 if (unlikely(!pdev)) {
1202                         for (i = 0; i < MAX_SOCKET_BUSES; i++)
1203                                 pcibios_scan_specific_bus(255-i);
1204                 }
1205                 pci_dev_put(pdev);
1206                 table++;
1207         }
1208 }
1209
1210 static unsigned i7core_pci_lastbus(void)
1211 {
1212         int last_bus = 0, bus;
1213         struct pci_bus *b = NULL;
1214
1215         while ((b = pci_find_next_bus(b)) != NULL) {
1216                 bus = b->number;
1217                 debugf0("Found bus %d\n", bus);
1218                 if (bus > last_bus)
1219                         last_bus = bus;
1220         }
1221
1222         debugf0("Last bus %d\n", last_bus);
1223
1224         return last_bus;
1225 }
1226
1227 /*
1228  *      i7core_get_all_devices  Find and perform 'get' operation on the MCH's
1229  *                      device/functions we want to reference for this driver
1230  *
1231  *                      Need to 'get' device 16 func 1 and func 2
1232  */
1233 static int i7core_get_onedevice(struct pci_dev **prev,
1234                                 const struct pci_id_table *table,
1235                                 const unsigned devno,
1236                                 const unsigned last_bus)
1237 {
1238         struct i7core_dev *i7core_dev;
1239         const struct pci_id_descr *dev_descr = &table->descr[devno];
1240
1241         struct pci_dev *pdev = NULL;
1242         u8 bus = 0;
1243         u8 socket = 0;
1244
1245         pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
1246                               dev_descr->dev_id, *prev);
1247
1248         /*
1249          * On Xeon 55xx, the Intel QuickPath Arch Generic Non-core regs
1250          * is at addr 8086:2c40, instead of 8086:2c41. So, we need
1251          * to probe for the alternate address in case of failure
1252          */
1253         if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_I7_NONCORE && !pdev)
1254                 pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
1255                                       PCI_DEVICE_ID_INTEL_I7_NONCORE_ALT, *prev);
1256
1257         if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE && !pdev)
1258                 pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
1259                                       PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_ALT,
1260                                       *prev);
1261
1262         if (!pdev) {
1263                 if (*prev) {
1264                         *prev = pdev;
1265                         return 0;
1266                 }
1267
1268                 if (dev_descr->optional)
1269                         return 0;
1270
1271                 if (devno == 0)
1272                         return -ENODEV;
1273
1274                 i7core_printk(KERN_INFO,
1275                         "Device not found: dev %02x.%d PCI ID %04x:%04x\n",
1276                         dev_descr->dev, dev_descr->func,
1277                         PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1278
1279                 /* End of list, leave */
1280                 return -ENODEV;
1281         }
1282         bus = pdev->bus->number;
1283
1284         socket = last_bus - bus;
1285
1286         i7core_dev = get_i7core_dev(socket);
1287         if (!i7core_dev) {
1288                 i7core_dev = alloc_i7core_dev(socket, table);
1289                 if (!i7core_dev) {
1290                         pci_dev_put(pdev);
1291                         return -ENOMEM;
1292                 }
1293         }
1294
1295         if (i7core_dev->pdev[devno]) {
1296                 i7core_printk(KERN_ERR,
1297                         "Duplicated device for "
1298                         "dev %02x:%02x.%d PCI ID %04x:%04x\n",
1299                         bus, dev_descr->dev, dev_descr->func,
1300                         PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1301                 pci_dev_put(pdev);
1302                 return -ENODEV;
1303         }
1304
1305         i7core_dev->pdev[devno] = pdev;
1306
1307         /* Sanity check */
1308         if (unlikely(PCI_SLOT(pdev->devfn) != dev_descr->dev ||
1309                         PCI_FUNC(pdev->devfn) != dev_descr->func)) {
1310                 i7core_printk(KERN_ERR,
1311                         "Device PCI ID %04x:%04x "
1312                         "has dev %02x:%02x.%d instead of dev %02x:%02x.%d\n",
1313                         PCI_VENDOR_ID_INTEL, dev_descr->dev_id,
1314                         bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
1315                         bus, dev_descr->dev, dev_descr->func);
1316                 return -ENODEV;
1317         }
1318
1319         /* Be sure that the device is enabled */
1320         if (unlikely(pci_enable_device(pdev) < 0)) {
1321                 i7core_printk(KERN_ERR,
1322                         "Couldn't enable "
1323                         "dev %02x:%02x.%d PCI ID %04x:%04x\n",
1324                         bus, dev_descr->dev, dev_descr->func,
1325                         PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1326                 return -ENODEV;
1327         }
1328
1329         debugf0("Detected socket %d dev %02x:%02x.%d PCI ID %04x:%04x\n",
1330                 socket, bus, dev_descr->dev,
1331                 dev_descr->func,
1332                 PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1333
1334         /*
1335          * As stated on drivers/pci/search.c, the reference count for
1336          * @from is always decremented if it is not %NULL. So, as we need
1337          * to get all devices up to null, we need to do a get for the device
1338          */
1339         pci_dev_get(pdev);
1340
1341         *prev = pdev;
1342
1343         return 0;
1344 }
1345
1346 static int i7core_get_all_devices(void)
1347 {
1348         int i, rc, last_bus;
1349         struct pci_dev *pdev = NULL;
1350         const struct pci_id_table *table = pci_dev_table;
1351
1352         last_bus = i7core_pci_lastbus();
1353
1354         while (table && table->descr) {
1355                 for (i = 0; i < table->n_devs; i++) {
1356                         pdev = NULL;
1357                         do {
1358                                 rc = i7core_get_onedevice(&pdev, table, i,
1359                                                           last_bus);
1360                                 if (rc < 0) {
1361                                         if (i == 0) {
1362                                                 i = table->n_devs;
1363                                                 break;
1364                                         }
1365                                         i7core_put_all_devices();
1366                                         return -ENODEV;
1367                                 }
1368                         } while (pdev);
1369                 }
1370                 table++;
1371         }
1372
1373         return 0;
1374 }
1375
1376 static int mci_bind_devs(struct mem_ctl_info *mci,
1377                          struct i7core_dev *i7core_dev)
1378 {
1379         struct i7core_pvt *pvt = mci->pvt_info;
1380         struct pci_dev *pdev;
1381         int i, func, slot;
1382         char *family;
1383
1384         pvt->is_registered = false;
1385         pvt->enable_scrub  = false;
1386         for (i = 0; i < i7core_dev->n_devs; i++) {
1387                 pdev = i7core_dev->pdev[i];
1388                 if (!pdev)
1389                         continue;
1390
1391                 func = PCI_FUNC(pdev->devfn);
1392                 slot = PCI_SLOT(pdev->devfn);
1393                 if (slot == 3) {
1394                         if (unlikely(func > MAX_MCR_FUNC))
1395                                 goto error;
1396                         pvt->pci_mcr[func] = pdev;
1397                 } else if (likely(slot >= 4 && slot < 4 + NUM_CHANS)) {
1398                         if (unlikely(func > MAX_CHAN_FUNC))
1399                                 goto error;
1400                         pvt->pci_ch[slot - 4][func] = pdev;
1401                 } else if (!slot && !func) {
1402                         pvt->pci_noncore = pdev;
1403
1404                         /* Detect the processor family */
1405                         switch (pdev->device) {
1406                         case PCI_DEVICE_ID_INTEL_I7_NONCORE:
1407                                 family = "Xeon 35xx/ i7core";
1408                                 pvt->enable_scrub = false;
1409                                 break;
1410                         case PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_ALT:
1411                                 family = "i7-800/i5-700";
1412                                 pvt->enable_scrub = false;
1413                                 break;
1414                         case PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE:
1415                                 family = "Xeon 34xx";
1416                                 pvt->enable_scrub = false;
1417                                 break;
1418                         case PCI_DEVICE_ID_INTEL_I7_NONCORE_ALT:
1419                                 family = "Xeon 55xx";
1420                                 pvt->enable_scrub = true;
1421                                 break;
1422                         case PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_REV2:
1423                                 family = "Xeon 56xx / i7-900";
1424                                 pvt->enable_scrub = true;
1425                                 break;
1426                         default:
1427                                 family = "unknown";
1428                                 pvt->enable_scrub = false;
1429                         }
1430                         debugf0("Detected a processor type %s\n", family);
1431                 } else
1432                         goto error;
1433
1434                 debugf0("Associated fn %d.%d, dev = %p, socket %d\n",
1435                         PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
1436                         pdev, i7core_dev->socket);
1437
1438                 if (PCI_SLOT(pdev->devfn) == 3 &&
1439                         PCI_FUNC(pdev->devfn) == 2)
1440                         pvt->is_registered = true;
1441         }
1442
1443         return 0;
1444
1445 error:
1446         i7core_printk(KERN_ERR, "Device %d, function %d "
1447                       "is out of the expected range\n",
1448                       slot, func);
1449         return -EINVAL;
1450 }
1451
1452 /****************************************************************************
1453                         Error check routines
1454  ****************************************************************************/
1455 static void i7core_rdimm_update_errcount(struct mem_ctl_info *mci,
1456                                       const int chan,
1457                                       const int dimm,
1458                                       const int add)
1459 {
1460         int i;
1461
1462         for (i = 0; i < add; i++) {
1463                 edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 0, 0, 0,
1464                                      chan, dimm, -1, "error", "", NULL);
1465         }
1466 }
1467
1468 static void i7core_rdimm_update_ce_count(struct mem_ctl_info *mci,
1469                                          const int chan,
1470                                          const int new0,
1471                                          const int new1,
1472                                          const int new2)
1473 {
1474         struct i7core_pvt *pvt = mci->pvt_info;
1475         int add0 = 0, add1 = 0, add2 = 0;
1476         /* Updates CE counters if it is not the first time here */
1477         if (pvt->ce_count_available) {
1478                 /* Updates CE counters */
1479
1480                 add2 = new2 - pvt->rdimm_last_ce_count[chan][2];
1481                 add1 = new1 - pvt->rdimm_last_ce_count[chan][1];
1482                 add0 = new0 - pvt->rdimm_last_ce_count[chan][0];
1483
1484                 if (add2 < 0)
1485                         add2 += 0x7fff;
1486                 pvt->rdimm_ce_count[chan][2] += add2;
1487
1488                 if (add1 < 0)
1489                         add1 += 0x7fff;
1490                 pvt->rdimm_ce_count[chan][1] += add1;
1491
1492                 if (add0 < 0)
1493                         add0 += 0x7fff;
1494                 pvt->rdimm_ce_count[chan][0] += add0;
1495         } else
1496                 pvt->ce_count_available = 1;
1497
1498         /* Store the new values */
1499         pvt->rdimm_last_ce_count[chan][2] = new2;
1500         pvt->rdimm_last_ce_count[chan][1] = new1;
1501         pvt->rdimm_last_ce_count[chan][0] = new0;
1502
1503         /*updated the edac core */
1504         if (add0 != 0)
1505                 i7core_rdimm_update_errcount(mci, chan, 0, add0);
1506         if (add1 != 0)
1507                 i7core_rdimm_update_errcount(mci, chan, 1, add1);
1508         if (add2 != 0)
1509                 i7core_rdimm_update_errcount(mci, chan, 2, add2);
1510
1511 }
1512
1513 static void i7core_rdimm_check_mc_ecc_err(struct mem_ctl_info *mci)
1514 {
1515         struct i7core_pvt *pvt = mci->pvt_info;
1516         u32 rcv[3][2];
1517         int i, new0, new1, new2;
1518
1519         /*Read DEV 3: FUN 2:  MC_COR_ECC_CNT regs directly*/
1520         pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_0,
1521                                                                 &rcv[0][0]);
1522         pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_1,
1523                                                                 &rcv[0][1]);
1524         pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_2,
1525                                                                 &rcv[1][0]);
1526         pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_3,
1527                                                                 &rcv[1][1]);
1528         pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_4,
1529                                                                 &rcv[2][0]);
1530         pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_5,
1531                                                                 &rcv[2][1]);
1532         for (i = 0 ; i < 3; i++) {
1533                 debugf3("MC_COR_ECC_CNT%d = 0x%x; MC_COR_ECC_CNT%d = 0x%x\n",
1534                         (i * 2), rcv[i][0], (i * 2) + 1, rcv[i][1]);
1535                 /*if the channel has 3 dimms*/
1536                 if (pvt->channel[i].dimms > 2) {
1537                         new0 = DIMM_BOT_COR_ERR(rcv[i][0]);
1538                         new1 = DIMM_TOP_COR_ERR(rcv[i][0]);
1539                         new2 = DIMM_BOT_COR_ERR(rcv[i][1]);
1540                 } else {
1541                         new0 = DIMM_TOP_COR_ERR(rcv[i][0]) +
1542                                         DIMM_BOT_COR_ERR(rcv[i][0]);
1543                         new1 = DIMM_TOP_COR_ERR(rcv[i][1]) +
1544                                         DIMM_BOT_COR_ERR(rcv[i][1]);
1545                         new2 = 0;
1546                 }
1547
1548                 i7core_rdimm_update_ce_count(mci, i, new0, new1, new2);
1549         }
1550 }
1551
1552 /* This function is based on the device 3 function 4 registers as described on:
1553  * Intel Xeon Processor 5500 Series Datasheet Volume 2
1554  *      http://www.intel.com/Assets/PDF/datasheet/321322.pdf
1555  * also available at:
1556  *      http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
1557  */
1558 static void i7core_udimm_check_mc_ecc_err(struct mem_ctl_info *mci)
1559 {
1560         struct i7core_pvt *pvt = mci->pvt_info;
1561         u32 rcv1, rcv0;
1562         int new0, new1, new2;
1563
1564         if (!pvt->pci_mcr[4]) {
1565                 debugf0("%s MCR registers not found\n", __func__);
1566                 return;
1567         }
1568
1569         /* Corrected test errors */
1570         pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV1, &rcv1);
1571         pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV0, &rcv0);
1572
1573         /* Store the new values */
1574         new2 = DIMM2_COR_ERR(rcv1);
1575         new1 = DIMM1_COR_ERR(rcv0);
1576         new0 = DIMM0_COR_ERR(rcv0);
1577
1578         /* Updates CE counters if it is not the first time here */
1579         if (pvt->ce_count_available) {
1580                 /* Updates CE counters */
1581                 int add0, add1, add2;
1582
1583                 add2 = new2 - pvt->udimm_last_ce_count[2];
1584                 add1 = new1 - pvt->udimm_last_ce_count[1];
1585                 add0 = new0 - pvt->udimm_last_ce_count[0];
1586
1587                 if (add2 < 0)
1588                         add2 += 0x7fff;
1589                 pvt->udimm_ce_count[2] += add2;
1590
1591                 if (add1 < 0)
1592                         add1 += 0x7fff;
1593                 pvt->udimm_ce_count[1] += add1;
1594
1595                 if (add0 < 0)
1596                         add0 += 0x7fff;
1597                 pvt->udimm_ce_count[0] += add0;
1598
1599                 if (add0 | add1 | add2)
1600                         i7core_printk(KERN_ERR, "New Corrected error(s): "
1601                                       "dimm0: +%d, dimm1: +%d, dimm2 +%d\n",
1602                                       add0, add1, add2);
1603         } else
1604                 pvt->ce_count_available = 1;
1605
1606         /* Store the new values */
1607         pvt->udimm_last_ce_count[2] = new2;
1608         pvt->udimm_last_ce_count[1] = new1;
1609         pvt->udimm_last_ce_count[0] = new0;
1610 }
1611
1612 /*
1613  * According with tables E-11 and E-12 of chapter E.3.3 of Intel 64 and IA-32
1614  * Architectures Software Developer’s Manual Volume 3B.
1615  * Nehalem are defined as family 0x06, model 0x1a
1616  *
1617  * The MCA registers used here are the following ones:
1618  *     struct mce field MCA Register
1619  *     m->status        MSR_IA32_MC8_STATUS
1620  *     m->addr          MSR_IA32_MC8_ADDR
1621  *     m->misc          MSR_IA32_MC8_MISC
1622  * In the case of Nehalem, the error information is masked at .status and .misc
1623  * fields
1624  */
1625 static void i7core_mce_output_error(struct mem_ctl_info *mci,
1626                                     const struct mce *m)
1627 {
1628         struct i7core_pvt *pvt = mci->pvt_info;
1629         char *type, *optype, *err, msg[80];
1630         enum hw_event_mc_err_type tp_event;
1631         unsigned long error = m->status & 0x1ff0000l;
1632         bool uncorrected_error = m->mcgstatus & 1ll << 61;
1633         bool ripv = m->mcgstatus & 1;
1634         u32 optypenum = (m->status >> 4) & 0x07;
1635         u32 core_err_cnt = (m->status >> 38) & 0x7fff;
1636         u32 dimm = (m->misc >> 16) & 0x3;
1637         u32 channel = (m->misc >> 18) & 0x3;
1638         u32 syndrome = m->misc >> 32;
1639         u32 errnum = find_first_bit(&error, 32);
1640
1641         if (uncorrected_error) {
1642                 if (ripv) {
1643                         type = "FATAL";
1644                         tp_event = HW_EVENT_ERR_FATAL;
1645                 } else {
1646                         type = "NON_FATAL";
1647                         tp_event = HW_EVENT_ERR_UNCORRECTED;
1648                 }
1649         } else {
1650                 type = "CORRECTED";
1651                 tp_event = HW_EVENT_ERR_CORRECTED;
1652         }
1653
1654         switch (optypenum) {
1655         case 0:
1656                 optype = "generic undef request";
1657                 break;
1658         case 1:
1659                 optype = "read error";
1660                 break;
1661         case 2:
1662                 optype = "write error";
1663                 break;
1664         case 3:
1665                 optype = "addr/cmd error";
1666                 break;
1667         case 4:
1668                 optype = "scrubbing error";
1669                 break;
1670         default:
1671                 optype = "reserved";
1672                 break;
1673         }
1674
1675         switch (errnum) {
1676         case 16:
1677                 err = "read ECC error";
1678                 break;
1679         case 17:
1680                 err = "RAS ECC error";
1681                 break;
1682         case 18:
1683                 err = "write parity error";
1684                 break;
1685         case 19:
1686                 err = "redundacy loss";
1687                 break;
1688         case 20:
1689                 err = "reserved";
1690                 break;
1691         case 21:
1692                 err = "memory range error";
1693                 break;
1694         case 22:
1695                 err = "RTID out of range";
1696                 break;
1697         case 23:
1698                 err = "address parity error";
1699                 break;
1700         case 24:
1701                 err = "byte enable parity error";
1702                 break;
1703         default:
1704                 err = "unknown";
1705         }
1706
1707         snprintf(msg, sizeof(msg), "count=%d %s", core_err_cnt, optype);
1708
1709         /*
1710          * Call the helper to output message
1711          * FIXME: what to do if core_err_cnt > 1? Currently, it generates
1712          * only one event
1713          */
1714         if (uncorrected_error || !pvt->is_registered)
1715                 edac_mc_handle_error(tp_event, mci,
1716                                      m->addr >> PAGE_SHIFT,
1717                                      m->addr & ~PAGE_MASK,
1718                                      syndrome,
1719                                      channel, dimm, -1,
1720                                      err, msg, m);
1721 }
1722
1723 /*
1724  *      i7core_check_error      Retrieve and process errors reported by the
1725  *                              hardware. Called by the Core module.
1726  */
1727 static void i7core_check_error(struct mem_ctl_info *mci)
1728 {
1729         struct i7core_pvt *pvt = mci->pvt_info;
1730         int i;
1731         unsigned count = 0;
1732         struct mce *m;
1733
1734         /*
1735          * MCE first step: Copy all mce errors into a temporary buffer
1736          * We use a double buffering here, to reduce the risk of
1737          * losing an error.
1738          */
1739         smp_rmb();
1740         count = (pvt->mce_out + MCE_LOG_LEN - pvt->mce_in)
1741                 % MCE_LOG_LEN;
1742         if (!count)
1743                 goto check_ce_error;
1744
1745         m = pvt->mce_outentry;
1746         if (pvt->mce_in + count > MCE_LOG_LEN) {
1747                 unsigned l = MCE_LOG_LEN - pvt->mce_in;
1748
1749                 memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * l);
1750                 smp_wmb();
1751                 pvt->mce_in = 0;
1752                 count -= l;
1753                 m += l;
1754         }
1755         memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * count);
1756         smp_wmb();
1757         pvt->mce_in += count;
1758
1759         smp_rmb();
1760         if (pvt->mce_overrun) {
1761                 i7core_printk(KERN_ERR, "Lost %d memory errors\n",
1762                               pvt->mce_overrun);
1763                 smp_wmb();
1764                 pvt->mce_overrun = 0;
1765         }
1766
1767         /*
1768          * MCE second step: parse errors and display
1769          */
1770         for (i = 0; i < count; i++)
1771                 i7core_mce_output_error(mci, &pvt->mce_outentry[i]);
1772
1773         /*
1774          * Now, let's increment CE error counts
1775          */
1776 check_ce_error:
1777         if (!pvt->is_registered)
1778                 i7core_udimm_check_mc_ecc_err(mci);
1779         else
1780                 i7core_rdimm_check_mc_ecc_err(mci);
1781 }
1782
1783 /*
1784  * i7core_mce_check_error       Replicates mcelog routine to get errors
1785  *                              This routine simply queues mcelog errors, and
1786  *                              return. The error itself should be handled later
1787  *                              by i7core_check_error.
1788  * WARNING: As this routine should be called at NMI time, extra care should
1789  * be taken to avoid deadlocks, and to be as fast as possible.
1790  */
1791 static int i7core_mce_check_error(struct notifier_block *nb, unsigned long val,
1792                                   void *data)
1793 {
1794         struct mce *mce = (struct mce *)data;
1795         struct i7core_dev *i7_dev;
1796         struct mem_ctl_info *mci;
1797         struct i7core_pvt *pvt;
1798
1799         i7_dev = get_i7core_dev(mce->socketid);
1800         if (!i7_dev)
1801                 return NOTIFY_BAD;
1802
1803         mci = i7_dev->mci;
1804         pvt = mci->pvt_info;
1805
1806         /*
1807          * Just let mcelog handle it if the error is
1808          * outside the memory controller
1809          */
1810         if (((mce->status & 0xffff) >> 7) != 1)
1811                 return NOTIFY_DONE;
1812
1813         /* Bank 8 registers are the only ones that we know how to handle */
1814         if (mce->bank != 8)
1815                 return NOTIFY_DONE;
1816
1817         smp_rmb();
1818         if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) {
1819                 smp_wmb();
1820                 pvt->mce_overrun++;
1821                 return NOTIFY_DONE;
1822         }
1823
1824         /* Copy memory error at the ringbuffer */
1825         memcpy(&pvt->mce_entry[pvt->mce_out], mce, sizeof(*mce));
1826         smp_wmb();
1827         pvt->mce_out = (pvt->mce_out + 1) % MCE_LOG_LEN;
1828
1829         /* Handle fatal errors immediately */
1830         if (mce->mcgstatus & 1)
1831                 i7core_check_error(mci);
1832
1833         /* Advise mcelog that the errors were handled */
1834         return NOTIFY_STOP;
1835 }
1836
/*
 * Notifier block that routes machine-check events to
 * i7core_mce_check_error(); it is added to and removed from the MCE decode
 * chain by i7core_init() and i7core_exit().
 */
static struct notifier_block i7_mce_dec = {
        .notifier_call  = i7core_mce_check_error,
};
1840
/*
 * Overlay for a DMI memory device entry (DMI_ENTRY_MEM_DEVICE), used by
 * decode_dclk() to read the DIMM size and speed fields.
 *
 * The structure is packed so that no padding is inserted between fields;
 * decode_dclk() compares field offsets against the entry's "length" byte
 * to find out which trailing fields a given entry actually provides.
 */
struct memdev_dmi_entry {
        u8 type;
        u8 length;              /* compared against field offsets by decode_dclk() */
        u16 handle;
        u16 phys_mem_array_handle;
        u16 mem_err_info_handle;
        u16 total_width;
        u16 data_width;
        u16 size;               /* 0 is treated as "no DIMM present" */
        u8 form;
        u8 device_set;
        u8 device_locator;
        u8 bank_locator;
        u8 memory_type;
        u16 type_detail;
        u16 speed;              /* fallback when conf_mem_clk_speed is absent */
        u8 manufacturer;
        u8 serial_number;
        u8 asset_tag;
        u8 part_number;
        u8 attributes;
        u32 extended_size;
        u16 conf_mem_clk_speed; /* preferred speed source when present */
} __attribute__((__packed__));
1865
1866
1867 /*
1868  * Decode the DRAM Clock Frequency, be paranoid, make sure that all
1869  * memory devices show the same speed, and if they don't then consider
1870  * all speeds to be invalid.
1871  */
1872 static void decode_dclk(const struct dmi_header *dh, void *_dclk_freq)
1873 {
1874         int *dclk_freq = _dclk_freq;
1875         u16 dmi_mem_clk_speed;
1876
1877         if (*dclk_freq == -1)
1878                 return;
1879
1880         if (dh->type == DMI_ENTRY_MEM_DEVICE) {
1881                 struct memdev_dmi_entry *memdev_dmi_entry =
1882                         (struct memdev_dmi_entry *)dh;
1883                 unsigned long conf_mem_clk_speed_offset =
1884                         (unsigned long)&memdev_dmi_entry->conf_mem_clk_speed -
1885                         (unsigned long)&memdev_dmi_entry->type;
1886                 unsigned long speed_offset =
1887                         (unsigned long)&memdev_dmi_entry->speed -
1888                         (unsigned long)&memdev_dmi_entry->type;
1889
1890                 /* Check that a DIMM is present */
1891                 if (memdev_dmi_entry->size == 0)
1892                         return;
1893
1894                 /*
1895                  * Pick the configured speed if it's available, otherwise
1896                  * pick the DIMM speed, or we don't have a speed.
1897                  */
1898                 if (memdev_dmi_entry->length > conf_mem_clk_speed_offset) {
1899                         dmi_mem_clk_speed =
1900                                 memdev_dmi_entry->conf_mem_clk_speed;
1901                 } else if (memdev_dmi_entry->length > speed_offset) {
1902                         dmi_mem_clk_speed = memdev_dmi_entry->speed;
1903                 } else {
1904                         *dclk_freq = -1;
1905                         return;
1906                 }
1907
1908                 if (*dclk_freq == 0) {
1909                         /* First pass, speed was 0 */
1910                         if (dmi_mem_clk_speed > 0) {
1911                                 /* Set speed if a valid speed is read */
1912                                 *dclk_freq = dmi_mem_clk_speed;
1913                         } else {
1914                                 /* Otherwise we don't have a valid speed */
1915                                 *dclk_freq = -1;
1916                         }
1917                 } else if (*dclk_freq > 0 &&
1918                            *dclk_freq != dmi_mem_clk_speed) {
1919                         /*
1920                          * If we have a speed, check that all DIMMS are the same
1921                          * speed, otherwise set the speed as invalid.
1922                          */
1923                         *dclk_freq = -1;
1924                 }
1925         }
1926 }
1927
1928 /*
1929  * The default DCLK frequency is used as a fallback if we
1930  * fail to find anything reliable in the DMI. The value
1931  * is taken straight from the datasheet.
1932  */
1933 #define DEFAULT_DCLK_FREQ 800
1934
1935 static int get_dclk_freq(void)
1936 {
1937         int dclk_freq = 0;
1938
1939         dmi_walk(decode_dclk, (void *)&dclk_freq);
1940
1941         if (dclk_freq < 1)
1942                 return DEFAULT_DCLK_FREQ;
1943
1944         return dclk_freq;
1945 }
1946
1947 /*
1948  * set_sdram_scrub_rate         This routine sets byte/sec bandwidth scrub rate
1949  *                              to hardware according to SCRUBINTERVAL formula
1950  *                              found in datasheet.
1951  */
1952 static int set_sdram_scrub_rate(struct mem_ctl_info *mci, u32 new_bw)
1953 {
1954         struct i7core_pvt *pvt = mci->pvt_info;
1955         struct pci_dev *pdev;
1956         u32 dw_scrub;
1957         u32 dw_ssr;
1958
1959         /* Get data from the MC register, function 2 */
1960         pdev = pvt->pci_mcr[2];
1961         if (!pdev)
1962                 return -ENODEV;
1963
1964         pci_read_config_dword(pdev, MC_SCRUB_CONTROL, &dw_scrub);
1965
1966         if (new_bw == 0) {
1967                 /* Prepare to disable petrol scrub */
1968                 dw_scrub &= ~STARTSCRUB;
1969                 /* Stop the patrol scrub engine */
1970                 write_and_test(pdev, MC_SCRUB_CONTROL,
1971                                dw_scrub & ~SCRUBINTERVAL_MASK);
1972
1973                 /* Get current status of scrub rate and set bit to disable */
1974                 pci_read_config_dword(pdev, MC_SSRCONTROL, &dw_ssr);
1975                 dw_ssr &= ~SSR_MODE_MASK;
1976                 dw_ssr |= SSR_MODE_DISABLE;
1977         } else {
1978                 const int cache_line_size = 64;
1979                 const u32 freq_dclk_mhz = pvt->dclk_freq;
1980                 unsigned long long scrub_interval;
1981                 /*
1982                  * Translate the desired scrub rate to a register value and
1983                  * program the corresponding register value.
1984                  */
1985                 scrub_interval = (unsigned long long)freq_dclk_mhz *
1986                         cache_line_size * 1000000;
1987                 do_div(scrub_interval, new_bw);
1988
1989                 if (!scrub_interval || scrub_interval > SCRUBINTERVAL_MASK)
1990                         return -EINVAL;
1991
1992                 dw_scrub = SCRUBINTERVAL_MASK & scrub_interval;
1993
1994                 /* Start the patrol scrub engine */
1995                 pci_write_config_dword(pdev, MC_SCRUB_CONTROL,
1996                                        STARTSCRUB | dw_scrub);
1997
1998                 /* Get current status of scrub rate and set bit to enable */
1999                 pci_read_config_dword(pdev, MC_SSRCONTROL, &dw_ssr);
2000                 dw_ssr &= ~SSR_MODE_MASK;
2001                 dw_ssr |= SSR_MODE_ENABLE;
2002         }
2003         /* Disable or enable scrubbing */
2004         pci_write_config_dword(pdev, MC_SSRCONTROL, dw_ssr);
2005
2006         return new_bw;
2007 }
2008
2009 /*
2010  * get_sdram_scrub_rate         This routine convert current scrub rate value
2011  *                              into byte/sec bandwidth according to
2012  *                              SCRUBINTERVAL formula found in datasheet.
2013  */
2014 static int get_sdram_scrub_rate(struct mem_ctl_info *mci)
2015 {
2016         struct i7core_pvt *pvt = mci->pvt_info;
2017         struct pci_dev *pdev;
2018         const u32 cache_line_size = 64;
2019         const u32 freq_dclk_mhz = pvt->dclk_freq;
2020         unsigned long long scrub_rate;
2021         u32 scrubval;
2022
2023         /* Get data from the MC register, function 2 */
2024         pdev = pvt->pci_mcr[2];
2025         if (!pdev)
2026                 return -ENODEV;
2027
2028         /* Get current scrub control data */
2029         pci_read_config_dword(pdev, MC_SCRUB_CONTROL, &scrubval);
2030
2031         /* Mask highest 8-bits to 0 */
2032         scrubval &=  SCRUBINTERVAL_MASK;
2033         if (!scrubval)
2034                 return 0;
2035
2036         /* Calculate scrub rate value into byte/sec bandwidth */
2037         scrub_rate =  (unsigned long long)freq_dclk_mhz *
2038                 1000000 * cache_line_size;
2039         do_div(scrub_rate, scrubval);
2040         return (int)scrub_rate;
2041 }
2042
2043 static void enable_sdram_scrub_setting(struct mem_ctl_info *mci)
2044 {
2045         struct i7core_pvt *pvt = mci->pvt_info;
2046         u32 pci_lock;
2047
2048         /* Unlock writes to pci registers */
2049         pci_read_config_dword(pvt->pci_noncore, MC_CFG_CONTROL, &pci_lock);
2050         pci_lock &= ~0x3;
2051         pci_write_config_dword(pvt->pci_noncore, MC_CFG_CONTROL,
2052                                pci_lock | MC_CFG_UNLOCK);
2053
2054         mci->set_sdram_scrub_rate = set_sdram_scrub_rate;
2055         mci->get_sdram_scrub_rate = get_sdram_scrub_rate;
2056 }
2057
2058 static void disable_sdram_scrub_setting(struct mem_ctl_info *mci)
2059 {
2060         struct i7core_pvt *pvt = mci->pvt_info;
2061         u32 pci_lock;
2062
2063         /* Lock writes to pci registers */
2064         pci_read_config_dword(pvt->pci_noncore, MC_CFG_CONTROL, &pci_lock);
2065         pci_lock &= ~0x3;
2066         pci_write_config_dword(pvt->pci_noncore, MC_CFG_CONTROL,
2067                                pci_lock | MC_CFG_LOCK);
2068 }
2069
2070 static void i7core_pci_ctl_create(struct i7core_pvt *pvt)
2071 {
2072         pvt->i7core_pci = edac_pci_create_generic_ctl(
2073                                                 &pvt->i7core_dev->pdev[0]->dev,
2074                                                 EDAC_MOD_STR);
2075         if (unlikely(!pvt->i7core_pci))
2076                 i7core_printk(KERN_WARNING,
2077                               "Unable to setup PCI error report via EDAC\n");
2078 }
2079
2080 static void i7core_pci_ctl_release(struct i7core_pvt *pvt)
2081 {
2082         if (likely(pvt->i7core_pci))
2083                 edac_pci_release_generic_ctl(pvt->i7core_pci);
2084         else
2085                 i7core_printk(KERN_ERR,
2086                                 "Couldn't find mem_ctl_info for socket %d\n",
2087                                 pvt->i7core_dev->socket);
2088         pvt->i7core_pci = NULL;
2089 }
2090
2091 static void i7core_unregister_mci(struct i7core_dev *i7core_dev)
2092 {
2093         struct mem_ctl_info *mci = i7core_dev->mci;
2094         struct i7core_pvt *pvt;
2095
2096         if (unlikely(!mci || !mci->pvt_info)) {
2097                 debugf0("MC: " __FILE__ ": %s(): dev = %p\n",
2098                         __func__, &i7core_dev->pdev[0]->dev);
2099
2100                 i7core_printk(KERN_ERR, "Couldn't find mci handler\n");
2101                 return;
2102         }
2103
2104         pvt = mci->pvt_info;
2105
2106         debugf0("MC: " __FILE__ ": %s(): mci = %p, dev = %p\n",
2107                 __func__, mci, &i7core_dev->pdev[0]->dev);
2108
2109         /* Disable scrubrate setting */
2110         if (pvt->enable_scrub)
2111                 disable_sdram_scrub_setting(mci);
2112
2113         /* Disable EDAC polling */
2114         i7core_pci_ctl_release(pvt);
2115
2116         /* Remove MC sysfs nodes */
2117         edac_mc_del_mc(mci->dev);
2118
2119         debugf1("%s: free mci struct\n", mci->ctl_name);
2120         kfree(mci->ctl_name);
2121         edac_mc_free(mci);
2122         i7core_dev->mci = NULL;
2123 }
2124
2125 static int i7core_register_mci(struct i7core_dev *i7core_dev)
2126 {
2127         struct mem_ctl_info *mci;
2128         struct i7core_pvt *pvt;
2129         int rc;
2130         struct edac_mc_layer layers[2];
2131
2132         /* allocate a new MC control structure */
2133
2134         layers[0].type = EDAC_MC_LAYER_CHANNEL;
2135         layers[0].size = NUM_CHANS;
2136         layers[0].is_virt_csrow = false;
2137         layers[1].type = EDAC_MC_LAYER_SLOT;
2138         layers[1].size = MAX_DIMMS;
2139         layers[1].is_virt_csrow = true;
2140         mci = edac_mc_alloc(i7core_dev->socket, ARRAY_SIZE(layers), layers,
2141                             sizeof(*pvt));
2142         if (unlikely(!mci))
2143                 return -ENOMEM;
2144
2145         debugf0("MC: " __FILE__ ": %s(): mci = %p, dev = %p\n",
2146                 __func__, mci, &i7core_dev->pdev[0]->dev);
2147
2148         pvt = mci->pvt_info;
2149         memset(pvt, 0, sizeof(*pvt));
2150
2151         /* Associates i7core_dev and mci for future usage */
2152         pvt->i7core_dev = i7core_dev;
2153         i7core_dev->mci = mci;
2154
2155         /*
2156          * FIXME: how to handle RDDR3 at MCI level? It is possible to have
2157          * Mixed RDDR3/UDDR3 with Nehalem, provided that they are on different
2158          * memory channels
2159          */
2160         mci->mtype_cap = MEM_FLAG_DDR3;
2161         mci->edac_ctl_cap = EDAC_FLAG_NONE;
2162         mci->edac_cap = EDAC_FLAG_NONE;
2163         mci->mod_name = "i7core_edac.c";
2164         mci->mod_ver = I7CORE_REVISION;
2165         mci->ctl_name = kasprintf(GFP_KERNEL, "i7 core #%d",
2166                                   i7core_dev->socket);
2167         mci->dev_name = pci_name(i7core_dev->pdev[0]);
2168         mci->ctl_page_to_phys = NULL;
2169
2170         /* Store pci devices at mci for faster access */
2171         rc = mci_bind_devs(mci, i7core_dev);
2172         if (unlikely(rc < 0))
2173                 goto fail0;
2174
2175         if (pvt->is_registered)
2176                 mci->mc_driver_sysfs_attributes = i7core_sysfs_rdimm_attrs;
2177         else
2178                 mci->mc_driver_sysfs_attributes = i7core_sysfs_udimm_attrs;
2179
2180         /* Get dimm basic config */
2181         get_dimm_config(mci);
2182         /* record ptr to the generic device */
2183         mci->dev = &i7core_dev->pdev[0]->dev;
2184         /* Set the function pointer to an actual operation function */
2185         mci->edac_check = i7core_check_error;
2186
2187         /* Enable scrubrate setting */
2188         if (pvt->enable_scrub)
2189                 enable_sdram_scrub_setting(mci);
2190
2191         /* add this new MC control structure to EDAC's list of MCs */
2192         if (unlikely(edac_mc_add_mc(mci))) {
2193                 debugf0("MC: " __FILE__
2194                         ": %s(): failed edac_mc_add_mc()\n", __func__);
2195                 /* FIXME: perhaps some code should go here that disables error
2196                  * reporting if we just enabled it
2197                  */
2198
2199                 rc = -EINVAL;
2200                 goto fail0;
2201         }
2202
2203         /* Default error mask is any memory */
2204         pvt->inject.channel = 0;
2205         pvt->inject.dimm = -1;
2206         pvt->inject.rank = -1;
2207         pvt->inject.bank = -1;
2208         pvt->inject.page = -1;
2209         pvt->inject.col = -1;
2210
2211         /* allocating generic PCI control info */
2212         i7core_pci_ctl_create(pvt);
2213
2214         /* DCLK for scrub rate setting */
2215         pvt->dclk_freq = get_dclk_freq();
2216
2217         return 0;
2218
2219 fail0:
2220         kfree(mci->ctl_name);
2221         edac_mc_free(mci);
2222         i7core_dev->mci = NULL;
2223         return rc;
2224 }
2225
2226 /*
2227  *      i7core_probe    Probe for ONE instance of device to see if it is
2228  *                      present.
2229  *      return:
2230  *              0 for FOUND a device
2231  *              < 0 for error code
2232  */
2233
2234 static int __devinit i7core_probe(struct pci_dev *pdev,
2235                                   const struct pci_device_id *id)
2236 {
2237         int rc, count = 0;
2238         struct i7core_dev *i7core_dev;
2239
2240         /* get the pci devices we want to reserve for our use */
2241         mutex_lock(&i7core_edac_lock);
2242
2243         /*
2244          * All memory controllers are allocated at the first pass.
2245          */
2246         if (unlikely(probed >= 1)) {
2247                 mutex_unlock(&i7core_edac_lock);
2248                 return -ENODEV;
2249         }
2250         probed++;
2251
2252         rc = i7core_get_all_devices();
2253         if (unlikely(rc < 0))
2254                 goto fail0;
2255
2256         list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
2257                 count++;
2258                 rc = i7core_register_mci(i7core_dev);
2259                 if (unlikely(rc < 0))
2260                         goto fail1;
2261         }
2262
2263         /*
2264          * Nehalem-EX uses a different memory controller. However, as the
2265          * memory controller is not visible on some Nehalem/Nehalem-EP, we
2266          * need to indirectly probe via a X58 PCI device. The same devices
2267          * are found on (some) Nehalem-EX. So, on those machines, the
2268          * probe routine needs to return -ENODEV, as the actual Memory
2269          * Controller registers won't be detected.
2270          */
2271         if (!count) {
2272                 rc = -ENODEV;
2273                 goto fail1;
2274         }
2275
2276         i7core_printk(KERN_INFO,
2277                       "Driver loaded, %d memory controller(s) found.\n",
2278                       count);
2279
2280         mutex_unlock(&i7core_edac_lock);
2281         return 0;
2282
2283 fail1:
2284         list_for_each_entry(i7core_dev, &i7core_edac_list, list)
2285                 i7core_unregister_mci(i7core_dev);
2286
2287         i7core_put_all_devices();
2288 fail0:
2289         mutex_unlock(&i7core_edac_lock);
2290         return rc;
2291 }
2292
2293 /*
2294  *      i7core_remove   destructor for one instance of device
2295  *
2296  */
2297 static void __devexit i7core_remove(struct pci_dev *pdev)
2298 {
2299         struct i7core_dev *i7core_dev;
2300
2301         debugf0(__FILE__ ": %s()\n", __func__);
2302
2303         /*
2304          * we have a trouble here: pdev value for removal will be wrong, since
2305          * it will point to the X58 register used to detect that the machine
2306          * is a Nehalem or upper design. However, due to the way several PCI
2307          * devices are grouped together to provide MC functionality, we need
2308          * to use a different method for releasing the devices
2309          */
2310
2311         mutex_lock(&i7core_edac_lock);
2312
2313         if (unlikely(!probed)) {
2314                 mutex_unlock(&i7core_edac_lock);
2315                 return;
2316         }
2317
2318         list_for_each_entry(i7core_dev, &i7core_edac_list, list)
2319                 i7core_unregister_mci(i7core_dev);
2320
2321         /* Release PCI resources */
2322         i7core_put_all_devices();
2323
2324         probed--;
2325
2326         mutex_unlock(&i7core_edac_lock);
2327 }
2328
/* Export the PCI ID table of this module */
MODULE_DEVICE_TABLE(pci, i7core_pci_tbl);

/*
 *      i7core_driver   pci_driver structure for this module
 *
 * Registered with the PCI core by i7core_init() and unregistered by
 * i7core_exit(); binds i7core_probe()/i7core_remove() to the devices
 * listed in i7core_pci_tbl.
 */
static struct pci_driver i7core_driver = {
        .name     = "i7core_edac",
        .probe    = i7core_probe,
        .remove   = __devexit_p(i7core_remove),
        .id_table = i7core_pci_tbl,
};
2341
2342 /*
2343  *      i7core_init             Module entry function
2344  *                      Try to initialize this module for its devices
2345  */
2346 static int __init i7core_init(void)
2347 {
2348         int pci_rc;
2349
2350         debugf2("MC: " __FILE__ ": %s()\n", __func__);
2351
2352         /* Ensure that the OPSTATE is set correctly for POLL or NMI */
2353         opstate_init();
2354
2355         if (use_pci_fixup)
2356                 i7core_xeon_pci_fixup(pci_dev_table);
2357
2358         pci_rc = pci_register_driver(&i7core_driver);
2359
2360         if (pci_rc >= 0) {
2361                 mce_register_decode_chain(&i7_mce_dec);
2362                 return 0;
2363         }
2364
2365         i7core_printk(KERN_ERR, "Failed to register device with error %d.\n",
2366                       pci_rc);
2367
2368         return pci_rc;
2369 }
2370
2371 /*
2372  *      i7core_exit()   Module exit function
2373  *                      Unregister the driver
2374  */
2375 static void __exit i7core_exit(void)
2376 {
2377         debugf2("MC: " __FILE__ ": %s()\n", __func__);
2378         pci_unregister_driver(&i7core_driver);
2379         mce_unregister_decode_chain(&i7_mce_dec);
2380 }
2381
/* Module entry and exit points */
module_init(i7core_init);
module_exit(i7core_exit);

/* Module license, authorship and description */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>");
MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)");
MODULE_DESCRIPTION("MC Driver for Intel i7 Core memory controllers - "
                   I7CORE_REVISION);

/* Read-only (0444) module parameter selecting the EDAC reporting state */
module_param(edac_op_state, int, 0444);
MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");