Pileus Git - ~andy/linux/blobdiff - drivers/edac/edac_mc.c
Merge branch 'for-john' of git://x-git.kernel.org/pub/scm/linux/kernel/git/jberg...
[~andy/linux] / drivers / edac / edac_mc.c
index d1e9eb191f2bd77ff72614c0819885d31dcf473d..cdb81aa73ab7aeb595be53f77430488486947e66 100644 (file)
 static DEFINE_MUTEX(mem_ctls_mutex);
 static LIST_HEAD(mc_devices);
 
+/*
+ * Used to lock EDAC MC to just one module, preventing two drivers (e.g.
+ *     apei/ghes and i7core_edac) from being used at the same time.
+ */
+static void const *edac_mc_owner;
+
 unsigned edac_dimm_info_location(struct dimm_info *dimm, char *buf,
                                 unsigned len)
 {
@@ -441,13 +447,6 @@ struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
 
        mci->op_state = OP_ALLOC;
 
-       /* at this point, the root kobj is valid, and in order to
-        * 'free' the object, then the function:
-        *      edac_mc_unregister_sysfs_main_kobj() must be called
-        * which will perform kobj unregistration and the actual free
-        * will occur during the kobject callback operation
-        */
-
        return mci;
 
 error:
@@ -666,9 +665,9 @@ fail1:
        return 1;
 }
 
-static void del_mc_from_global_list(struct mem_ctl_info *mci)
+static int del_mc_from_global_list(struct mem_ctl_info *mci)
 {
-       atomic_dec(&edac_handlers);
+       int handlers = atomic_dec_return(&edac_handlers);
        list_del_rcu(&mci->link);
 
        /* these are for safe removal of devices from global list while
@@ -676,6 +675,8 @@ static void del_mc_from_global_list(struct mem_ctl_info *mci)
         */
        synchronize_rcu();
        INIT_LIST_HEAD(&mci->link);
+
+       return handlers;
 }
 
 /**
@@ -719,6 +720,7 @@ EXPORT_SYMBOL(edac_mc_find);
 /* FIXME - should a warning be printed if no error detection? correction? */
 int edac_mc_add_mc(struct mem_ctl_info *mci)
 {
+       int ret = -EINVAL;
        edac_dbg(0, "\n");
 
 #ifdef CONFIG_EDAC_DEBUG
@@ -749,6 +751,11 @@ int edac_mc_add_mc(struct mem_ctl_info *mci)
 #endif
        mutex_lock(&mem_ctls_mutex);
 
+       if (edac_mc_owner && edac_mc_owner != mci->mod_name) {
+               ret = -EPERM;
+               goto fail0;
+       }
+
        if (add_mc_to_global_list(mci))
                goto fail0;
 
@@ -775,6 +782,8 @@ int edac_mc_add_mc(struct mem_ctl_info *mci)
        edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':"
                " DEV %s\n", mci->mod_name, mci->ctl_name, edac_dev_name(mci));
 
+       edac_mc_owner = mci->mod_name;
+
        mutex_unlock(&mem_ctls_mutex);
        return 0;
 
@@ -783,7 +792,7 @@ fail1:
 
 fail0:
        mutex_unlock(&mem_ctls_mutex);
-       return 1;
+       return ret;
 }
 EXPORT_SYMBOL_GPL(edac_mc_add_mc);
 
@@ -809,7 +818,8 @@ struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
                return NULL;
        }
 
-       del_mc_from_global_list(mci);
+       if (!del_mc_from_global_list(mci))
+               edac_mc_owner = NULL;
        mutex_unlock(&mem_ctls_mutex);
 
        /* flush workq processes */
@@ -907,6 +917,7 @@ const char *edac_layer_name[] = {
        [EDAC_MC_LAYER_CHANNEL] = "channel",
        [EDAC_MC_LAYER_SLOT] = "slot",
        [EDAC_MC_LAYER_CHIP_SELECT] = "csrow",
+       [EDAC_MC_LAYER_ALL_MEM] = "memory",
 };
 EXPORT_SYMBOL_GPL(edac_layer_name);
 
@@ -1054,7 +1065,46 @@ static void edac_ue_error(struct mem_ctl_info *mci,
        edac_inc_ue_error(mci, enable_per_layer_report, pos, error_count);
 }
 
-#define OTHER_LABEL " or "
+/**
+ * edac_raw_mc_handle_error - reports a memory event to userspace without doing
+ *                           anything to discover the error location
+ *
+ * @type:              severity of the error (CE/UE/Fatal)
+ * @mci:               a struct mem_ctl_info pointer
+ * @e:                 error description
+ *
+ * This raw function is used internally by edac_mc_handle_error(). It should
+ * only be called directly when the hardware error comes directly from BIOS,
+ * like in the case of APEI GHES driver.
+ *
+ * The location, label, message and count are taken as-is from @e. Corrected
+ * errors (HW_EVENT_ERR_CORRECTED) are passed to edac_ce_error(); every other
+ * @type is passed to edac_ue_error().
+ */
+void edac_raw_mc_handle_error(const enum hw_event_mc_err_type type,
+                             struct mem_ctl_info *mci,
+                             struct edac_raw_error_desc *e)
+{
+       char detail[80];
+       int pos[EDAC_MAX_LAYERS] = { e->top_layer, e->mid_layer, e->low_layer };
+
+       /* Memory type dependent details; only CE reports the syndrome */
+       if (type == HW_EVENT_ERR_CORRECTED) {
+               snprintf(detail, sizeof(detail),
+                       "page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx",
+                       e->page_frame_number, e->offset_in_page,
+                       e->grain, e->syndrome);
+               edac_ce_error(mci, e->error_count, pos, e->msg, e->location, e->label,
+                             detail, e->other_detail, e->enable_per_layer_report,
+                             e->page_frame_number, e->offset_in_page, e->grain);
+       } else {
+               snprintf(detail, sizeof(detail),
+                       "page:0x%lx offset:0x%lx grain:%ld",
+                       e->page_frame_number, e->offset_in_page, e->grain);
+
+               edac_ue_error(mci, e->error_count, pos, e->msg, e->location, e->label,
+                             detail, e->other_detail, e->enable_per_layer_report);
+       }
+
+
+}
+EXPORT_SYMBOL_GPL(edac_raw_mc_handle_error);
 
 /**
  * edac_mc_handle_error - reports a memory event to userspace
@@ -1086,19 +1136,27 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
                          const char *msg,
                          const char *other_detail)
 {
-       /* FIXME: too much for stack: move it to some pre-alocated area */
-       char detail[80], location[80];
-       char label[(EDAC_MC_LABEL_LEN + 1 + sizeof(OTHER_LABEL)) * mci->tot_dimms];
        char *p;
        int row = -1, chan = -1;
        int pos[EDAC_MAX_LAYERS] = { top_layer, mid_layer, low_layer };
-       int i;
-       long grain;
-       bool enable_per_layer_report = false;
+       int i, n_labels = 0;
        u8 grain_bits;
+       struct edac_raw_error_desc *e = &mci->error_desc;
 
        edac_dbg(3, "MC%d\n", mci->mc_idx);
 
+       /* Fill the error report buffer */
+       memset(e, 0, sizeof (*e));
+       e->error_count = error_count;
+       e->top_layer = top_layer;
+       e->mid_layer = mid_layer;
+       e->low_layer = low_layer;
+       e->page_frame_number = page_frame_number;
+       e->offset_in_page = offset_in_page;
+       e->syndrome = syndrome;
+       e->msg = msg;
+       e->other_detail = other_detail;
+
        /*
         * Check if the event report is consistent and if the memory
         * location is known. If it is known, enable_per_layer_report will be
@@ -1121,7 +1179,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
                        pos[i] = -1;
                }
                if (pos[i] >= 0)
-                       enable_per_layer_report = true;
+                       e->enable_per_layer_report = true;
        }
 
        /*
@@ -1135,8 +1193,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
         * where each memory belongs to a separate channel within the same
         * branch.
         */
-       grain = 0;
-       p = label;
+       p = e->label;
        *p = '\0';
 
        for (i = 0; i < mci->tot_dimms; i++) {
@@ -1150,8 +1207,8 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
                        continue;
 
                /* get the max grain, over the error match range */
-               if (dimm->grain > grain)
-                       grain = dimm->grain;
+               if (dimm->grain > e->grain)
+                       e->grain = dimm->grain;
 
                /*
                 * If the error is memory-controller wide, there's no need to
@@ -1159,8 +1216,13 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
                 * channel/memory controller/...  may be affected.
                 * Also, don't show errors for empty DIMM slots.
                 */
-               if (enable_per_layer_report && dimm->nr_pages) {
-                       if (p != label) {
+               if (e->enable_per_layer_report && dimm->nr_pages) {
+                       if (n_labels >= EDAC_MAX_LABELS) {
+                               e->enable_per_layer_report = false;
+                               break;
+                       }
+                       n_labels++;
+                       if (p != e->label) {
                                strcpy(p, OTHER_LABEL);
                                p += strlen(OTHER_LABEL);
                        }
@@ -1187,12 +1249,12 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
                }
        }
 
-       if (!enable_per_layer_report) {
-               strcpy(label, "any memory");
+       if (!e->enable_per_layer_report) {
+               strcpy(e->label, "any memory");
        } else {
                edac_dbg(4, "csrow/channel to increment: (%d,%d)\n", row, chan);
-               if (p == label)
-                       strcpy(label, "unknown memory");
+               if (p == e->label)
+                       strcpy(e->label, "unknown memory");
                if (type == HW_EVENT_ERR_CORRECTED) {
                        if (row >= 0) {
                                mci->csrows[row]->ce_count += error_count;
@@ -1205,7 +1267,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
        }
 
        /* Fill the RAM location data */
-       p = location;
+       p = e->location;
 
        for (i = 0; i < mci->n_layers; i++) {
                if (pos[i] < 0)
@@ -1215,32 +1277,16 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
                             edac_layer_name[mci->layers[i].type],
                             pos[i]);
        }
-       if (p > location)
+       if (p > e->location)
                *(p - 1) = '\0';
 
        /* Report the error via the trace interface */
-       grain_bits = fls_long(grain) + 1;
-       trace_mc_event(type, msg, label, error_count,
-                      mci->mc_idx, top_layer, mid_layer, low_layer,
-                      PAGES_TO_MiB(page_frame_number) | offset_in_page,
-                      grain_bits, syndrome, other_detail);
+       grain_bits = fls_long(e->grain) + 1;
+       trace_mc_event(type, e->msg, e->label, e->error_count,
+                      mci->mc_idx, e->top_layer, e->mid_layer, e->low_layer,
+                      PAGES_TO_MiB(e->page_frame_number) | e->offset_in_page,
+                      grain_bits, e->syndrome, e->other_detail);
 
-       /* Memory type dependent details about the error */
-       if (type == HW_EVENT_ERR_CORRECTED) {
-               snprintf(detail, sizeof(detail),
-                       "page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx",
-                       page_frame_number, offset_in_page,
-                       grain, syndrome);
-               edac_ce_error(mci, error_count, pos, msg, location, label,
-                             detail, other_detail, enable_per_layer_report,
-                             page_frame_number, offset_in_page, grain);
-       } else {
-               snprintf(detail, sizeof(detail),
-                       "page:0x%lx offset:0x%lx grain:%ld",
-                       page_frame_number, offset_in_page, grain);
-
-               edac_ue_error(mci, error_count, pos, msg, location, label,
-                             detail, other_detail, enable_per_layer_report);
-       }
+       edac_raw_mc_handle_error(type, mci, e);
 }
 EXPORT_SYMBOL_GPL(edac_mc_handle_error);