All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/4] edac: i5100 fault injection
@ 2011-12-14 16:04 Niklas Söderlund
  2011-12-14 16:04 ` [PATCH 1/4] edac: i5100 add sysfs nodes for " Niklas Söderlund
                   ` (3 more replies)
  0 siblings, 4 replies; 5+ messages in thread
From: Niklas Söderlund @ 2011-12-14 16:04 UTC (permalink / raw)
  To: lucas.demarchi, borislav.petkov, tony.luck; +Cc: linux-kernel, linux-edac

Add experimental support for fault injection to the i5100 MC.
Not all information is in the i5100 datasheet; some parts were
found through experimentation and from the i7300 datasheet, which
corresponds to the i5100 in some areas.

This series is based on a previous patch sent to this list to improve
error detection (ref 1), which is not included in this patchset. I can't
find it in any edac repo and am not sure whether I should have included
it in this patchset; please enlighten me so I know in the future.

[1] [PATCH] edac: i5100 ack error detection register after each read


^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH 1/4] edac: i5100 add sysfs nodes for fault injection
  2011-12-14 16:04 [PATCH 0/4] edac: i5100 fault injection Niklas Söderlund
@ 2011-12-14 16:04 ` Niklas Söderlund
  2011-12-14 16:04 ` [PATCH 2/4] edac: i5100 probe for device 19 function 0 Niklas Söderlund
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 5+ messages in thread
From: Niklas Söderlund @ 2011-12-14 16:04 UTC (permalink / raw)
  To: lucas.demarchi, borislav.petkov, tony.luck
  Cc: linux-kernel, linux-edac, Niklas Söderlund

Add sysfs nodes to /sys/devices/system/edac/mc/mcX to control fault
injection.

Signed-off-by: Niklas Söderlund <niklas.soderlund@ericsson.com>
---
 drivers/edac/i5100_edac.c |   94 +++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 94 insertions(+), 0 deletions(-)

diff --git a/drivers/edac/i5100_edac.c b/drivers/edac/i5100_edac.c
index bcbdeec..1319e59 100644
--- a/drivers/edac/i5100_edac.c
+++ b/drivers/edac/i5100_edac.c
@@ -338,6 +338,15 @@ struct i5100_priv {
 
 	struct delayed_work i5100_scrubbing;
 	int scrub_enable;
+
+	/* Error injection */
+	u32 inject_enable;
+	u32 inject_channel;
+	u32 inject_hlinesel;
+	u32 inject_deviceptr1;
+	u32 inject_deviceptr2;
+	u32 inject_eccmask1;
+	u32 inject_eccmask2;
 };
 
 /* map a rank/chan to a slot number on the mainboard */
@@ -879,6 +888,80 @@ static void __devinit i5100_init_csrows(struct mem_ctl_info *mci)
 	}
 }
 
+#define DECLARE_INJECT_PARAM(param, limit_low, limit_high)		\
+static ssize_t i5100_inject_store_##param(				\
+		struct mem_ctl_info *mci,				\
+		const char *data,					\
+		size_t count)						\
+{									\
+	struct i5100_priv *priv = mci->pvt_info;			\
+	unsigned long value;						\
+	int rc;								\
+									\
+	rc = kstrtoul(data, 10, &value);				\
+	if (rc < 0 || value < limit_low || value > limit_high)		\
+		return -EIO;						\
+									\
+	priv->param = value;						\
+									\
+	return count;							\
+}									\
+									\
+static ssize_t i5100_inject_show_##param(				\
+		struct mem_ctl_info *mci,				\
+		char *data)						\
+{									\
+	struct i5100_priv *priv = mci->pvt_info;			\
+	return sprintf(data, "%d\n", priv->param);			\
+}
+
+#define ATTR_INJECT(param)						\
+	{								\
+		.attr = {						\
+			.name = #param,					\
+			.mode = (S_IRUGO | S_IWUSR)			\
+		},							\
+		.show  = i5100_inject_show_##param,			\
+		.store = i5100_inject_store_##param,			\
+	}
+
+DECLARE_INJECT_PARAM(inject_channel, 0, 1);
+DECLARE_INJECT_PARAM(inject_hlinesel, 1, 3);
+DECLARE_INJECT_PARAM(inject_deviceptr1, 0, 17);
+DECLARE_INJECT_PARAM(inject_deviceptr2, 0, 17);
+DECLARE_INJECT_PARAM(inject_eccmask1, 0, 0xFFFF);
+DECLARE_INJECT_PARAM(inject_eccmask2, 0, 0xFFFF);
+
+static ssize_t i5100_inject_store_inject_enable(struct mem_ctl_info *mci,
+		const char *data, size_t count)
+{
+	unsigned long value;
+
+	if (kstrtoul(data, 10, &value) < 0)
+		return -EIO;
+
+	return count;
+}
+
+static ssize_t i5100_inject_show_inject_enable(struct mem_ctl_info *mci,
+		char *data)
+{
+	struct i5100_priv *priv = mci->pvt_info;
+	return sprintf(data, "%d\n", priv->inject_enable);
+}
+
+static const struct mcidev_sysfs_attribute i5100_sysfs_attrs[] = {
+	ATTR_INJECT(inject_enable),
+	ATTR_INJECT(inject_channel),
+	ATTR_INJECT(inject_hlinesel),
+	ATTR_INJECT(inject_deviceptr1),
+	ATTR_INJECT(inject_deviceptr2),
+	ATTR_INJECT(inject_eccmask1),
+	ATTR_INJECT(inject_eccmask2),
+
+	{ } /* End of list */
+};
+
 static int __devinit i5100_init_one(struct pci_dev *pdev,
 				    const struct pci_device_id *id)
 {
@@ -984,6 +1067,17 @@ static int __devinit i5100_init_one(struct pci_dev *pdev,
 	mci->set_sdram_scrub_rate = i5100_set_scrub_rate;
 	mci->get_sdram_scrub_rate = i5100_get_scrub_rate;
 
+	priv->inject_enable = 0;
+	priv->inject_channel = 0;
+	priv->inject_hlinesel = 1;
+	priv->inject_deviceptr1 = 0;
+	priv->inject_deviceptr2 = 0;
+	priv->inject_eccmask1 = 0;
+	priv->inject_eccmask2 = 0;
+
+	/* Configure sysfs */
+	mci->mc_driver_sysfs_attributes = i5100_sysfs_attrs;
+
 	i5100_init_csrows(mci);
 
 	/* this strange construction seems to be in every driver, dunno why */
-- 
1.7.7.3


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH 2/4] edac: i5100 probe for device 19 function 0
  2011-12-14 16:04 [PATCH 0/4] edac: i5100 fault injection Niklas Söderlund
  2011-12-14 16:04 ` [PATCH 1/4] edac: i5100 add sysfs nodes for " Niklas Söderlund
@ 2011-12-14 16:04 ` Niklas Söderlund
  2011-12-14 16:04 ` [PATCH 3/4] edac: i5100 add fault injection code Niklas Söderlund
  2011-12-14 16:04 ` [PATCH 4/4] edac: i5100 add documentation for fault injection Niklas Söderlund
  3 siblings, 0 replies; 5+ messages in thread
From: Niklas Söderlund @ 2011-12-14 16:04 UTC (permalink / raw)
  To: lucas.demarchi, borislav.petkov, tony.luck
  Cc: linux-kernel, linux-edac, Niklas Söderlund

Probe and store the device handle for the device 19 function 0 during
driver initialization. The device is used during fault injection.

Signed-off-by: Niklas Söderlund <niklas.soderlund@ericsson.com>
---
 drivers/edac/i5100_edac.c |   26 +++++++++++++++++++++++++-
 include/linux/pci_ids.h   |    1 +
 2 files changed, 26 insertions(+), 1 deletions(-)

diff --git a/drivers/edac/i5100_edac.c b/drivers/edac/i5100_edac.c
index 1319e59..3840674 100644
--- a/drivers/edac/i5100_edac.c
+++ b/drivers/edac/i5100_edac.c
@@ -333,6 +333,7 @@ struct i5100_priv {
 	unsigned ranksperchan;	/* number of ranks per channel */
 
 	struct pci_dev *mc;	/* device 16 func 1 */
+	struct pci_dev *mc_einj;/* device 19 func 0 */
 	struct pci_dev *ch0mm;	/* device 21 func 0 */
 	struct pci_dev *ch1mm;	/* device 22 func 0 */
 
@@ -968,7 +969,7 @@ static int __devinit i5100_init_one(struct pci_dev *pdev,
 	int rc;
 	struct mem_ctl_info *mci;
 	struct i5100_priv *priv;
-	struct pci_dev *ch0mm, *ch1mm;
+	struct pci_dev *ch0mm, *ch1mm, *einj;
 	int ret = 0;
 	u32 dw;
 	int ranksperch;
@@ -1033,6 +1034,22 @@ static int __devinit i5100_init_one(struct pci_dev *pdev,
 		goto bail_disable_ch1;
 	}
 
+
+	/* device 19, func 0, Error injection */
+	einj = pci_get_device_func(PCI_VENDOR_ID_INTEL,
+				    PCI_DEVICE_ID_INTEL_5100_19, 0);
+	if (!einj) {
+		ret = -ENODEV;
+		goto bail_einj;
+	}
+
+	rc = pci_enable_device(einj);
+	if (rc < 0) {
+		ret = rc;
+		goto bail_disable_einj;
+	}
+
+
 	mci->dev = &pdev->dev;
 
 	priv = mci->pvt_info;
@@ -1040,6 +1057,7 @@ static int __devinit i5100_init_one(struct pci_dev *pdev,
 	priv->mc = pdev;
 	priv->ch0mm = ch0mm;
 	priv->ch1mm = ch1mm;
+	priv->mc_einj = einj;
 
 	INIT_DELAYED_WORK(&(priv->i5100_scrubbing), i5100_refresh_scrubbing);
 
@@ -1102,6 +1120,12 @@ bail_scrub:
 	cancel_delayed_work_sync(&(priv->i5100_scrubbing));
 	edac_mc_free(mci);
 
+bail_disable_einj:
+	pci_disable_device(einj);
+
+bail_einj:
+	pci_dev_put(einj);
+
 bail_disable_ch1:
 	pci_disable_device(ch1mm);
 
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 2aaee0c..8bf149d 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2732,6 +2732,7 @@
 #define PCI_DEVICE_ID_INTEL_IOAT_SNB9	0x3c2f
 #define PCI_DEVICE_ID_INTEL_IOAT_SNB	0x402f
 #define PCI_DEVICE_ID_INTEL_5100_16	0x65f0
+#define PCI_DEVICE_ID_INTEL_5100_19	0x65f3
 #define PCI_DEVICE_ID_INTEL_5100_21	0x65f5
 #define PCI_DEVICE_ID_INTEL_5100_22	0x65f6
 #define PCI_DEVICE_ID_INTEL_5400_ERR	0x4030
-- 
1.7.7.3


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH 3/4] edac: i5100 add fault injection code
  2011-12-14 16:04 [PATCH 0/4] edac: i5100 fault injection Niklas Söderlund
  2011-12-14 16:04 ` [PATCH 1/4] edac: i5100 add sysfs nodes for " Niklas Söderlund
  2011-12-14 16:04 ` [PATCH 2/4] edac: i5100 probe for device 19 function 0 Niklas Söderlund
@ 2011-12-14 16:04 ` Niklas Söderlund
  2011-12-14 16:04 ` [PATCH 4/4] edac: i5100 add documentation for fault injection Niklas Söderlund
  3 siblings, 0 replies; 5+ messages in thread
From: Niklas Söderlund @ 2011-12-14 16:04 UTC (permalink / raw)
  To: lucas.demarchi, borislav.petkov, tony.luck
  Cc: linux-kernel, linux-edac, Niklas Söderlund

Add fault injection based on information in the i5100 datasheet, see 1.
In addition to the i5100 datasheet, some missing information on the
injection functions was found through experimentation and in the i7300
datasheet, see 2.

[1] Intel 5100 Memory Controller Hub Chipset
    Doc.Nr: 318378
    http://www.intel.com/content/dam/doc/datasheet/5100-
    memory-controller-hub-chipset-datasheet.pdf

[2] Intel 7300 Chipset Memory Controller Hub (MCH)
    Doc.Nr: 318082
    http://www.intel.com/assets/pdf/datasheet/318082.pdf

Signed-off-by: Niklas Söderlund <niklas.soderlund@ericsson.com>
---
 drivers/edac/i5100_edac.c |  110 +++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 110 insertions(+), 0 deletions(-)

diff --git a/drivers/edac/i5100_edac.c b/drivers/edac/i5100_edac.c
index 3840674..54392e2 100644
--- a/drivers/edac/i5100_edac.c
+++ b/drivers/edac/i5100_edac.c
@@ -63,6 +63,14 @@
 			I5100_FERR_NF_MEM_M1ERR_MASK)
 #define	I5100_NERR_NF_MEM	0xa4	/* MC Next Non-Fatal Errors */
 #define I5100_EMASK_MEM		0xa8	/* MC Error Mask Register */
+#define I5100_MEM0EINJMSK0	0x200	/* Injection Mask0 Register Channel 0 */
+#define I5100_MEM1EINJMSK0	0x208	/* Injection Mask0 Register Channel 1 */
+#define		I5100_MEMXEINJMSK0_EINJEN	(1 << 27)
+#define I5100_MEM0EINJMSK1	0x204	/* Injection Mask1 Register Channel 0 */
+#define I5100_MEM1EINJMSK1	0x206	/* Injection Mask1 Register Channel 1 */
+
+/* Device 19, Function 0 */
+#define I5100_DINJ0 0x9a
 
 /* device 21 and 22, func 0 */
 #define I5100_MTR_0	0x154	/* Memory Technology Registers 0-3 */
@@ -889,6 +897,100 @@ static void __devinit i5100_init_csrows(struct mem_ctl_info *mci)
 	}
 }
 
+/****************************************************************************
+ *                       Error injection routines
+ ****************************************************************************
+ *
+ * The i5100 has independent error injection features per channel.
+ * However, to keep the code simple, we don't allow enabling error injection
+ * on more than one channel.
+ * Also, since a change to an inject parameter is applied only at enable,
+ * we disable error injection on every write to the sysfs nodes that
+ * control it.
+ */
+
+static int i5100_inject_write(struct mem_ctl_info *mci, u32 mask0, u16 mask1,
+		u8 dinj)
+{
+	struct i5100_priv *priv = mci->pvt_info;
+
+	/* MEM[1:0]EINJMSK0
+	 * 31    - ADDRMATCHEN
+	 * 29:28 - HLINESEL
+	 *         00 Reserved
+	 *         01 Lower half of cache line
+	 *         10 Upper half of cache line
+	 *         11 Both upper and lower parts of cache line
+	 * 27    - EINJEN
+	 * 25:10 - XORMASK1 for deviceptr1
+	 * 9:5   - SEC2RAM or deviceptr2
+	 * 4:0   - FIR2RAM or deviceptr1
+	 */
+
+	/* MEM[1:0]EINJMSK1
+	 * 15:0  - XORMASK2 for deviceptr2
+	 */
+
+	/* Only write to the specified channel. */
+	if (priv->inject_channel == 0) {
+		pci_write_config_dword(priv->mc, I5100_MEM0EINJMSK0, mask0);
+		pci_write_config_word(priv->mc, I5100_MEM0EINJMSK1, mask1);
+	} else {
+		pci_write_config_dword(priv->mc, I5100_MEM1EINJMSK0, mask0);
+		pci_write_config_word(priv->mc, I5100_MEM1EINJMSK1, mask1);
+	}
+
+	/* Error Injection Response Function
+	 * Intel 5100 Memory Controller Hub Chipset (318378) datasheet
+	 * hints at this register but carries no data about it. All
+	 * data regarding device 19 is based on experimentation and the
+	 * Intel 7300 Chipset Memory Controller Hub (318082) datasheet
+	 * which appears to be accurate for the i5100 in this area.
+	 *
+	 * The injection code doesn't work without setting this register.
+	 *
+	 * Stop condition bits 7:4
+	 * 1010 - Stop after one injection
+	 * 1011 - Never stop injecting faults
+	 *
+	 * Start condition bits 3:0
+	 * 1010 - Never start
+	 * 1011 - Start immediately
+	 */
+	pci_write_config_byte(priv->mc_einj, I5100_DINJ0, dinj);
+
+	return 0;
+}
+
+
+static int i5100_inject_disable(struct mem_ctl_info *mci)
+{
+	struct i5100_priv *priv = mci->pvt_info;
+
+	priv->inject_enable = 0;
+
+	return i5100_inject_write(mci, 0, 0, 0xaa);
+}
+
+static int i5100_inject_enable(struct mem_ctl_info *mci)
+{
+	struct i5100_priv *priv = mci->pvt_info;
+	u32 mask0;
+	u16 mask1;
+
+	priv->inject_enable = 1;
+
+	mask0 = ((priv->inject_hlinesel & 0x3) << 28) |
+		I5100_MEMXEINJMSK0_EINJEN |
+		((priv->inject_eccmask1 & 0xffff) << 10) |
+		((priv->inject_deviceptr2 & 0x1f) << 5) |
+		(priv->inject_deviceptr1 & 0x1f);
+
+	mask1 = priv->inject_eccmask2;
+
+	return i5100_inject_write(mci, mask0, mask1, 0xab);
+}
+
 #define DECLARE_INJECT_PARAM(param, limit_low, limit_high)		\
 static ssize_t i5100_inject_store_##param(				\
 		struct mem_ctl_info *mci,				\
@@ -899,6 +1001,9 @@ static ssize_t i5100_inject_store_##param(				\
 	unsigned long value;						\
 	int rc;								\
 									\
+	if (priv->inject_enable)					\
+		i5100_inject_disable(mci);				\
+									\
 	rc = kstrtoul(data, 10, &value);				\
 	if (rc < 0 || value < limit_low || value > limit_high)		\
 		return -EIO;						\
@@ -941,6 +1046,11 @@ static ssize_t i5100_inject_store_inject_enable(struct mem_ctl_info *mci,
 	if (kstrtoul(data, 10, &value) < 0)
 		return -EIO;
 
+	if (value)
+		i5100_inject_enable(mci);
+	else
+		i5100_inject_disable(mci);
+
 	return count;
 }
 
-- 
1.7.7.3


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH 4/4] edac: i5100 add documentation for fault injection
  2011-12-14 16:04 [PATCH 0/4] edac: i5100 fault injection Niklas Söderlund
                   ` (2 preceding siblings ...)
  2011-12-14 16:04 ` [PATCH 3/4] edac: i5100 add fault injection code Niklas Söderlund
@ 2011-12-14 16:04 ` Niklas Söderlund
  3 siblings, 0 replies; 5+ messages in thread
From: Niklas Söderlund @ 2011-12-14 16:04 UTC (permalink / raw)
  To: lucas.demarchi, borislav.petkov, tony.luck
  Cc: linux-kernel, linux-edac, Niklas Söderlund

Add documentation for the fault injection sysfs nodes supporting the
i5100 MC.

Signed-off-by: Niklas Söderlund <niklas.soderlund@ericsson.com>
---
 Documentation/edac.txt |   58 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 58 insertions(+), 0 deletions(-)

diff --git a/Documentation/edac.txt b/Documentation/edac.txt
index 249822c..6476d1b 100644
--- a/Documentation/edac.txt
+++ b/Documentation/edac.txt
@@ -869,3 +869,61 @@ exports one
    by the driver. Since, with udimm, this is counted by software, it is
    possible that some errors could be lost. With rdimm's, they displays the
    contents of the registers
+
+=======================================================================
+I5100 ERROR INJECTION USING EDAC APIs
+
+The i5100 MC has the ability to generate errors. The driver implements this
+functionality via some error injection nodes in sysfs under:
+
+ /sys/devices/system/edac/mc/mc?/
+
+   inject_channel:
+       Specifies which channel will generate an error
+
+   inject_deviceptr1:
+   inject_deviceptr2:
+       Specifies the device location into which errors will be injected. There
+       are two device pointers per channel and 18 locations on each device.
+
+   inject_hlinesel:
+       Specifies what part of the cacheline the eccmask is applied to
+        3 for both
+        2 for the highest
+        1 for the lowest
+
+   inject_eccmask1:
+   inject_eccmask2:
+       Specify which bits will be corrupted:
+        bits 0:7  - XOR mask for transfer 0 (lower half cache line) or 2
+                   (upper half cache line).
+        bits 8:15 - XOR mask for transfer 1 (lower half cache line) or 3
+                   (upper half cache line).
+
+   inject_enable:
+       Injects one error when a value other than 0 is written.
+
+   All inject vars can be read. root permission is needed for write.
+
+   The datasheet does not say how to trigger an injection; experimentation
+   shows that a read is enough to detect the error. If the error is not
+   detected, reinject it a few times by first disabling and then re-enabling
+   inject_enable. During testing the driver detected about 75% of the injected
+   errors; it is unclear why the driver doesn't see all injections.
+
+   For example, the following code will generate a CE error on channel 0:
+
+   echo 0 > /sys/devices/system/edac/mc/mc0/inject_channel
+   echo 0 > /sys/devices/system/edac/mc/mc0/inject_deviceptr1
+   echo 1 > /sys/devices/system/edac/mc/mc0/inject_hlinesel
+   echo 61440 > /sys/devices/system/edac/mc/mc0/inject_eccmask1
+   echo 1 >/sys/devices/system/edac/mc/mc0/inject_enable
+
+   The generated error message will look like:
+
+   CE chan 0, bank 0, rank 0, syndrome 0x29c5860f, cas 2920, ras 12235, csrow 0, label "DIMM0": correctable demand data ECC
+
+   If the injected error is not detected reinject it:
+
+   echo 0 >/sys/devices/system/edac/mc/mc0/inject_enable
+   echo 1 >/sys/devices/system/edac/mc/mc0/inject_enable
-- 
1.7.7.3


^ permalink raw reply related	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2011-12-14 16:05 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-12-14 16:04 [PATCH 0/4] edac: i5100 fault injection Niklas Söderlund
2011-12-14 16:04 ` [PATCH 1/4] edac: i5100 add sysfs nodes for " Niklas Söderlund
2011-12-14 16:04 ` [PATCH 2/4] edac: i5100 probe for device 19 function 0 Niklas Söderlund
2011-12-14 16:04 ` [PATCH 3/4] edac: i5100 add fault injection code Niklas Söderlund
2011-12-14 16:04 ` [PATCH 4/4] edac: i5100 add documentation for fault injection Niklas Söderlund

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.