linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/3, v6] AMD64 EDAC: Add muli-domain support
@ 2012-11-19 10:02 Daniel J Blueman
  2012-11-19 10:02 ` [PATCH 2/3, v3] AMD64 EDAC: Support >255 memory controllers Daniel J Blueman
                   ` (2 more replies)
  0 siblings, 3 replies; 7+ messages in thread
From: Daniel J Blueman @ 2012-11-19 10:02 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Ingo Molnar, Thomas Gleixner, H Peter Anvin, Steffen Persvold,
	x86, linux-kernel, Daniel J Blueman

Fix the handling of memory controller detection to index the array
of detected Northbridges, allowing memory controllers over multiple
PCI domains in federated systems eg using Numascale's NumaConnect/
NumaChip.

v4: Generate linear Northbridge ID by indexing detected Northbridges
v5: Reorder functions to prevent extra function declaration; merge 4th
    patch; simplify Fam15h code; add detail to warning
v6: Remove unused variable after simplification

Signed-off-by: Daniel J Blueman <daniel@numascale-asia.com>
---
 arch/x86/include/asm/amd_nb.h |   13 +++++++++++
 drivers/edac/amd64_edac.c     |   48 +++++++++++++++++++++--------------------
 drivers/edac/amd64_edac.h     |    6 ------
 3 files changed, 38 insertions(+), 29 deletions(-)

diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h
index b3341e9..9f5532a 100644
--- a/arch/x86/include/asm/amd_nb.h
+++ b/arch/x86/include/asm/amd_nb.h
@@ -81,6 +81,19 @@ static inline struct amd_northbridge *node_to_amd_nb(int node)
 	return (node < amd_northbridges.num) ? &amd_northbridges.nb[node] : NULL;
 }
 
+static inline u16 amd_get_node_id(struct pci_dev *pdev)
+{
+	int i;
+
+	for (i = 0; i != amd_nb_num(); i++)
+		if (pci_domain_nr(node_to_amd_nb(i)->misc->bus) == pci_domain_nr(pdev->bus) &&
+		    PCI_SLOT(node_to_amd_nb(i)->misc->devfn) == PCI_SLOT(pdev->devfn))
+			return i;
+
+	WARN(1, "Unable to find AMD Northbridge identifier for %s\n", pci_name(pdev));
+	return 0;
+}
+
 #else
 
 #define amd_nb_num(x)		0
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index cc8e7c7..8de8873 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -982,6 +982,24 @@ static u64 get_error_address(struct mce *m)
 	return addr;
 }
 
+static struct pci_dev *pci_get_related_function(unsigned int vendor,
+						unsigned int device,
+						struct pci_dev *related)
+{
+	struct pci_dev *dev = NULL;
+
+	dev = pci_get_device(vendor, device, dev);
+	while (dev) {
+		if (pci_domain_nr(dev->bus) == pci_domain_nr(related->bus) &&
+		    (dev->bus->number == related->bus->number) &&
+		    (PCI_SLOT(dev->devfn) == PCI_SLOT(related->devfn)))
+			break;
+		dev = pci_get_device(vendor, device, dev);
+	}
+
+	return dev;
+}
+
 static void read_dram_base_limit_regs(struct amd64_pvt *pvt, unsigned range)
 {
 	struct cpuinfo_x86 *c = &boot_cpu_data;
@@ -1001,11 +1019,12 @@ static void read_dram_base_limit_regs(struct amd64_pvt *pvt, unsigned range)
 
 	/* Factor in CC6 save area by reading dst node's limit reg */
 	if (c->x86 == 0x15) {
-		struct pci_dev *f1 = NULL;
-		u8 nid = dram_dst_node(pvt, range);
+		struct pci_dev *misc, *f1 = NULL;
+		u16 nid = dram_dst_node(pvt, range);
 		u32 llim;
 
-		f1 = pci_get_domain_bus_and_slot(0, 0, PCI_DEVFN(0x18 + nid, 1));
+		misc = node_to_amd_nb(nid)->misc;
+		f1 = pci_get_related_function(misc->vendor, PCI_DEVICE_ID_AMD_15H_NB_F1, misc);
 		if (WARN_ON(!f1))
 			return;
 
@@ -1712,23 +1731,6 @@ static struct amd64_family_type amd64_family_types[] = {
 	},
 };
 
-static struct pci_dev *pci_get_related_function(unsigned int vendor,
-						unsigned int device,
-						struct pci_dev *related)
-{
-	struct pci_dev *dev = NULL;
-
-	dev = pci_get_device(vendor, device, dev);
-	while (dev) {
-		if ((dev->bus->number == related->bus->number) &&
-		    (PCI_SLOT(dev->devfn) == PCI_SLOT(related->devfn)))
-			break;
-		dev = pci_get_device(vendor, device, dev);
-	}
-
-	return dev;
-}
-
 /*
  * These are tables of eigenvectors (one per line) which can be used for the
  * construction of the syndrome tables. The modified syndrome search algorithm
@@ -2546,7 +2548,7 @@ static int amd64_init_one_instance(struct pci_dev *F2)
 	struct mem_ctl_info *mci = NULL;
 	struct edac_mc_layer layers[2];
 	int err = 0, ret;
-	u8 nid = get_node_id(F2);
+	u8 nid = amd_get_node_id(F2);
 
 	ret = -ENOMEM;
 	pvt = kzalloc(sizeof(struct amd64_pvt), GFP_KERNEL);
@@ -2637,7 +2639,7 @@ err_ret:
 static int __devinit amd64_probe_one_instance(struct pci_dev *pdev,
 					     const struct pci_device_id *mc_type)
 {
-	u8 nid = get_node_id(pdev);
+	u8 nid = amd_get_node_id(pdev);
 	struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
 	struct ecc_settings *s;
 	int ret = 0;
@@ -2687,7 +2689,7 @@ static void __devexit amd64_remove_one_instance(struct pci_dev *pdev)
 {
 	struct mem_ctl_info *mci;
 	struct amd64_pvt *pvt;
-	u8 nid = get_node_id(pdev);
+	u8 nid = amd_get_node_id(pdev);
 	struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
 	struct ecc_settings *s = ecc_stngs[nid];
 
diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h
index 8d48047..90cae61 100644
--- a/drivers/edac/amd64_edac.h
+++ b/drivers/edac/amd64_edac.h
@@ -290,12 +290,6 @@
 /* MSRs */
 #define MSR_MCGCTL_NBE			BIT(4)
 
-/* AMD sets the first MC device at device ID 0x18. */
-static inline u8 get_node_id(struct pci_dev *pdev)
-{
-	return PCI_SLOT(pdev->devfn) - 0x18;
-}
-
 enum amd_families {
 	K8_CPUS = 0,
 	F10_CPUS,
-- 
1.7.10.4


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 2/3, v3] AMD64 EDAC: Support >255 memory controllers
  2012-11-19 10:02 [PATCH 1/3, v6] AMD64 EDAC: Add muli-domain support Daniel J Blueman
@ 2012-11-19 10:02 ` Daniel J Blueman
  2012-11-20 15:01   ` Borislav Petkov
  2012-11-19 10:02 ` [PATCH 3/3, v3] AMD64 EDAC: Cleanup type usage to be consistent Daniel J Blueman
  2012-11-20 15:01 ` [PATCH 1/3, v6] AMD64 EDAC: Add muli-domain support Borislav Petkov
  2 siblings, 1 reply; 7+ messages in thread
From: Daniel J Blueman @ 2012-11-19 10:02 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Ingo Molnar, Thomas Gleixner, H Peter Anvin, Steffen Persvold,
	x86, linux-kernel, Daniel J Blueman

As the AMD64 last-level-cache ID is 16-bits and federated systems
eg using Numascale's NumaConnect/NumaChip can have more than 255 memory
controllers, use 16-bits to store the ID.

v2: Avoid change to intlv_en variable
v3: Drop unneeded change to index

Signed-off-by: Daniel J Blueman <daniel@numascale-asia.com>
---
 drivers/edac/amd64_edac.c |   17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 8de8873..6e3f002 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -942,7 +942,8 @@ static u64 get_error_address(struct mce *m)
 		struct amd64_pvt *pvt;
 		u64 cc6_base, tmp_addr;
 		u32 tmp;
-		u8 mce_nid, intlv_en;
+		u16 mce_nid;
+		u8 intlv_en;
 
 		if ((addr & GENMASK(24, 47)) >> 24 != 0x00fdf7)
 			return addr;
@@ -2298,7 +2299,7 @@ out:
 	return ret;
 }
 
-static int toggle_ecc_err_reporting(struct ecc_settings *s, u8 nid, bool on)
+static int toggle_ecc_err_reporting(struct ecc_settings *s, u16 nid, bool on)
 {
 	cpumask_var_t cmask;
 	int cpu;
@@ -2336,7 +2337,7 @@ static int toggle_ecc_err_reporting(struct ecc_settings *s, u8 nid, bool on)
 	return 0;
 }
 
-static bool enable_ecc_error_reporting(struct ecc_settings *s, u8 nid,
+static bool enable_ecc_error_reporting(struct ecc_settings *s, u16 nid,
 				       struct pci_dev *F3)
 {
 	bool ret = true;
@@ -2388,7 +2389,7 @@ static bool enable_ecc_error_reporting(struct ecc_settings *s, u8 nid,
 	return ret;
 }
 
-static void restore_ecc_error_reporting(struct ecc_settings *s, u8 nid,
+static void restore_ecc_error_reporting(struct ecc_settings *s, u16 nid,
 					struct pci_dev *F3)
 {
 	u32 value, mask = 0x3;		/* UECC/CECC enable */
@@ -2427,7 +2428,7 @@ static const char *ecc_msg =
 	"'ecc_enable_override'.\n"
 	" (Note that use of the override may cause unknown side effects.)\n";
 
-static bool ecc_enabled(struct pci_dev *F3, u8 nid)
+static bool ecc_enabled(struct pci_dev *F3, u16 nid)
 {
 	u32 value;
 	u8 ecc_en = 0;
@@ -2548,7 +2549,7 @@ static int amd64_init_one_instance(struct pci_dev *F2)
 	struct mem_ctl_info *mci = NULL;
 	struct edac_mc_layer layers[2];
 	int err = 0, ret;
-	u8 nid = amd_get_node_id(F2);
+	u16 nid = amd_get_node_id(F2);
 
 	ret = -ENOMEM;
 	pvt = kzalloc(sizeof(struct amd64_pvt), GFP_KERNEL);
@@ -2639,7 +2640,7 @@ err_ret:
 static int __devinit amd64_probe_one_instance(struct pci_dev *pdev,
 					     const struct pci_device_id *mc_type)
 {
-	u8 nid = amd_get_node_id(pdev);
+	u16 nid = amd_get_node_id(pdev);
 	struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
 	struct ecc_settings *s;
 	int ret = 0;
@@ -2689,7 +2690,7 @@ static void __devexit amd64_remove_one_instance(struct pci_dev *pdev)
 {
 	struct mem_ctl_info *mci;
 	struct amd64_pvt *pvt;
-	u8 nid = amd_get_node_id(pdev);
+	u16 nid = amd_get_node_id(pdev);
 	struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
 	struct ecc_settings *s = ecc_stngs[nid];
 
-- 
1.7.10.4


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 3/3, v3] AMD64 EDAC: Cleanup type usage to be consistent
  2012-11-19 10:02 [PATCH 1/3, v6] AMD64 EDAC: Add muli-domain support Daniel J Blueman
  2012-11-19 10:02 ` [PATCH 2/3, v3] AMD64 EDAC: Support >255 memory controllers Daniel J Blueman
@ 2012-11-19 10:02 ` Daniel J Blueman
  2012-11-20 15:01   ` Borislav Petkov
  2012-11-20 15:01 ` [PATCH 1/3, v6] AMD64 EDAC: Add muli-domain support Borislav Petkov
  2 siblings, 1 reply; 7+ messages in thread
From: Daniel J Blueman @ 2012-11-19 10:02 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Ingo Molnar, Thomas Gleixner, H Peter Anvin, Steffen Persvold,
	x86, linux-kernel, Daniel J Blueman

As the Northbridge IDs are at most 16-bits, use the same type
consistently and cleanup some indexes to use smaller types.

v2: Drop changes for later cleanups
v3: Further changes suggested by Boris

Signed-off-by: Daniel J Blueman <daniel@numascale-asia.com>
---
 arch/x86/include/asm/amd_nb.h    |    2 +-
 arch/x86/include/asm/processor.h |    2 +-
 arch/x86/kernel/cpu/amd.c        |    4 ++--
 drivers/edac/amd64_edac.c        |   16 ++++++++--------
 drivers/edac/amd64_edac.h        |    6 +++---
 5 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h
index 9f5532a..b0815a0 100644
--- a/arch/x86/include/asm/amd_nb.h
+++ b/arch/x86/include/asm/amd_nb.h
@@ -76,7 +76,7 @@ static inline bool amd_nb_has_feature(unsigned feature)
 	return ((amd_northbridges.flags & feature) == feature);
 }
 
-static inline struct amd_northbridge *node_to_amd_nb(int node)
+static inline struct amd_northbridge *node_to_amd_nb(u16 node)
 {
 	return (node < amd_northbridges.num) ? &amd_northbridges.nb[node] : NULL;
 }
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index ad1fc85..eb3ba58 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -934,7 +934,7 @@ extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
 extern int get_tsc_mode(unsigned long adr);
 extern int set_tsc_mode(unsigned int val);
 
-extern int amd_get_nb_id(int cpu);
+extern u16 amd_get_nb_id(int cpu);
 
 struct aperfmperf {
 	u64 aperf, mperf;
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index f7e98a2..52cab1f 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -364,9 +364,9 @@ static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c)
 #endif
 }
 
-int amd_get_nb_id(int cpu)
+u16 amd_get_nb_id(int cpu)
 {
-	int id = 0;
+	u16 id = 0;
 #ifdef CONFIG_SMP
 	id = per_cpu(cpu_llc_id, cpu);
 #endif
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 6e3f002..b27412a 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -239,7 +239,7 @@ static int amd64_get_scrub_rate(struct mem_ctl_info *mci)
  * DRAM base/limit associated with node_id
  */
 static bool amd64_base_limit_match(struct amd64_pvt *pvt, u64 sys_addr,
-				   unsigned nid)
+				   u8 nid)
 {
 	u64 addr;
 
@@ -265,7 +265,7 @@ static struct mem_ctl_info *find_mc_by_sys_addr(struct mem_ctl_info *mci,
 						u64 sys_addr)
 {
 	struct amd64_pvt *pvt;
-	unsigned node_id;
+	u8 node_id;
 	u32 intlv_en, bits;
 
 	/*
@@ -1021,7 +1021,7 @@ static void read_dram_base_limit_regs(struct amd64_pvt *pvt, unsigned range)
 	/* Factor in CC6 save area by reading dst node's limit reg */
 	if (c->x86 == 0x15) {
 		struct pci_dev *misc, *f1 = NULL;
-		u16 nid = dram_dst_node(pvt, range);
+		u8 nid = dram_dst_node(pvt, range);
 		u32 llim;
 
 		misc = node_to_amd_nb(nid)->misc;
@@ -1348,7 +1348,7 @@ static u8 f1x_determine_channel(struct amd64_pvt *pvt, u64 sys_addr,
 }
 
 /* Convert the sys_addr to the normalized DCT address */
-static u64 f1x_get_norm_dct_addr(struct amd64_pvt *pvt, unsigned range,
+static u64 f1x_get_norm_dct_addr(struct amd64_pvt *pvt, u8 range,
 				 u64 sys_addr, bool hi_rng,
 				 u32 dct_sel_base_addr)
 {
@@ -1399,7 +1399,7 @@ static u64 f1x_get_norm_dct_addr(struct amd64_pvt *pvt, unsigned range,
  * checks if the csrow passed in is marked as SPARED, if so returns the new
  * spare row
  */
-static int f10_process_possible_spare(struct amd64_pvt *pvt, u8 dct, int csrow)
+static int f10_process_possible_spare(struct amd64_pvt *pvt, u16 dct, int csrow)
 {
 	int tmp_cs;
 
@@ -1424,7 +1424,7 @@ static int f10_process_possible_spare(struct amd64_pvt *pvt, u8 dct, int csrow)
  *	-EINVAL:  NOT FOUND
  *	0..csrow = Chip-Select Row
  */
-static int f1x_lookup_addr_in_dct(u64 in_addr, u32 nid, u8 dct)
+static int f1x_lookup_addr_in_dct(u64 in_addr, u8 nid, u8 dct)
 {
 	struct mem_ctl_info *mci;
 	struct amd64_pvt *pvt;
@@ -2256,7 +2256,7 @@ static int init_csrows(struct mem_ctl_info *mci)
 }
 
 /* get all cores on this DCT */
-static void get_cpus_on_this_dct_cpumask(struct cpumask *mask, unsigned nid)
+static void get_cpus_on_this_dct_cpumask(struct cpumask *mask, u16 nid)
 {
 	int cpu;
 
@@ -2266,7 +2266,7 @@ static void get_cpus_on_this_dct_cpumask(struct cpumask *mask, unsigned nid)
 }
 
 /* check MCG_CTL on all the cpus on this node */
-static bool amd64_nb_mce_bank_enabled_on_node(unsigned nid)
+static bool amd64_nb_mce_bank_enabled_on_node(u16 nid)
 {
 	cpumask_var_t mask;
 	int cpu, nbe;
diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h
index 90cae61..37cf7ad 100644
--- a/drivers/edac/amd64_edac.h
+++ b/drivers/edac/amd64_edac.h
@@ -332,7 +332,7 @@ struct amd64_pvt {
 	/* pci_device handles which we utilize */
 	struct pci_dev *F1, *F2, *F3;
 
-	unsigned mc_node_id;	/* MC index of this MC node */
+	u16 mc_node_id;		/* MC index of this MC node */
 	int ext_model;		/* extended model value of this node */
 	int channel_count;
 
@@ -368,7 +368,7 @@ struct amd64_pvt {
 	struct error_injection injection;
 };
 
-static inline u64 get_dram_base(struct amd64_pvt *pvt, unsigned i)
+static inline u64 get_dram_base(struct amd64_pvt *pvt, u8 i)
 {
 	u64 addr = ((u64)pvt->ranges[i].base.lo & 0xffff0000) << 8;
 
@@ -378,7 +378,7 @@ static inline u64 get_dram_base(struct amd64_pvt *pvt, unsigned i)
 	return (((u64)pvt->ranges[i].base.hi & 0x000000ff) << 40) | addr;
 }
 
-static inline u64 get_dram_limit(struct amd64_pvt *pvt, unsigned i)
+static inline u64 get_dram_limit(struct amd64_pvt *pvt, u8 i)
 {
 	u64 lim = (((u64)pvt->ranges[i].lim.lo & 0xffff0000) << 8) | 0x00ffffff;
 
-- 
1.7.10.4


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH 3/3, v3] AMD64 EDAC: Cleanup type usage to be consistent
  2012-11-19 10:02 ` [PATCH 3/3, v3] AMD64 EDAC: Cleanup type usage to be consistent Daniel J Blueman
@ 2012-11-20 15:01   ` Borislav Petkov
  0 siblings, 0 replies; 7+ messages in thread
From: Borislav Petkov @ 2012-11-20 15:01 UTC (permalink / raw)
  To: Daniel J Blueman
  Cc: Ingo Molnar, Thomas Gleixner, H Peter Anvin, Steffen Persvold,
	x86, linux-kernel

On Mon, Nov 19, 2012 at 06:02:48PM +0800, Daniel J Blueman wrote:
> As the Northbridge IDs are at most 16-bits, use the same type
> consistently and cleanup some indexes to use smaller types.
> 
> v2: Drop changes for later cleanups
> v3: Further changes suggested by Boris
> 
> Signed-off-by: Daniel J Blueman <daniel@numascale-asia.com>
> ---
>  arch/x86/include/asm/amd_nb.h    |    2 +-
>  arch/x86/include/asm/processor.h |    2 +-
>  arch/x86/kernel/cpu/amd.c        |    4 ++--
>  drivers/edac/amd64_edac.c        |   16 ++++++++--------
>  drivers/edac/amd64_edac.h        |    6 +++---
>  5 files changed, 15 insertions(+), 15 deletions(-)
> 
> diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h
> index 9f5532a..b0815a0 100644
> --- a/arch/x86/include/asm/amd_nb.h
> +++ b/arch/x86/include/asm/amd_nb.h
> @@ -76,7 +76,7 @@ static inline bool amd_nb_has_feature(unsigned feature)
>  	return ((amd_northbridges.flags & feature) == feature);
>  }
>  
> -static inline struct amd_northbridge *node_to_amd_nb(int node)
> +static inline struct amd_northbridge *node_to_amd_nb(u16 node)
>  {
>  	return (node < amd_northbridges.num) ? &amd_northbridges.nb[node] : NULL;
>  }

You have a hunk in patch 1/3 changing the argument being passed to this
function to u16 but you're changing the actual function right here, in
3/3, which is strange and easy to puzzle a potential reviewer.

As a rule of thumb: always do your changes to the functions and their callsites
in one patch so that they belong together and can be reviewed easily.

Thanks.

-- 
Regards/Gruss,
Boris.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 1/3, v6] AMD64 EDAC: Add muli-domain support
  2012-11-19 10:02 [PATCH 1/3, v6] AMD64 EDAC: Add muli-domain support Daniel J Blueman
  2012-11-19 10:02 ` [PATCH 2/3, v3] AMD64 EDAC: Support >255 memory controllers Daniel J Blueman
  2012-11-19 10:02 ` [PATCH 3/3, v3] AMD64 EDAC: Cleanup type usage to be consistent Daniel J Blueman
@ 2012-11-20 15:01 ` Borislav Petkov
  2 siblings, 0 replies; 7+ messages in thread
From: Borislav Petkov @ 2012-11-20 15:01 UTC (permalink / raw)
  To: Daniel J Blueman
  Cc: Ingo Molnar, Thomas Gleixner, H Peter Anvin, Steffen Persvold,
	x86, linux-kernel

On Mon, Nov 19, 2012 at 06:02:46PM +0800, Daniel J Blueman wrote:
> Fix the handling of memory controller detection to index the array
> of detected Northbridges, allowing memory controllers over multiple
> PCI domains in federated systems eg using Numascale's NumaConnect/
> NumaChip.
> 
> v4: Generate linear Northbridge ID by indexing detected Northbridges
> v5: Reorder functions to prevent extra function declaration; merge 4th
>     patch; simplify Fam15h code; add detail to warning
> v6: Remove unused variable after simplification
> 
> Signed-off-by: Daniel J Blueman <daniel@numascale-asia.com>

Ok, let's do this right:

> ---
>  arch/x86/include/asm/amd_nb.h |   13 +++++++++++
>  drivers/edac/amd64_edac.c     |   48 +++++++++++++++++++++--------------------
>  drivers/edac/amd64_edac.h     |    6 ------
>  3 files changed, 38 insertions(+), 29 deletions(-)
> 
> diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h
> index b3341e9..9f5532a 100644
> --- a/arch/x86/include/asm/amd_nb.h
> +++ b/arch/x86/include/asm/amd_nb.h
> @@ -81,6 +81,19 @@ static inline struct amd_northbridge *node_to_amd_nb(int node)
>  	return (node < amd_northbridges.num) ? &amd_northbridges.nb[node] : NULL;
>  }
>  
> +static inline u16 amd_get_node_id(struct pci_dev *pdev)
> +{
> +	int i;
> +
> +	for (i = 0; i != amd_nb_num(); i++)
> +		if (pci_domain_nr(node_to_amd_nb(i)->misc->bus) == pci_domain_nr(pdev->bus) &&
> +		    PCI_SLOT(node_to_amd_nb(i)->misc->devfn) == PCI_SLOT(pdev->devfn))
> +			return i;
> +
> +	WARN(1, "Unable to find AMD Northbridge identifier for %s\n", pci_name(pdev));
> +	return 0;
> +}
> +

This patch should be split into the changes adding amd_get_node_id and related
to it and the rest of them.

Btw, those lines above stick needlessly beyond 80 cols so you could do
something like:

static inline u16 amd_get_node_id(struct pci_dev *pdev)
{
	struct pci_dev *misc;
	int i;

	for (i = 0; i != amd_nb_num(); i++) {
		misc = node_to_amd_nb(i)->misc;

		if (pci_domain_nr(misc->bus) == pci_domain_nr(pdev->bus) &&
		    PCI_SLOT(misc->devfn) == PCI_SLOT(pdev->devfn))
			return i;
	}

to fix that.

>  #else
>  
>  #define amd_nb_num(x)		0
> diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
> index cc8e7c7..8de8873 100644
> --- a/drivers/edac/amd64_edac.c
> +++ b/drivers/edac/amd64_edac.c
> @@ -982,6 +982,24 @@ static u64 get_error_address(struct mce *m)
>  	return addr;
>  }
>  
> +static struct pci_dev *pci_get_related_function(unsigned int vendor,
> +						unsigned int device,
> +						struct pci_dev *related)
> +{
> +	struct pci_dev *dev = NULL;
> +
> +	dev = pci_get_device(vendor, device, dev);
> +	while (dev) {
> +		if (pci_domain_nr(dev->bus) == pci_domain_nr(related->bus) &&
> +		    (dev->bus->number == related->bus->number) &&
> +		    (PCI_SLOT(dev->devfn) == PCI_SLOT(related->devfn)))
> +			break;
> +		dev = pci_get_device(vendor, device, dev);
> +	}
> +
> +	return dev;
> +}
> +
>  static void read_dram_base_limit_regs(struct amd64_pvt *pvt, unsigned range)
>  {
>  	struct cpuinfo_x86 *c = &boot_cpu_data;
> @@ -1001,11 +1019,12 @@ static void read_dram_base_limit_regs(struct amd64_pvt *pvt, unsigned range)
>  
>  	/* Factor in CC6 save area by reading dst node's limit reg */
>  	if (c->x86 == 0x15) {
> -		struct pci_dev *f1 = NULL;
> -		u8 nid = dram_dst_node(pvt, range);
> +		struct pci_dev *misc, *f1 = NULL;
> +		u16 nid = dram_dst_node(pvt, range);
>  		u32 llim;
>  
> -		f1 = pci_get_domain_bus_and_slot(0, 0, PCI_DEVFN(0x18 + nid, 1));
> +		misc = node_to_amd_nb(nid)->misc;
> +		f1 = pci_get_related_function(misc->vendor, PCI_DEVICE_ID_AMD_15H_NB_F1, misc);
>  		if (WARN_ON(!f1))
>  			return;

This hunk and the one above changing pci_get_related_function should be
a separate patch.

Also, btw, you're changing node_to_amd_nb to receive u16 in patch 3/3
but adjusting the 'nid' argument here.

For clarity, please do the change to the function argument and its call
sites in one patch.

> -static struct pci_dev *pci_get_related_function(unsigned int vendor,
> -						unsigned int device,
> -						struct pci_dev *related)
> -{
> -	struct pci_dev *dev = NULL;
> -
> -	dev = pci_get_device(vendor, device, dev);
> -	while (dev) {
> -		if ((dev->bus->number == related->bus->number) &&
> -		    (PCI_SLOT(dev->devfn) == PCI_SLOT(related->devfn)))
> -			break;
> -		dev = pci_get_device(vendor, device, dev);
> -	}
> -
> -	return dev;
> -}
> -
>  /*
>   * These are tables of eigenvectors (one per line) which can be used for the
>   * construction of the syndrome tables. The modified syndrome search algorithm
> @@ -2546,7 +2548,7 @@ static int amd64_init_one_instance(struct pci_dev *F2)
>  	struct mem_ctl_info *mci = NULL;
>  	struct edac_mc_layer layers[2];
>  	int err = 0, ret;
> -	u8 nid = get_node_id(F2);
> +	u8 nid = amd_get_node_id(F2);

this should be u16 now, right?

>  
>  	ret = -ENOMEM;
>  	pvt = kzalloc(sizeof(struct amd64_pvt), GFP_KERNEL);
> @@ -2637,7 +2639,7 @@ err_ret:
>  static int __devinit amd64_probe_one_instance(struct pci_dev *pdev,
>  					     const struct pci_device_id *mc_type)
>  {
> -	u8 nid = get_node_id(pdev);
> +	u8 nid = amd_get_node_id(pdev);

ditto.

>  	struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
>  	struct ecc_settings *s;
>  	int ret = 0;
> @@ -2687,7 +2689,7 @@ static void __devexit amd64_remove_one_instance(struct pci_dev *pdev)
>  {
>  	struct mem_ctl_info *mci;
>  	struct amd64_pvt *pvt;
> -	u8 nid = get_node_id(pdev);
> +	u8 nid = amd_get_node_id(pdev);

ditto.

>  	struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
>  	struct ecc_settings *s = ecc_stngs[nid];
>  
> diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h
> index 8d48047..90cae61 100644
> --- a/drivers/edac/amd64_edac.h
> +++ b/drivers/edac/amd64_edac.h
> @@ -290,12 +290,6 @@
>  /* MSRs */
>  #define MSR_MCGCTL_NBE			BIT(4)
>  
> -/* AMD sets the first MC device at device ID 0x18. */
> -static inline u8 get_node_id(struct pci_dev *pdev)
> -{
> -	return PCI_SLOT(pdev->devfn) - 0x18;
> -}
> -
>  enum amd_families {
>  	K8_CPUS = 0,
>  	F10_CPUS,

Thanks.

-- 
Regards/Gruss,
Boris.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 2/3, v3] AMD64 EDAC: Support >255 memory controllers
  2012-11-19 10:02 ` [PATCH 2/3, v3] AMD64 EDAC: Support >255 memory controllers Daniel J Blueman
@ 2012-11-20 15:01   ` Borislav Petkov
  0 siblings, 0 replies; 7+ messages in thread
From: Borislav Petkov @ 2012-11-20 15:01 UTC (permalink / raw)
  To: Daniel J Blueman
  Cc: Ingo Molnar, Thomas Gleixner, H Peter Anvin, Steffen Persvold,
	x86, linux-kernel

On Mon, Nov 19, 2012 at 06:02:47PM +0800, Daniel J Blueman wrote:
> As the AMD64 last-level-cache ID is 16-bits and federated systems
> eg using Numascale's NumaConnect/NumaChip can have more than 255 memory
> controllers, use 16-bits to store the ID.
> 
> v2: Avoid change to intlv_en variable
> v3: Drop unneeded change to index
> 
> Signed-off-by: Daniel J Blueman <daniel@numascale-asia.com>
> ---
>  drivers/edac/amd64_edac.c |   17 +++++++++--------
>  1 file changed, 9 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
> index 8de8873..6e3f002 100644
> --- a/drivers/edac/amd64_edac.c
> +++ b/drivers/edac/amd64_edac.c
> @@ -942,7 +942,8 @@ static u64 get_error_address(struct mce *m)
>  		struct amd64_pvt *pvt;
>  		u64 cc6_base, tmp_addr;
>  		u32 tmp;
> -		u8 mce_nid, intlv_en;
> +		u16 mce_nid;

Same here: this change needs to go together with the patch doing:

-extern int amd_get_nb_id(int cpu);
+extern u16 amd_get_nb_id(int cpu);

Please make sure you have all changes belonging semantically and
logically together in one patch.

> +		u8 intlv_en;
>  
>  		if ((addr & GENMASK(24, 47)) >> 24 != 0x00fdf7)
>  			return addr;
> @@ -2298,7 +2299,7 @@ out:
>  	return ret;
>  }
>  
> -static int toggle_ecc_err_reporting(struct ecc_settings *s, u8 nid, bool on)
> +static int toggle_ecc_err_reporting(struct ecc_settings *s, u16 nid, bool on)

So this u16 nid is being passed as arg to get_cpus_on_this_dct_cpumask()
which does amd_get_nb_id but it's arg is still "unsigned".

So here's how your patchset should look like:

patch 1: add amd_get_node_id() and change all its callsites
patch 2: change amd_get_nb_id() to return u16 and change all its callsites
patch 3: other required changes
patch 4: maybe other unrelated stuff

This way you it is easier to review and to follow 1,2,3,5 years from now
why the changes were done.

>  {
>  	cpumask_var_t cmask;
>  	int cpu;
> @@ -2336,7 +2337,7 @@ static int toggle_ecc_err_reporting(struct ecc_settings *s, u8 nid, bool on)
>  	return 0;
>  }
>  
> -static bool enable_ecc_error_reporting(struct ecc_settings *s, u8 nid,
> +static bool enable_ecc_error_reporting(struct ecc_settings *s, u16 nid,
>  				       struct pci_dev *F3)
>  {
>  	bool ret = true;
> @@ -2388,7 +2389,7 @@ static bool enable_ecc_error_reporting(struct ecc_settings *s, u8 nid,
>  	return ret;
>  }
>  
> -static void restore_ecc_error_reporting(struct ecc_settings *s, u8 nid,
> +static void restore_ecc_error_reporting(struct ecc_settings *s, u16 nid,
>  					struct pci_dev *F3)
>  {
>  	u32 value, mask = 0x3;		/* UECC/CECC enable */
> @@ -2427,7 +2428,7 @@ static const char *ecc_msg =
>  	"'ecc_enable_override'.\n"
>  	" (Note that use of the override may cause unknown side effects.)\n";
>  
> -static bool ecc_enabled(struct pci_dev *F3, u8 nid)
> +static bool ecc_enabled(struct pci_dev *F3, u16 nid)
>  {
>  	u32 value;
>  	u8 ecc_en = 0;
> @@ -2548,7 +2549,7 @@ static int amd64_init_one_instance(struct pci_dev *F2)
>  	struct mem_ctl_info *mci = NULL;
>  	struct edac_mc_layer layers[2];
>  	int err = 0, ret;
> -	u8 nid = amd_get_node_id(F2);
> +	u16 nid = amd_get_node_id(F2);

This change should conceptually belong with the patch adding amd_get_node_id.

>  
>  	ret = -ENOMEM;
>  	pvt = kzalloc(sizeof(struct amd64_pvt), GFP_KERNEL);
> @@ -2639,7 +2640,7 @@ err_ret:
>  static int __devinit amd64_probe_one_instance(struct pci_dev *pdev,
>  					     const struct pci_device_id *mc_type)
>  {
> -	u8 nid = amd_get_node_id(pdev);
> +	u16 nid = amd_get_node_id(pdev);

ditto.

>  	struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
>  	struct ecc_settings *s;
>  	int ret = 0;
> @@ -2689,7 +2690,7 @@ static void __devexit amd64_remove_one_instance(struct pci_dev *pdev)
>  {
>  	struct mem_ctl_info *mci;
>  	struct amd64_pvt *pvt;
> -	u8 nid = amd_get_node_id(pdev);
> +	u16 nid = amd_get_node_id(pdev);

ditto.

>  	struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
>  	struct ecc_settings *s = ecc_stngs[nid];

Thanks.

-- 
Regards/Gruss,
Boris.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH 2/3, v3] AMD64 EDAC: Support >255 memory controllers
  2012-11-05  6:05 [PATCH 1/3, v5] " Daniel J Blueman
@ 2012-11-05  6:05 ` Daniel J Blueman
  0 siblings, 0 replies; 7+ messages in thread
From: Daniel J Blueman @ 2012-11-05  6:05 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Ingo Molnar, Thomas Gleixner, H Peter Anvin, Steffen Persvold,
	x86, linux-kernel, Daniel J Blueman

As the AMD64 last-level-cache ID is 16-bits and federated systems
eg using Numascale's NumaConnect/NumaChip can have more than 255 memory
controllers, use 16-bits to store the ID.

v2: Avoid change to intlv_en variable
v3: Drop unneeded change to index

Signed-off-by: Daniel J Blueman <daniel@numascale-asia.com>
---
 drivers/edac/amd64_edac.c |   19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 852f1cd..5dfe452 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -942,7 +942,8 @@ static u64 get_error_address(struct mce *m)
 		struct amd64_pvt *pvt;
 		u64 cc6_base, tmp_addr;
 		u32 tmp;
-		u8 mce_nid, intlv_en;
+		u16 mce_nid;
+		u8 intlv_en;
 
 		if ((addr & GENMASK(24, 47)) >> 24 != 0x00fdf7)
 			return addr;
@@ -2299,7 +2300,7 @@ out:
 	return ret;
 }
 
-static int toggle_ecc_err_reporting(struct ecc_settings *s, u8 nid, bool on)
+static int toggle_ecc_err_reporting(struct ecc_settings *s, u16 nid, bool on)
 {
 	cpumask_var_t cmask;
 	int cpu;
@@ -2337,7 +2338,7 @@ static int toggle_ecc_err_reporting(struct ecc_settings *s, u8 nid, bool on)
 	return 0;
 }
 
-static bool enable_ecc_error_reporting(struct ecc_settings *s, u8 nid,
+static bool enable_ecc_error_reporting(struct ecc_settings *s, u16 nid,
 				       struct pci_dev *F3)
 {
 	bool ret = true;
@@ -2389,7 +2390,7 @@ static bool enable_ecc_error_reporting(struct ecc_settings *s, u8 nid,
 	return ret;
 }
 
-static void restore_ecc_error_reporting(struct ecc_settings *s, u8 nid,
+static void restore_ecc_error_reporting(struct ecc_settings *s, u16 nid,
 					struct pci_dev *F3)
 {
 	u32 value, mask = 0x3;		/* UECC/CECC enable */
@@ -2428,7 +2429,7 @@ static const char *ecc_msg =
 	"'ecc_enable_override'.\n"
 	" (Note that use of the override may cause unknown side effects.)\n";
 
-static bool ecc_enabled(struct pci_dev *F3, u8 nid)
+static bool ecc_enabled(struct pci_dev *F3, u16 nid)
 {
 	u32 value;
 	u8 ecc_en = 0;
@@ -2549,7 +2550,7 @@ static int amd64_init_one_instance(struct pci_dev *F2)
 	struct mem_ctl_info *mci = NULL;
 	struct edac_mc_layer layers[2];
 	int err = 0, ret;
-	u8 nid = amd_get_node_id(F2);
+	u16 nid = amd_get_node_id(F2);
 
 	ret = -ENOMEM;
 	pvt = kzalloc(sizeof(struct amd64_pvt), GFP_KERNEL);
@@ -2640,7 +2641,7 @@ err_ret:
 static int __devinit amd64_probe_one_instance(struct pci_dev *pdev,
 					     const struct pci_device_id *mc_type)
 {
-	u8 nid = amd_get_node_id(pdev);
+	u16 nid = amd_get_node_id(pdev);
 	struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
 	struct ecc_settings *s;
 	int ret = 0;
@@ -2690,7 +2691,7 @@ static void __devexit amd64_remove_one_instance(struct pci_dev *pdev)
 {
 	struct mem_ctl_info *mci;
 	struct amd64_pvt *pvt;
-	u8 nid = amd_get_node_id(pdev);
+	u16 nid = amd_get_node_id(pdev);
 	struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
 	struct ecc_settings *s = ecc_stngs[nid];
 
-- 
1.7.10.4


^ permalink raw reply related	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2012-11-20 15:01 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-11-19 10:02 [PATCH 1/3, v6] AMD64 EDAC: Add muli-domain support Daniel J Blueman
2012-11-19 10:02 ` [PATCH 2/3, v3] AMD64 EDAC: Support >255 memory controllers Daniel J Blueman
2012-11-20 15:01   ` Borislav Petkov
2012-11-19 10:02 ` [PATCH 3/3, v3] AMD64 EDAC: Cleanup type usage to be consistent Daniel J Blueman
2012-11-20 15:01   ` Borislav Petkov
2012-11-20 15:01 ` [PATCH 1/3, v6] AMD64 EDAC: Add muli-domain support Borislav Petkov
  -- strict thread matches above, loose matches on Subject: below --
2012-11-05  6:05 [PATCH 1/3, v5] " Daniel J Blueman
2012-11-05  6:05 ` [PATCH 2/3, v3] AMD64 EDAC: Support >255 memory controllers Daniel J Blueman

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).