linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH V3 0/2] Add two-level support to ITS device table
@ 2016-05-09 20:58 Shanker Donthineni
  2016-05-09 20:58 ` [PATCH V3 1/2] irqchip/gicv3-its: split its_alloc_tables() into two functions Shanker Donthineni
  2016-05-09 20:58 ` [PATCH V3 2/2] irqchip/gicv3-its: Implement two-level(indirect) device table support Shanker Donthineni
  0 siblings, 2 replies; 8+ messages in thread
From: Shanker Donthineni @ 2016-05-09 20:58 UTC (permalink / raw)
  To: Marc Zyngier, linux-arm-kernel, linux-kernel
  Cc: Thomas Gleixner, Jason Cooper, Vikram Sethi, Philip Elcan,
	Shanker Donthineni

This patchset contains the code changes necessary to support the
two-level (indirect) table walk feature for the ITS device table.

Shanker Donthineni (2):
  irqchip/gicv3-its: split its_alloc_tables() into two functions
  irqchip/gicv3-its: Implement two-level(indirect) device table support

 drivers/irqchip/irq-gic-v3-its.c   | 351 +++++++++++++++++++++++--------------
 include/linux/irqchip/arm-gic-v3.h |   3 +
 2 files changed, 223 insertions(+), 131 deletions(-)

-- 

This series is based on Marc Zyngier's branch https://git.kernel.org/cgit/linux/kernel/git/maz/arm-platforms.git/log/?h=irq/irqchip-4.7

I have tested the indirection feature on the Qualcomm Technologies QDF2XXX server platform.

Qualcomm Technologies, Inc. on behalf of Qualcomm Innovation Center, Inc. 
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, 
a Linux Foundation Collaborative Project

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH V3 1/2] irqchip/gicv3-its: split its_alloc_tables() into two functions
  2016-05-09 20:58 [PATCH V3 0/2] Add two-level support to ITS device table Shanker Donthineni
@ 2016-05-09 20:58 ` Shanker Donthineni
  2016-06-04  8:53   ` Marc Zyngier
  2016-06-04 11:45   ` Marc Zyngier
  2016-05-09 20:58 ` [PATCH V3 2/2] irqchip/gicv3-its: Implement two-level(indirect) device table support Shanker Donthineni
  1 sibling, 2 replies; 8+ messages in thread
From: Shanker Donthineni @ 2016-05-09 20:58 UTC (permalink / raw)
  To: Marc Zyngier, linux-arm-kernel, linux-kernel
  Cc: Thomas Gleixner, Jason Cooper, Vikram Sethi, Philip Elcan,
	Shanker Donthineni

The function is getting out of control: it has too many goto
statements and would become too complicated once the two-level
device table feature is added. So it is time to clean it up and
move some of the logic into a separate function, without affecting
the existing functionality.

Signed-off-by: Shanker Donthineni <shankerd@codeaurora.org>
---
 drivers/irqchip/irq-gic-v3-its.c   | 256 ++++++++++++++++++++-----------------
 include/linux/irqchip/arm-gic-v3.h |   3 +
 2 files changed, 144 insertions(+), 115 deletions(-)

diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 6bd881b..b23e00c 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -55,13 +55,15 @@ struct its_collection {
 };
 
 /*
- * The ITS_BASER structure - contains memory information and cached
- * value of BASER register configuration.
+ * The ITS_BASER structure - contains memory information, cached value
+ * of BASER register configuration, ioremaped address and page size.
  */
 struct its_baser {
+	void __iomem	*hwreg;
 	void		*base;
 	u64		val;
 	u32		order;
+	u32		psz;
 };
 
 /*
@@ -823,27 +825,135 @@ static void its_free_tables(struct its_node *its)
 	}
 }
 
+static int its_baser_setup(struct its_node *its, struct its_baser *baser,
+				  u32 order, u64 indirect)
+{
+	u64 val = readq_relaxed(baser->hwreg);
+	u64 type = GITS_BASER_TYPE(val);
+	u64 entry_size = GITS_BASER_ENTRY_SIZE(val);
+	int psz, alloc_pages;
+	u64 cache, shr, tmp;
+	void *base;
+
+	/* Do first attempt with the requested attributes */
+	cache = baser->val & GITS_BASER_CACHEABILITY_MASK;
+	shr = baser->val & GITS_BASER_SHAREABILITY_MASK;
+	psz = baser->psz;
+
+retry_alloc_baser:
+	alloc_pages = (PAGE_ORDER_TO_SIZE(order) / psz);
+	if (alloc_pages > GITS_BASER_PAGES_MAX) {
+		pr_warn("ITS@%lx: %s too large, reduce ITS pages %u->%u\n",
+			its->phys_base, its_base_type_string[type],
+			alloc_pages, GITS_BASER_PAGES_MAX);
+		alloc_pages = GITS_BASER_PAGES_MAX;
+		order = get_order(GITS_BASER_PAGES_MAX * psz);
+	}
+
+	base = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
+	if (!base)
+		return -ENOMEM;
+
+retry_baser:
+	val = (virt_to_phys(base)				 |
+		(type << GITS_BASER_TYPE_SHIFT)			 |
+		((entry_size - 1) << GITS_BASER_ENTRY_SIZE_SHIFT) |
+		((alloc_pages - 1) << GITS_BASER_PAGES_SHIFT)	 |
+		cache						 |
+		shr						 |
+		indirect					 |
+		GITS_BASER_VALID);
+
+	switch (psz) {
+	case SZ_4K:
+		val |= GITS_BASER_PAGE_SIZE_4K;
+		break;
+	case SZ_16K:
+		val |= GITS_BASER_PAGE_SIZE_16K;
+		break;
+	case SZ_64K:
+		val |= GITS_BASER_PAGE_SIZE_64K;
+		break;
+	}
+
+	writeq_relaxed(val, baser->hwreg);
+	tmp = readq_relaxed(baser->hwreg);
+
+	if ((val ^ tmp) & GITS_BASER_SHAREABILITY_MASK) {
+		/*
+		 * Shareability didn't stick. Just use
+		 * whatever the read reported, which is likely
+		 * to be the only thing this redistributor
+		 * supports. If that's zero, make it
+		 * non-cacheable as well.
+		 */
+		shr = tmp & GITS_BASER_SHAREABILITY_MASK;
+		if (!shr) {
+			cache = GITS_BASER_nC;
+			__flush_dcache_area(base, PAGE_ORDER_TO_SIZE(order));
+		}
+		goto retry_baser;
+	}
+
+	if ((val ^ tmp) & GITS_BASER_PAGE_SIZE_MASK) {
+		/*
+		 * Page size didn't stick. Let's try a smaller
+		 * size and retry. If we reach 4K, then
+		 * something is horribly wrong...
+		 */
+		free_pages((unsigned long)base, order);
+		baser->base = NULL;
+
+		switch (psz) {
+		case SZ_16K:
+			psz = SZ_4K;
+			goto retry_alloc_baser;
+		case SZ_64K:
+			psz = SZ_16K;
+			goto retry_alloc_baser;
+		}
+	}
+
+	if (val != tmp) {
+		pr_err("ITS@%lx: %s doesn't stick: %lx %lx\n",
+		       its->phys_base, its_base_type_string[type],
+		       (unsigned long) val, (unsigned long) tmp);
+		free_pages((unsigned long)base, order);
+		return -ENXIO;
+	}
+
+	baser->base = base;
+	baser->order = order;
+	baser->psz = psz;
+	baser->val = val;
+	tmp = indirect ? GITS_LVL1_ENTRY_SIZE : entry_size;
+
+	pr_info("ITS@%lx: allocated %d %s @%lx (%s, esz %d, psz %dK, shr %d)\n",
+		its->phys_base, (int)(PAGE_ORDER_TO_SIZE(order) / tmp),
+		its_base_type_string[type],
+		(unsigned long)virt_to_phys(base),
+		indirect ? "indirect" : "flat", (int)entry_size,
+		psz / SZ_1K, (int)shr >> GITS_BASER_SHAREABILITY_SHIFT);
+
+	return 0;
+}
+
 static int its_alloc_tables(const char *node_name, struct its_node *its)
 {
-	int err;
-	int i;
-	int psz = SZ_64K;
+	u64 typer = readq_relaxed(its->base + GITS_TYPER);
+	u32 ids = GITS_TYPER_DEVBITS(typer);
 	u64 shr = GITS_BASER_InnerShareable;
-	u64 cache;
-	u64 typer;
-	u32 ids;
+	u64 cache = GITS_BASER_WaWb;
+	int psz = SZ_64K;
+	int err, i;
 
 	if (its->flags & ITS_FLAGS_WORKAROUND_CAVIUM_22375) {
 		/*
 		 * erratum 22375: only alloc 8MB table size
 		 * erratum 24313: ignore memory access type
 		 */
-		cache	= 0;
+		cache	= GITS_BASER_nCnB;
 		ids	= 0x14;			/* 20 bits, 8MB */
-	} else {
-		cache	= GITS_BASER_WaWb;
-		typer	= readq_relaxed(its->base + GITS_TYPER);
-		ids	= GITS_TYPER_DEVBITS(typer);
 	}
 
 	its->device_ids = ids;
@@ -853,13 +963,16 @@ static int its_alloc_tables(const char *node_name, struct its_node *its)
 		u64 type = GITS_BASER_TYPE(val);
 		u64 entry_size = GITS_BASER_ENTRY_SIZE(val);
 		int order = get_order(psz);
-		int alloc_pages;
-		u64 tmp;
-		void *base;
+		struct its_baser *baser = its->tables + i;
 
 		if (type == GITS_BASER_TYPE_NONE)
 			continue;
 
+		/* Set preferred settings for this BASERn */
+		baser->hwreg = its->base + GITS_BASER + i * 8;
+		baser->val = cache | shr;
+		baser->psz = psz;
+
 		/*
 		 * Allocate as many entries as required to fit the
 		 * range of device IDs that the ITS can grok... The ID
@@ -875,115 +988,28 @@ static int its_alloc_tables(const char *node_name, struct its_node *its)
 			 * smaller than that.  If the requested allocation
 			 * is smaller, round up to the default page granule.
 			 */
-			order = max(get_order((1UL << ids) * entry_size),
-				    order);
+			order = max(get_order(entry_size << ids), order);
 			if (order >= MAX_ORDER) {
 				order = MAX_ORDER - 1;
-				pr_warn("%s: Device Table too large, reduce its page order to %u\n",
-					node_name, order);
-			}
-		}
-
-retry_alloc_baser:
-		alloc_pages = (PAGE_ORDER_TO_SIZE(order) / psz);
-		if (alloc_pages > GITS_BASER_PAGES_MAX) {
-			alloc_pages = GITS_BASER_PAGES_MAX;
-			order = get_order(GITS_BASER_PAGES_MAX * psz);
-			pr_warn("%s: Device Table too large, reduce its page order to %u (%u pages)\n",
-				node_name, order, alloc_pages);
-		}
-
-		base = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
-		if (!base) {
-			err = -ENOMEM;
-			goto out_free;
-		}
-
-		its->tables[i].base = base;
-		its->tables[i].order = order;
-
-retry_baser:
-		val = (virt_to_phys(base) 				 |
-		       (type << GITS_BASER_TYPE_SHIFT)			 |
-		       ((entry_size - 1) << GITS_BASER_ENTRY_SIZE_SHIFT) |
-		       cache						 |
-		       shr						 |
-		       GITS_BASER_VALID);
-
-		switch (psz) {
-		case SZ_4K:
-			val |= GITS_BASER_PAGE_SIZE_4K;
-			break;
-		case SZ_16K:
-			val |= GITS_BASER_PAGE_SIZE_16K;
-			break;
-		case SZ_64K:
-			val |= GITS_BASER_PAGE_SIZE_64K;
-			break;
-		}
-
-		val |= alloc_pages - 1;
-		its->tables[i].val = val;
-
-		writeq_relaxed(val, its->base + GITS_BASER + i * 8);
-		tmp = readq_relaxed(its->base + GITS_BASER + i * 8);
-
-		if ((val ^ tmp) & GITS_BASER_SHAREABILITY_MASK) {
-			/*
-			 * Shareability didn't stick. Just use
-			 * whatever the read reported, which is likely
-			 * to be the only thing this redistributor
-			 * supports. If that's zero, make it
-			 * non-cacheable as well.
-			 */
-			shr = tmp & GITS_BASER_SHAREABILITY_MASK;
-			if (!shr) {
-				cache = GITS_BASER_nC;
-				__flush_dcache_area(base, PAGE_ORDER_TO_SIZE(order));
+				ids = ilog2(PAGE_ORDER_TO_SIZE(order) / entry_size);
+				pr_warn("ITS@%lx:: Device Table too large, reduce ids %u->%u\n",
+					its->phys_base, its->device_ids, ids);
 			}
-			goto retry_baser;
 		}
 
-		if ((val ^ tmp) & GITS_BASER_PAGE_SIZE_MASK) {
-			/*
-			 * Page size didn't stick. Let's try a smaller
-			 * size and retry. If we reach 4K, then
-			 * something is horribly wrong...
-			 */
-			free_pages((unsigned long)base, order);
-			its->tables[i].base = NULL;
-
-			switch (psz) {
-			case SZ_16K:
-				psz = SZ_4K;
-				goto retry_alloc_baser;
-			case SZ_64K:
-				psz = SZ_16K;
-				goto retry_alloc_baser;
-			}
-		}
-
-		if (val != tmp) {
-			pr_err("ITS: %s: GITS_BASER%d doesn't stick: %lx %lx\n",
-			       node_name, i,
-			       (unsigned long) val, (unsigned long) tmp);
-			err = -ENXIO;
-			goto out_free;
+		err = its_baser_setup(its, baser, order, 0);
+		if (err < 0) {
+			its_free_tables(its);
+			return err;
 		}
 
-		pr_info("ITS: allocated %d %s @%lx (psz %dK, shr %d)\n",
-			(int)(PAGE_ORDER_TO_SIZE(order) / entry_size),
-			its_base_type_string[type],
-			(unsigned long)virt_to_phys(base),
-			psz / SZ_1K, (int)shr >> GITS_BASER_SHAREABILITY_SHIFT);
+		/* Update settings which will be used for next BASERn */
+		psz = baser->psz;
+		cache = baser->val & GITS_BASER_CACHEABILITY_MASK;
+		shr = baser->val & GITS_BASER_SHAREABILITY_MASK;
 	}
 
 	return 0;
-
-out_free:
-	its_free_tables(its);
-
-	return err;
 }
 
 static int its_alloc_collections(struct its_node *its)
diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index 9e6fdd3..7f917b9 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -204,6 +204,7 @@
 #define GITS_BASER_NR_REGS		8
 
 #define GITS_BASER_VALID		(1UL << 63)
+#define GITS_BASER_INDIRECT		(1UL << 62)
 #define GITS_BASER_nCnB			(0UL << 59)
 #define GITS_BASER_nC			(1UL << 59)
 #define GITS_BASER_RaWt			(2UL << 59)
@@ -228,6 +229,7 @@
 #define GITS_BASER_PAGE_SIZE_64K	(2UL << GITS_BASER_PAGE_SIZE_SHIFT)
 #define GITS_BASER_PAGE_SIZE_MASK	(3UL << GITS_BASER_PAGE_SIZE_SHIFT)
 #define GITS_BASER_PAGES_MAX		256
+#define GITS_BASER_PAGES_SHIFT		(0)
 
 #define GITS_BASER_TYPE_NONE		0
 #define GITS_BASER_TYPE_DEVICE		1
@@ -238,6 +240,7 @@
 #define GITS_BASER_TYPE_RESERVED6	6
 #define GITS_BASER_TYPE_RESERVED7	7
 
+#define GITS_LVL1_ENTRY_SIZE		(8UL)
 /*
  * ITS commands
  */
-- 
Qualcomm Technologies, Inc. on behalf of Qualcomm Innovation Center, Inc. 
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, 
a Linux Foundation Collaborative Project

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH V3 2/2] irqchip/gicv3-its: Implement two-level(indirect) device table support
  2016-05-09 20:58 [PATCH V3 0/2] Add two-level support to ITS device table Shanker Donthineni
  2016-05-09 20:58 ` [PATCH V3 1/2] irqchip/gicv3-its: split its_alloc_tables() into two functions Shanker Donthineni
@ 2016-05-09 20:58 ` Shanker Donthineni
  2016-06-04  9:09   ` Marc Zyngier
  1 sibling, 1 reply; 8+ messages in thread
From: Shanker Donthineni @ 2016-05-09 20:58 UTC (permalink / raw)
  To: Marc Zyngier, linux-arm-kernel, linux-kernel
  Cc: Thomas Gleixner, Jason Cooper, Vikram Sethi, Philip Elcan,
	Shanker Donthineni

Since device IDs are extremely sparse, a single (a.k.a. flat) table is
not sufficient, for the following two reasons.

1) According to the ARM GIC spec, the ITS hardware can access a maximum
   of 256 (pages) * 64K (page size) bytes. In the best case, with the
   minimum device table entry size of 8 bytes, that only covers a
   sparse DevID space of 21 bits.

2) The maximum memory size that can be allocated without memblock
   depends on MAX_ORDER. That is 4MB on a 4K page size kernel with the
   default MAX_ORDER, so it only supports a DevID range of 19 bits.

The two-level device table feature brings us two advantages: the first
is a very high likelihood of supporting a sparse DevID space of up to
32 bits, and the second is much better utilization of allocated memory.

The feature is enabled automatically during driver probe if a single
ITS page is not adequate for a flat table and the hardware is capable
of a two-level table walk.

Signed-off-by: Shanker Donthineni <shankerd@codeaurora.org>
---

Changes since v2:
  Fixed a porting bug in the device 'id' validation check in its_alloc_device_table()

Changes since v1:
  Most of this patch has been rewritten after refactoring its_alloc_tables().
  Always enable the two-level device table if the memory requirement is more than PAGE_SIZE.
  Fixed a coding bug that broke big-endian (BE) machines.
  Edited the commit text.

 drivers/irqchip/irq-gic-v3-its.c | 97 +++++++++++++++++++++++++++++++++-------
 1 file changed, 80 insertions(+), 17 deletions(-)

diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index b23e00c..60a1060 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -938,6 +938,18 @@ retry_baser:
 	return 0;
 }
 
+/**
+ * Find out whether an implemented baser register supports a single, flat table
+ * or a two-level table by reading bit offset at '62' after writing '1' to it.
+ */
+static u64 its_baser_check_indirect(struct its_baser *baser)
+{
+	u64 val = GITS_BASER_InnerShareable | GITS_BASER_WaWb;
+
+	writeq_relaxed(val | GITS_BASER_INDIRECT, baser->hwreg);
+	return (readq_relaxed(baser->hwreg) & GITS_BASER_INDIRECT);
+}
+
 static int its_alloc_tables(const char *node_name, struct its_node *its)
 {
 	u64 typer = readq_relaxed(its->base + GITS_TYPER);
@@ -964,6 +976,7 @@ static int its_alloc_tables(const char *node_name, struct its_node *its)
 		u64 entry_size = GITS_BASER_ENTRY_SIZE(val);
 		int order = get_order(psz);
 		struct its_baser *baser = its->tables + i;
+		u64 indirect = 0;
 
 		if (type == GITS_BASER_TYPE_NONE)
 			continue;
@@ -977,17 +990,27 @@ static int its_alloc_tables(const char *node_name, struct its_node *its)
 		 * Allocate as many entries as required to fit the
 		 * range of device IDs that the ITS can grok... The ID
 		 * space being incredibly sparse, this results in a
-		 * massive waste of memory.
+		 * massive waste of memory if two-level device table
+		 * feature is not supported by hardware.
 		 *
 		 * For other tables, only allocate a single page.
 		 */
 		if (type == GITS_BASER_TYPE_DEVICE) {
-			/*
-			 * 'order' was initialized earlier to the default page
-			 * granule of the the ITS.  We can't have an allocation
-			 * smaller than that.  If the requested allocation
-			 * is smaller, round up to the default page granule.
-			 */
+			if ((entry_size << ids) > psz)
+				indirect = its_baser_check_indirect(baser);
+
+			if (indirect) {
+				/*
+				 * The size of the lvl2 table is equal to ITS
+				 * page size which is 'psz'. For computing lvl1
+				 * table size, subtract ID bits that sparse
+				 * lvl2 table from 'ids' which is reported by
+				 * ITS hardware times lvl1 table entry size.
+				 */
+				ids -= ilog2(psz / entry_size);
+				entry_size = GITS_LVL1_ENTRY_SIZE;
+			}
+
 			order = max(get_order(entry_size << ids), order);
 			if (order >= MAX_ORDER) {
 				order = MAX_ORDER - 1;
@@ -997,7 +1020,7 @@ static int its_alloc_tables(const char *node_name, struct its_node *its)
 			}
 		}
 
-		err = its_baser_setup(its, baser, order, 0);
+		err = its_baser_setup(its, baser, order, indirect);
 		if (err < 0) {
 			its_free_tables(its);
 			return err;
@@ -1187,10 +1210,57 @@ static struct its_baser *its_get_baser(struct its_node *its, u32 type)
 	return NULL;
 }
 
+static bool its_alloc_device_table(struct its_node *its, u32 dev_id)
+{
+	struct its_baser *baser;
+	struct page *page;
+	u32 esz, idx;
+	u64 *table;
+
+	baser = its_get_baser(its, GITS_BASER_TYPE_DEVICE);
+
+	/* Don't allow device id that exceeds ITS hardware limit */
+	if (!baser)
+		return (ilog2(dev_id) < its->device_ids);
+
+	/* Don't allow device id that exceeds single, flat table limit */
+	esz = GITS_BASER_ENTRY_SIZE(baser->val);
+	if (!(baser->val & GITS_BASER_INDIRECT))
+		return (dev_id < (PAGE_ORDER_TO_SIZE(baser->order) / esz));
+
+	/* Compute 1st level table index & check if that exceeds table limit */
+	idx = dev_id >> ilog2(baser->psz / esz);
+	if (idx >= (PAGE_ORDER_TO_SIZE(baser->order) / GITS_LVL1_ENTRY_SIZE))
+		return false;
+
+	table = baser->base;
+
+	/* Allocate memory for 2nd level table */
+	if (!table[idx]) {
+		page = alloc_pages(GFP_KERNEL | __GFP_ZERO, get_order(baser->psz));
+		if (!page)
+			return false;
+
+		/* Flush memory to PoC if hardware doesn't support coherency */
+		if (!(baser->val & GITS_BASER_SHAREABILITY_MASK))
+			__flush_dcache_area(page_address(page), baser->psz);
+
+		table[idx] = cpu_to_le64(page_to_phys(page) | GITS_BASER_VALID);
+
+		/* Flush memory to PoC if hardware doesn't support coherency */
+		if (!(baser->val & GITS_BASER_SHAREABILITY_MASK))
+			__flush_dcache_area(table + idx, GITS_LVL1_ENTRY_SIZE);
+
+		/* Ensure updated table contents are visible to ITS hardware */
+		dsb(sy);
+	}
+
+	return true;
+}
+
 static struct its_device *its_create_device(struct its_node *its, u32 dev_id,
 					    int nvecs)
 {
-	struct its_baser *baser;
 	struct its_device *dev;
 	unsigned long *lpi_map;
 	unsigned long flags;
@@ -1201,14 +1271,7 @@ static struct its_device *its_create_device(struct its_node *its, u32 dev_id,
 	int nr_ites;
 	int sz;
 
-	baser = its_get_baser(its, GITS_BASER_TYPE_DEVICE);
-
-	/* Don't allow 'dev_id' that exceeds single, flat table limit */
-	if (baser) {
-		if (dev_id >= (PAGE_ORDER_TO_SIZE(baser->order) /
-			      GITS_BASER_ENTRY_SIZE(baser->val)))
-			return NULL;
-	} else if (ilog2(dev_id) >= its->device_ids)
+	if (!its_alloc_device_table(its, dev_id))
 		return NULL;
 
 	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
-- 
Qualcomm Technologies, Inc. on behalf of Qualcomm Innovation Center, Inc. 
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, 
a Linux Foundation Collaborative Project

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH V3 1/2] irqchip/gicv3-its: split its_alloc_tables() into two functions
  2016-05-09 20:58 ` [PATCH V3 1/2] irqchip/gicv3-its: split its_alloc_tables() into two functions Shanker Donthineni
@ 2016-06-04  8:53   ` Marc Zyngier
  2016-06-04 14:30     ` Shanker Donthineni
  2016-06-04 11:45   ` Marc Zyngier
  1 sibling, 1 reply; 8+ messages in thread
From: Marc Zyngier @ 2016-06-04  8:53 UTC (permalink / raw)
  To: Shanker Donthineni
  Cc: linux-arm-kernel, linux-kernel, Thomas Gleixner, Jason Cooper,
	Vikram Sethi, Philip Elcan

On Mon, 9 May 2016 15:58:25 -0500
Shanker Donthineni <shankerd@codeaurora.org> wrote:

> The function is getting out of control, it has too many goto
> statements and would be too complicated for adding a feature
> two-level device table. So, it is time for us to cleanup and
> move some of the logic to a separate function without affecting
> the existing functionality.
> 
> Signed-off-by: Shanker Donthineni <shankerd@codeaurora.org>
> ---
>  drivers/irqchip/irq-gic-v3-its.c   | 256 ++++++++++++++++++++-----------------
>  include/linux/irqchip/arm-gic-v3.h |   3 +
>  2 files changed, 144 insertions(+), 115 deletions(-)
> 
> diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
> index 6bd881b..b23e00c 100644
> --- a/drivers/irqchip/irq-gic-v3-its.c
> +++ b/drivers/irqchip/irq-gic-v3-its.c
> @@ -55,13 +55,15 @@ struct its_collection {
>  };
>  
>  /*
> - * The ITS_BASER structure - contains memory information and cached
> - * value of BASER register configuration.
> + * The ITS_BASER structure - contains memory information, cached value
> + * of BASER register configuration, ioremaped address and page size.
>   */
>  struct its_baser {
> +	void __iomem	*hwreg;

I'm not overly fond of caching arbitrary device addresses, and I'd be
happier if you had the GITS_BASERn index in there, together with a
couple of helpers to perform the access:

void its_write_baser(struct its_node *its, struct its_baser *baser,
		     u64 val);
u64 its_read_baser(struct its_node *its, struct its_baser *baser);

and keep the offset computing out of sight.

>  	void		*base;
>  	u64		val;
>  	u32		order;
> +	u32		psz;
>  };
>  
>  /*
> @@ -823,27 +825,135 @@ static void its_free_tables(struct its_node *its)
>  	}
>  }
>  
> +static int its_baser_setup(struct its_node *its, struct its_baser *baser,
> +				  u32 order, u64 indirect)

Please move the indirect support to the next patch. I'd like to see
something that doesn't have any semantic change.

> +{
> +	u64 val = readq_relaxed(baser->hwreg);
> +	u64 type = GITS_BASER_TYPE(val);
> +	u64 entry_size = GITS_BASER_ENTRY_SIZE(val);
> +	int psz, alloc_pages;
> +	u64 cache, shr, tmp;
> +	void *base;
> +
> +	/* Do first attempt with the requested attributes */
> +	cache = baser->val & GITS_BASER_CACHEABILITY_MASK;
> +	shr = baser->val & GITS_BASER_SHAREABILITY_MASK;
> +	psz = baser->psz;
> +
> +retry_alloc_baser:
> +	alloc_pages = (PAGE_ORDER_TO_SIZE(order) / psz);
> +	if (alloc_pages > GITS_BASER_PAGES_MAX) {
> +		pr_warn("ITS@%lx: %s too large, reduce ITS pages %u->%u\n",
> +			its->phys_base, its_base_type_string[type],
> +			alloc_pages, GITS_BASER_PAGES_MAX);
> +		alloc_pages = GITS_BASER_PAGES_MAX;
> +		order = get_order(GITS_BASER_PAGES_MAX * psz);
> +	}
> +
> +	base = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
> +	if (!base)
> +		return -ENOMEM;
> +
> +retry_baser:
> +	val = (virt_to_phys(base)				 |
> +		(type << GITS_BASER_TYPE_SHIFT)			 |
> +		((entry_size - 1) << GITS_BASER_ENTRY_SIZE_SHIFT) |
> +		((alloc_pages - 1) << GITS_BASER_PAGES_SHIFT)	 |
> +		cache						 |
> +		shr						 |
> +		indirect					 |

See my comment on the next patch. This should be a bool, and used with
something like:
		[...]
		(indirect ? GITS_BASER_INDIRECT : 0)		|
		[...]

(and of course moved to the next patch, together with the rest of the
indirect support).

> +		GITS_BASER_VALID);
> +
> +	switch (psz) {
> +	case SZ_4K:
> +		val |= GITS_BASER_PAGE_SIZE_4K;
> +		break;
> +	case SZ_16K:
> +		val |= GITS_BASER_PAGE_SIZE_16K;
> +		break;
> +	case SZ_64K:
> +		val |= GITS_BASER_PAGE_SIZE_64K;
> +		break;
> +	}
> +
> +	writeq_relaxed(val, baser->hwreg);
> +	tmp = readq_relaxed(baser->hwreg);
> +
> +	if ((val ^ tmp) & GITS_BASER_SHAREABILITY_MASK) {
> +		/*
> +		 * Shareability didn't stick. Just use
> +		 * whatever the read reported, which is likely
> +		 * to be the only thing this redistributor
> +		 * supports. If that's zero, make it
> +		 * non-cacheable as well.
> +		 */
> +		shr = tmp & GITS_BASER_SHAREABILITY_MASK;
> +		if (!shr) {
> +			cache = GITS_BASER_nC;
> +			__flush_dcache_area(base, PAGE_ORDER_TO_SIZE(order));
> +		}
> +		goto retry_baser;
> +	}
> +
> +	if ((val ^ tmp) & GITS_BASER_PAGE_SIZE_MASK) {
> +		/*
> +		 * Page size didn't stick. Let's try a smaller
> +		 * size and retry. If we reach 4K, then
> +		 * something is horribly wrong...
> +		 */
> +		free_pages((unsigned long)base, order);
> +		baser->base = NULL;
> +
> +		switch (psz) {
> +		case SZ_16K:
> +			psz = SZ_4K;
> +			goto retry_alloc_baser;
> +		case SZ_64K:
> +			psz = SZ_16K;
> +			goto retry_alloc_baser;
> +		}
> +	}
> +
> +	if (val != tmp) {
> +		pr_err("ITS@%lx: %s doesn't stick: %lx %lx\n",
> +		       its->phys_base, its_base_type_string[type],
> +		       (unsigned long) val, (unsigned long) tmp);
> +		free_pages((unsigned long)base, order);
> +		return -ENXIO;
> +	}
> +
> +	baser->base = base;
> +	baser->order = order;
> +	baser->psz = psz;
> +	baser->val = val;
> +	tmp = indirect ? GITS_LVL1_ENTRY_SIZE : entry_size;

Patch #2

> +
> +	pr_info("ITS@%lx: allocated %d %s @%lx (%s, esz %d, psz %dK, shr %d)\n",
> +		its->phys_base, (int)(PAGE_ORDER_TO_SIZE(order) / tmp),
> +		its_base_type_string[type],
> +		(unsigned long)virt_to_phys(base),
> +		indirect ? "indirect" : "flat", (int)entry_size,
> +		psz / SZ_1K, (int)shr >> GITS_BASER_SHAREABILITY_SHIFT);
> +
> +	return 0;
> +}
> +
>  static int its_alloc_tables(const char *node_name, struct its_node *its)
>  {
> -	int err;
> -	int i;
> -	int psz = SZ_64K;
> +	u64 typer = readq_relaxed(its->base + GITS_TYPER);
> +	u32 ids = GITS_TYPER_DEVBITS(typer);
>  	u64 shr = GITS_BASER_InnerShareable;
> -	u64 cache;
> -	u64 typer;
> -	u32 ids;
> +	u64 cache = GITS_BASER_WaWb;
> +	int psz = SZ_64K;
> +	int err, i;
>  
>  	if (its->flags & ITS_FLAGS_WORKAROUND_CAVIUM_22375) {
>  		/*
>  		 * erratum 22375: only alloc 8MB table size
>  		 * erratum 24313: ignore memory access type
>  		 */
> -		cache	= 0;
> +		cache	= GITS_BASER_nCnB;
>  		ids	= 0x14;			/* 20 bits, 8MB */
> -	} else {
> -		cache	= GITS_BASER_WaWb;
> -		typer	= readq_relaxed(its->base + GITS_TYPER);
> -		ids	= GITS_TYPER_DEVBITS(typer);
>  	}
>  
>  	its->device_ids = ids;
> @@ -853,13 +963,16 @@ static int its_alloc_tables(const char *node_name, struct its_node *its)
>  		u64 type = GITS_BASER_TYPE(val);
>  		u64 entry_size = GITS_BASER_ENTRY_SIZE(val);
>  		int order = get_order(psz);
> -		int alloc_pages;
> -		u64 tmp;
> -		void *base;
> +		struct its_baser *baser = its->tables + i;
>  
>  		if (type == GITS_BASER_TYPE_NONE)
>  			continue;
>  
> +		/* Set preferred settings for this BASERn */
> +		baser->hwreg = its->base + GITS_BASER + i * 8;
> +		baser->val = cache | shr;
> +		baser->psz = psz;
> +
>  		/*
>  		 * Allocate as many entries as required to fit the
>  		 * range of device IDs that the ITS can grok... The ID
> @@ -875,115 +988,28 @@ static int its_alloc_tables(const char *node_name, struct its_node *its)
>  			 * smaller than that.  If the requested allocation
>  			 * is smaller, round up to the default page granule.
>  			 */
> -			order = max(get_order((1UL << ids) * entry_size),
> -				    order);
> +			order = max(get_order(entry_size << ids), order);
>  			if (order >= MAX_ORDER) {
>  				order = MAX_ORDER - 1;
> -				pr_warn("%s: Device Table too large, reduce its page order to %u\n",
> -					node_name, order);
> -			}
> -		}
> -
> -retry_alloc_baser:
> -		alloc_pages = (PAGE_ORDER_TO_SIZE(order) / psz);
> -		if (alloc_pages > GITS_BASER_PAGES_MAX) {
> -			alloc_pages = GITS_BASER_PAGES_MAX;
> -			order = get_order(GITS_BASER_PAGES_MAX * psz);
> -			pr_warn("%s: Device Table too large, reduce its page order to %u (%u pages)\n",
> -				node_name, order, alloc_pages);
> -		}
> -
> -		base = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
> -		if (!base) {
> -			err = -ENOMEM;
> -			goto out_free;
> -		}
> -
> -		its->tables[i].base = base;
> -		its->tables[i].order = order;
> -
> -retry_baser:
> -		val = (virt_to_phys(base) 				 |
> -		       (type << GITS_BASER_TYPE_SHIFT)			 |
> -		       ((entry_size - 1) << GITS_BASER_ENTRY_SIZE_SHIFT) |
> -		       cache						 |
> -		       shr						 |
> -		       GITS_BASER_VALID);
> -
> -		switch (psz) {
> -		case SZ_4K:
> -			val |= GITS_BASER_PAGE_SIZE_4K;
> -			break;
> -		case SZ_16K:
> -			val |= GITS_BASER_PAGE_SIZE_16K;
> -			break;
> -		case SZ_64K:
> -			val |= GITS_BASER_PAGE_SIZE_64K;
> -			break;
> -		}
> -
> -		val |= alloc_pages - 1;
> -		its->tables[i].val = val;
> -
> -		writeq_relaxed(val, its->base + GITS_BASER + i * 8);
> -		tmp = readq_relaxed(its->base + GITS_BASER + i * 8);
> -
> -		if ((val ^ tmp) & GITS_BASER_SHAREABILITY_MASK) {
> -			/*
> -			 * Shareability didn't stick. Just use
> -			 * whatever the read reported, which is likely
> -			 * to be the only thing this redistributor
> -			 * supports. If that's zero, make it
> -			 * non-cacheable as well.
> -			 */
> -			shr = tmp & GITS_BASER_SHAREABILITY_MASK;
> -			if (!shr) {
> -				cache = GITS_BASER_nC;
> -				__flush_dcache_area(base, PAGE_ORDER_TO_SIZE(order));
> +				ids = ilog2(PAGE_ORDER_TO_SIZE(order) / entry_size);
> +				pr_warn("ITS@%lx:: Device Table too large, reduce ids %u->%u\n",
> +					its->phys_base, its->device_ids, ids);
>  			}
> -			goto retry_baser;
>  		}
>  
> -		if ((val ^ tmp) & GITS_BASER_PAGE_SIZE_MASK) {
> -			/*
> -			 * Page size didn't stick. Let's try a smaller
> -			 * size and retry. If we reach 4K, then
> -			 * something is horribly wrong...
> -			 */
> -			free_pages((unsigned long)base, order);
> -			its->tables[i].base = NULL;
> -
> -			switch (psz) {
> -			case SZ_16K:
> -				psz = SZ_4K;
> -				goto retry_alloc_baser;
> -			case SZ_64K:
> -				psz = SZ_16K;
> -				goto retry_alloc_baser;
> -			}
> -		}
> -
> -		if (val != tmp) {
> -			pr_err("ITS: %s: GITS_BASER%d doesn't stick: %lx %lx\n",
> -			       node_name, i,
> -			       (unsigned long) val, (unsigned long) tmp);
> -			err = -ENXIO;
> -			goto out_free;
> +		err = its_baser_setup(its, baser, order, 0);
> +		if (err < 0) {
> +			its_free_tables(its);
> +			return err;
>  		}
>  
> -		pr_info("ITS: allocated %d %s @%lx (psz %dK, shr %d)\n",
> -			(int)(PAGE_ORDER_TO_SIZE(order) / entry_size),
> -			its_base_type_string[type],
> -			(unsigned long)virt_to_phys(base),
> -			psz / SZ_1K, (int)shr >> GITS_BASER_SHAREABILITY_SHIFT);
> +		/* Update settings which will be used for next BASERn */
> +		psz = baser->psz;
> +		cache = baser->val & GITS_BASER_CACHEABILITY_MASK;
> +		shr = baser->val & GITS_BASER_SHAREABILITY_MASK;
>  	}
>  
>  	return 0;
> -
> -out_free:
> -	its_free_tables(its);
> -
> -	return err;
>  }
>  
>  static int its_alloc_collections(struct its_node *its)
> diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
> index 9e6fdd3..7f917b9 100644
> --- a/include/linux/irqchip/arm-gic-v3.h
> +++ b/include/linux/irqchip/arm-gic-v3.h
> @@ -204,6 +204,7 @@
>  #define GITS_BASER_NR_REGS		8
>  
>  #define GITS_BASER_VALID		(1UL << 63)
> +#define GITS_BASER_INDIRECT		(1UL << 62)
>  #define GITS_BASER_nCnB			(0UL << 59)
>  #define GITS_BASER_nC			(1UL << 59)
>  #define GITS_BASER_RaWt			(2UL << 59)
> @@ -228,6 +229,7 @@
>  #define GITS_BASER_PAGE_SIZE_64K	(2UL << GITS_BASER_PAGE_SIZE_SHIFT)
>  #define GITS_BASER_PAGE_SIZE_MASK	(3UL << GITS_BASER_PAGE_SIZE_SHIFT)
>  #define GITS_BASER_PAGES_MAX		256
> +#define GITS_BASER_PAGES_SHIFT		(0)
>  
>  #define GITS_BASER_TYPE_NONE		0
>  #define GITS_BASER_TYPE_DEVICE		1
> @@ -238,6 +240,7 @@
>  #define GITS_BASER_TYPE_RESERVED6	6
>  #define GITS_BASER_TYPE_RESERVED7	7
>  
> +#define GITS_LVL1_ENTRY_SIZE		(8UL)

Second patch as well.

>  /*
>   * ITS commands
>   */


Thanks,

	M.
-- 
Jazz is not dead. It just smells funny.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH V3 2/2] irqchip/gicv3-its: Implement two-level(indirect) device table support
  2016-05-09 20:58 ` [PATCH V3 2/2] irqchip/gicv3-its: Implement two-level(indirect) device table support Shanker Donthineni
@ 2016-06-04  9:09   ` Marc Zyngier
  2016-06-04 14:42     ` Shanker Donthineni
  0 siblings, 1 reply; 8+ messages in thread
From: Marc Zyngier @ 2016-06-04  9:09 UTC (permalink / raw)
  To: Shanker Donthineni
  Cc: linux-arm-kernel, linux-kernel, Thomas Gleixner, Jason Cooper,
	Vikram Sethi, Philip Elcan

On Mon, 9 May 2016 15:58:26 -0500
Shanker Donthineni <shankerd@codeaurora.org> wrote:

Hi Shanker,

> Since device IDs are extremely sparse, the single, a.k.a flat table is
> not sufficient for the following two reasons.
> 
> 1) According to ARM-GIC spec, ITS hw can access maximum of 256(pages)*
>    64K(pageszie) bytes. In the best case, it supports upto DEVid=21
         pagesize
>    sparse with minimum device table entry size 8bytes.
> 
> 2) The maximum memory size that is possible without memblock depends on
>    MAX_ORDER. 4MB on 4K page size kernel with default MAX_ORDER, so it
>    supports DEVid range 19bits.
> 
> The two-level device table feature brings us two advantages, the first
> is a very high possibility of supporting upto 32bit sparse, and the
> second one is the best utilization of memory allocation.
> 
> The feature is enabled automatically during driver probe if a single
> ITS page is not adequate for flat table and the hardware is capable
> of two-level table walk.
> 
> Signed-off-by: Shanker Donthineni <shankerd@codeaurora.org>
> ---
> 
> Changes since v2:
>   Fixed a porting bug device 'id' validation check in its_alloc_device_table()
> 
> Changes since v1:
>   Most of this patch has been rewritten after refactoring its_alloc_tables().
>   Always enable device two-level if the memory requirement is more than PAGE_SIZE.
>   Fixed the coding bug that breaks on the BE machine.
>   Edited the commit text.
> 
>  drivers/irqchip/irq-gic-v3-its.c | 97 +++++++++++++++++++++++++++++++++-------
>  1 file changed, 80 insertions(+), 17 deletions(-)
> 
> diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
> index b23e00c..60a1060 100644
> --- a/drivers/irqchip/irq-gic-v3-its.c
> +++ b/drivers/irqchip/irq-gic-v3-its.c
> @@ -938,6 +938,18 @@ retry_baser:
>  	return 0;
>  }
>  
> +/**
> + * Find out whether an implemented baser register supports a single, flat table
> + * or a two-level table by reading bit offset at '62' after writing '1' to it.
> + */
> +static u64 its_baser_check_indirect(struct its_baser *baser)
> +{
> +	u64 val = GITS_BASER_InnerShareable | GITS_BASER_WaWb;
> +
> +	writeq_relaxed(val | GITS_BASER_INDIRECT, baser->hwreg);
> +	return (readq_relaxed(baser->hwreg) & GITS_BASER_INDIRECT); 

That's a bit ugly. You're returning a mask for the indirect bit, and
treating it either as a boolean or a mask. I'd rather you return a
boolean, treat it as such in most of this code, and only turn it into a
mask when you compute the GITS_BASER value.

> +}
> +
>  static int its_alloc_tables(const char *node_name, struct its_node *its)
>  {
>  	u64 typer = readq_relaxed(its->base + GITS_TYPER);
> @@ -964,6 +976,7 @@ static int its_alloc_tables(const char *node_name, struct its_node *its)
>  		u64 entry_size = GITS_BASER_ENTRY_SIZE(val);
>  		int order = get_order(psz);
>  		struct its_baser *baser = its->tables + i;
> +		u64 indirect = 0;

The scope of this flag is confusingly wide. Once an indirect table has
been created, all the following tables are indirect too, which is
definitely not what we want (only the device table should be
indirected).

>  
>  		if (type == GITS_BASER_TYPE_NONE)
>  			continue;
> @@ -977,17 +990,27 @@ static int its_alloc_tables(const char *node_name, struct its_node *its)
>  		 * Allocate as many entries as required to fit the
>  		 * range of device IDs that the ITS can grok... The ID
>  		 * space being incredibly sparse, this results in a
> -		 * massive waste of memory.
> +		 * massive waste of memory if two-level device table
> +		 * feature is not supported by hardware.
>  		 *
>  		 * For other tables, only allocate a single page.
>  		 */
>  		if (type == GITS_BASER_TYPE_DEVICE) {
> -			/*
> -			 * 'order' was initialized earlier to the default page
> -			 * granule of the the ITS.  We can't have an allocation
> -			 * smaller than that.  If the requested allocation
> -			 * is smaller, round up to the default page granule.
> -			 */
> +			if ((entry_size << ids) > psz)
> +				indirect = its_baser_check_indirect(baser);
> +
> +			if (indirect) {
> +				/*
> +				 * The size of the lvl2 table is equal to ITS
> +				 * page size which is 'psz'. For computing lvl1
> +				 * table size, subtract ID bits that sparse
> +				 * lvl2 table from 'ids' which is reported by
> +				 * ITS hardware times lvl1 table entry size.
> +				 */
> +				ids -= ilog2(psz / entry_size);
> +				entry_size = GITS_LVL1_ENTRY_SIZE;
> +			}
> +
>  			order = max(get_order(entry_size << ids), order);
>  			if (order >= MAX_ORDER) {
>  				order = MAX_ORDER - 1;

This needs some splitting as well. Given that we're giving the
Device table a special treatment, I think it'd make sense to give it
its own function that would return the order of the allocation and
the indirect flag.

> @@ -997,7 +1020,7 @@ static int its_alloc_tables(const char *node_name, struct its_node *its)
>  			}
>  		}
>  
> -		err = its_baser_setup(its, baser, order, 0);
> +		err = its_baser_setup(its, baser, order, indirect);
>  		if (err < 0) {
>  			its_free_tables(its);
>  			return err;
> @@ -1187,10 +1210,57 @@ static struct its_baser *its_get_baser(struct its_node *its, u32 type)
>  	return NULL;
>  }
>  
> +static bool its_alloc_device_table(struct its_node *its, u32 dev_id)
> +{
> +	struct its_baser *baser;
> +	struct page *page;
> +	u32 esz, idx;
> +	u64 *table;
> +
> +	baser = its_get_baser(its, GITS_BASER_TYPE_DEVICE);
> +
> +	/* Don't allow device id that exceeds ITS hardware limit */
> +	if (!baser)
> +		return (ilog2(dev_id) < its->device_ids);
> +
> +	/* Don't allow device id that exceeds single, flat table limit */
> +	esz = GITS_BASER_ENTRY_SIZE(baser->val);
> +	if (!(baser->val & GITS_BASER_INDIRECT))
> +		return (dev_id < (PAGE_ORDER_TO_SIZE(baser->order) / esz));
> +
> +	/* Compute 1st level table index & check if that exceeds table limit */
> +	idx = dev_id >> ilog2(baser->psz / esz);
> +	if (idx >= (PAGE_ORDER_TO_SIZE(baser->order) / GITS_LVL1_ENTRY_SIZE))
> +		return false;
> +
> +	table = baser->base;
> +
> +	/* Allocate memory for 2nd level table */
> +	if (!table[idx]) {
> +		page = alloc_pages(GFP_KERNEL | __GFP_ZERO, get_order(baser->psz));
> +		if (!page)
> +			return false;
> +
> +		/* Flush memory to PoC if hardware doesn't support coherency */
> +		if (!(baser->val & GITS_BASER_SHAREABILITY_MASK))
> +			__flush_dcache_area(page_address(page), baser->psz);
> +
> +		table[idx] = cpu_to_le64(page_to_phys(page) | GITS_BASER_VALID);
> +
> +		/* Flush memory to PoC if hardware doesn't support coherency */

Please don't use the same comment twice, this is a bit misleading.
Explain that the first clean/invalidate pushes out the data page, and
that the second pushes out the pointer to that page.

> +		if (!(baser->val & GITS_BASER_SHAREABILITY_MASK))
> +			__flush_dcache_area(table + idx, GITS_LVL1_ENTRY_SIZE);
> +
> +		/* Ensure updated table contents are visible to ITS hardware */
> +		dsb(sy);
> +	}
> +
> +	return true;
> +}
> +
>  static struct its_device *its_create_device(struct its_node *its, u32 dev_id,
>  					    int nvecs)
>  {
> -	struct its_baser *baser;
>  	struct its_device *dev;
>  	unsigned long *lpi_map;
>  	unsigned long flags;
> @@ -1201,14 +1271,7 @@ static struct its_device *its_create_device(struct its_node *its, u32 dev_id,
>  	int nr_ites;
>  	int sz;
>  
> -	baser = its_get_baser(its, GITS_BASER_TYPE_DEVICE);
> -
> -	/* Don't allow 'dev_id' that exceeds single, flat table limit */
> -	if (baser) {
> -		if (dev_id >= (PAGE_ORDER_TO_SIZE(baser->order) /
> -			      GITS_BASER_ENTRY_SIZE(baser->val)))
> -			return NULL;
> -	} else if (ilog2(dev_id) >= its->device_ids)
> +	if (!its_alloc_device_table(its, dev_id))
>  		return NULL;
>  
>  	dev = kzalloc(sizeof(*dev), GFP_KERNEL);


Thanks,

	M.
-- 
Jazz is not dead. It just smells funny.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH V3 1/2] irqchip/gicv3-its: split its_alloc_tables() into two functions
  2016-05-09 20:58 ` [PATCH V3 1/2] irqchip/gicv3-its: split its_alloc_tables() into two functions Shanker Donthineni
  2016-06-04  8:53   ` Marc Zyngier
@ 2016-06-04 11:45   ` Marc Zyngier
  1 sibling, 0 replies; 8+ messages in thread
From: Marc Zyngier @ 2016-06-04 11:45 UTC (permalink / raw)
  To: Shanker Donthineni
  Cc: linux-arm-kernel, linux-kernel, Thomas Gleixner, Jason Cooper,
	Vikram Sethi, Philip Elcan

On Mon, 9 May 2016 15:58:25 -0500
Shanker Donthineni <shankerd@codeaurora.org> wrote:

> The function is getting out of control, it has too many goto
> statements and would be too complicated for adding a feature
> two-level device table. So, it is time for us to cleanup and
> move some of the logic to a separate function without affecting
> the existing functionality.
> 
> Signed-off-by: Shanker Donthineni <shankerd@codeaurora.org>
> ---
>  drivers/irqchip/irq-gic-v3-its.c   | 256 ++++++++++++++++++++-----------------
>  include/linux/irqchip/arm-gic-v3.h |   3 +
>  2 files changed, 144 insertions(+), 115 deletions(-)
> 
> diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
> index 6bd881b..b23e00c 100644
> --- a/drivers/irqchip/irq-gic-v3-its.c
> +++ b/drivers/irqchip/irq-gic-v3-its.c
> @@ -55,13 +55,15 @@ struct its_collection {
>  };
>  
>  /*
> - * The ITS_BASER structure - contains memory information and cached
> - * value of BASER register configuration.
> + * The ITS_BASER structure - contains memory information, cached value
> + * of BASER register configuration, ioremaped address and page size.
>   */
>  struct its_baser {
> +	void __iomem	*hwreg;
>  	void		*base;
>  	u64		val;
>  	u32		order;
> +	u32		psz;
>  };
>  
>  /*
> @@ -823,27 +825,135 @@ static void its_free_tables(struct its_node *its)
>  	}
>  }
>  
> +static int its_baser_setup(struct its_node *its, struct its_baser *baser,
> +				  u32 order, u64 indirect)
> +{
> +	u64 val = readq_relaxed(baser->hwreg);
> +	u64 type = GITS_BASER_TYPE(val);
> +	u64 entry_size = GITS_BASER_ENTRY_SIZE(val);
> +	int psz, alloc_pages;
> +	u64 cache, shr, tmp;
> +	void *base;
> +
> +	/* Do first attempt with the requested attributes */
> +	cache = baser->val & GITS_BASER_CACHEABILITY_MASK;
> +	shr = baser->val & GITS_BASER_SHAREABILITY_MASK;
> +	psz = baser->psz;
> +
> +retry_alloc_baser:
> +	alloc_pages = (PAGE_ORDER_TO_SIZE(order) / psz);
> +	if (alloc_pages > GITS_BASER_PAGES_MAX) {
> +		pr_warn("ITS@%lx: %s too large, reduce ITS pages %u->%u\n",
> +			its->phys_base, its_base_type_string[type],
> +			alloc_pages, GITS_BASER_PAGES_MAX);

By the way: as you're changing the output of various messages, please
use %pa instead of %lx (and make sure you're passing the parameter by
reference...).

Thanks,

	M.
-- 
Jazz is not dead. It just smells funny.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH V3 1/2] irqchip/gicv3-its: split its_alloc_tables() into two functions
  2016-06-04  8:53   ` Marc Zyngier
@ 2016-06-04 14:30     ` Shanker Donthineni
  0 siblings, 0 replies; 8+ messages in thread
From: Shanker Donthineni @ 2016-06-04 14:30 UTC (permalink / raw)
  To: Marc Zyngier
  Cc: Philip Elcan, Jason Cooper, Vikram Sethi, linux-kernel,
	Thomas Gleixner, linux-arm-kernel

Hi Marc,

On 06/04/2016 03:53 AM, Marc Zyngier wrote:
> On Mon, 9 May 2016 15:58:25 -0500
> Shanker Donthineni <shankerd@codeaurora.org> wrote:
>
>> The function is getting out of control, it has too many goto
>> statements and would be too complicated for adding a feature
>> two-level device table. So, it is time for us to cleanup and
>> move some of the logic to a separate function without affecting
>> the existing functionality.
>>
>> Signed-off-by: Shanker Donthineni <shankerd@codeaurora.org>
>> ---
>>  drivers/irqchip/irq-gic-v3-its.c   | 256
> ++++++++++++++++++++-----------------
>>  include/linux/irqchip/arm-gic-v3.h |   3 +
>>  2 files changed, 144 insertions(+), 115 deletions(-)
>>
>> diff --git a/drivers/irqchip/irq-gic-v3-its.c
> b/drivers/irqchip/irq-gic-v3-its.c
>> index 6bd881b..b23e00c 100644
>> --- a/drivers/irqchip/irq-gic-v3-its.c
>> +++ b/drivers/irqchip/irq-gic-v3-its.c
>> @@ -55,13 +55,15 @@ struct its_collection {
>>  };
>>  
>>  /*
>> - * The ITS_BASER structure - contains memory information and cached
>> - * value of BASER register configuration.
>> + * The ITS_BASER structure - contains memory information, cached value
>> + * of BASER register configuration, ioremaped address and page size.
>>   */
>>  struct its_baser {
>> +	void __iomem	*hwreg;
> I'm not overly fond of caching arbitrary device addresses, and I'd be
> happier if you had the GITS_BASERn index in there, together with a
> couple of helpers to perform the access:
>
> void its_write_baser(struct its_node *its, struct its_baser *baser,
> 		     u64 val);
> u64 its_read_baser(struct its_node *its, struct its_baser *baser);
>
> and keep the offset computing out of sight.
Sure, I am happy to make this change; it also helps code readability.

>>  	void		*base;
>>  	u64		val;
>>  	u32		order;
>> +	u32		psz;
>>  };
>>  
>>  /*
>> @@ -823,27 +825,135 @@ static void its_free_tables(struct its_node *its)
>>  	}
>>  }
>>  
>> +static int its_baser_setup(struct its_node *its, struct its_baser
> *baser,
>> +				  u32 order, u64 indirect)
> Please move the indirect support to the next patch. I'd like to see
> something that doesn't have any semantic change.
I'll move the ITS indirection-related logic to the next patch.
>> +{
>> +	u64 val = readq_relaxed(baser->hwreg);
>> +	u64 type = GITS_BASER_TYPE(val);
>> +	u64 entry_size = GITS_BASER_ENTRY_SIZE(val);
>> +	int psz, alloc_pages;
>> +	u64 cache, shr, tmp;
>> +	void *base;
>> +
>> +	/* Do first attempt with the requested attributes */
>> +	cache = baser->val & GITS_BASER_CACHEABILITY_MASK;
>> +	shr = baser->val & GITS_BASER_SHAREABILITY_MASK;
>> +	psz = baser->psz;
>> +
>> +retry_alloc_baser:
>> +	alloc_pages = (PAGE_ORDER_TO_SIZE(order) / psz);
>> +	if (alloc_pages > GITS_BASER_PAGES_MAX) {
>> +		pr_warn("ITS@%lx: %s too large, reduce ITS pages
> %u->%u\n",
>> +			its->phys_base, its_base_type_string[type],
>> +			alloc_pages, GITS_BASER_PAGES_MAX);
>> +		alloc_pages = GITS_BASER_PAGES_MAX;
>> +		order = get_order(GITS_BASER_PAGES_MAX * psz);
>> +	}
>> +
>> +	base = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
>> +	if (!base)
>> +		return -ENOMEM;
>> +
>> +retry_baser:
>> +	val = (virt_to_phys(base)				 |
>> +		(type << GITS_BASER_TYPE_SHIFT)			 |
>> +		((entry_size - 1) << GITS_BASER_ENTRY_SIZE_SHIFT) |
>> +		((alloc_pages - 1) << GITS_BASER_PAGES_SHIFT)	 |
>> +		cache						 |
>> +		shr						 |
>> +		indirect					 |
> See my comment on the next patch. This should be a bool, and used with
> something like:
> 		[...]
> 		indirect ? GITS_BASER_INDIRECT : 0		|
> 		[...]
>
> (and of course moved to the next patch, together with the rest of the
> indirect support.
I'll follow your suggestion, and the corresponding changes will be moved to the next patch.
>> +		GITS_BASER_VALID);
>> +
>> +	switch (psz) {
>> +	case SZ_4K:
>> +		val |= GITS_BASER_PAGE_SIZE_4K;
>> +		break;
>> +	case SZ_16K:
>> +		val |= GITS_BASER_PAGE_SIZE_16K;
>> +		break;
>> +	case SZ_64K:
>> +		val |= GITS_BASER_PAGE_SIZE_64K;
>> +		break;
>> +	}
>> +
>> +	writeq_relaxed(val, baser->hwreg);
>> +	tmp = readq_relaxed(baser->hwreg);
>> +
>> +	if ((val ^ tmp) & GITS_BASER_SHAREABILITY_MASK) {
>> +		/*
>> +		 * Shareability didn't stick. Just use
>> +		 * whatever the read reported, which is likely
>> +		 * to be the only thing this redistributor
>> +		 * supports. If that's zero, make it
>> +		 * non-cacheable as well.
>> +		 */
>> +		shr = tmp & GITS_BASER_SHAREABILITY_MASK;
>> +		if (!shr) {
>> +			cache = GITS_BASER_nC;
>> +			__flush_dcache_area(base,
> PAGE_ORDER_TO_SIZE(order));
>> +		}
>> +		goto retry_baser;
>> +	}
>> +
>> +	if ((val ^ tmp) & GITS_BASER_PAGE_SIZE_MASK) {
>> +		/*
>> +		 * Page size didn't stick. Let's try a smaller
>> +		 * size and retry. If we reach 4K, then
>> +		 * something is horribly wrong...
>> +		 */
>> +		free_pages((unsigned long)base, order);
>> +		baser->base = NULL;
>> +
>> +		switch (psz) {
>> +		case SZ_16K:
>> +			psz = SZ_4K;
>> +			goto retry_alloc_baser;
>> +		case SZ_64K:
>> +			psz = SZ_16K;
>> +			goto retry_alloc_baser;
>> +		}
>> +	}
>> +
>> +	if (val != tmp) {
>> +		pr_err("ITS@%lx: %s doesn't stick: %lx %lx\n",
>> +		       its->phys_base, its_base_type_string[type],
>> +		       (unsigned long) val, (unsigned long) tmp);
>> +		free_pages((unsigned long)base, order);
>> +		return -ENXIO;
>> +	}
>> +
>> +	baser->base = base;
>> +	baser->order = order;
>> +	baser->psz = psz;
>> +	baser->val = val;
>> +	tmp = indirect ? GITS_LVL1_ENTRY_SIZE : entry_size;
> Patch #2
Sure.
>> +
>> +	pr_info("ITS@%lx: allocated %d %s @%lx (%s, esz %d, psz %dK, shr
> %d)\n",
>> +		its->phys_base, (int)(PAGE_ORDER_TO_SIZE(order) / tmp),
>> +		its_base_type_string[type],
>> +		(unsigned long)virt_to_phys(base),
>> +		indirect ? "indirect" : "flat", (int)entry_size,
>> +		psz / SZ_1K, (int)shr >> GITS_BASER_SHAREABILITY_SHIFT);
>> +
>> +	return 0;
>> +}
>> +
>>  static int its_alloc_tables(const char *node_name, struct its_node
> *its)
>>  {
>> -	int err;
>> -	int i;
>> -	int psz = SZ_64K;
>> +	u64 typer = readq_relaxed(its->base + GITS_TYPER);
>> +	u32 ids = GITS_TYPER_DEVBITS(typer);
>>  	u64 shr = GITS_BASER_InnerShareable;
>> -	u64 cache;
>> -	u64 typer;
>> -	u32 ids;
>> +	u64 cache = GITS_BASER_WaWb;
>> +	int psz = SZ_64K;
>> +	int err, i;
>>  
>>  	if (its->flags & ITS_FLAGS_WORKAROUND_CAVIUM_22375) {
>>  		/*
>>  		 * erratum 22375: only alloc 8MB table size
>>  		 * erratum 24313: ignore memory access type
>>  		 */
>> -		cache	= 0;
>> +		cache	= GITS_BASER_nCnB;
>>  		ids	= 0x14;			/* 20 bits, 8MB */
>> -	} else {
>> -		cache	= GITS_BASER_WaWb;
>> -		typer	= readq_relaxed(its->base + GITS_TYPER);
>> -		ids	= GITS_TYPER_DEVBITS(typer);
>>  	}
>>  
>>  	its->device_ids = ids;
>> @@ -853,13 +963,16 @@ static int its_alloc_tables(const char *node_name,
> struct its_node *its)
>>  		u64 type = GITS_BASER_TYPE(val);
>>  		u64 entry_size = GITS_BASER_ENTRY_SIZE(val);
>>  		int order = get_order(psz);
>> -		int alloc_pages;
>> -		u64 tmp;
>> -		void *base;
>> +		struct its_baser *baser = its->tables + i;
>>  
>>  		if (type == GITS_BASER_TYPE_NONE)
>>  			continue;
>>  
>> +		/* Set preferred settings for this BASERn */
>> +		baser->hwreg = its->base + GITS_BASER + i * 8;
>> +		baser->val = cache | shr;
>> +		baser->psz = psz;
>> +
>>  		/*
>>  		 * Allocate as many entries as required to fit the
>>  		 * range of device IDs that the ITS can grok... The ID
>> @@ -875,115 +988,28 @@ static int its_alloc_tables(const char
> *node_name, struct its_node *its)
>>  			 * smaller than that.  If the requested allocation
>>  			 * is smaller, round up to the default page
> granule.
>>  			 */
>> -			order = max(get_order((1UL << ids) * entry_size),
>> -				    order);
>> +			order = max(get_order(entry_size << ids), order);
>>  			if (order >= MAX_ORDER) {
>>  				order = MAX_ORDER - 1;
>> -				pr_warn("%s: Device Table too large,
> reduce its page order to %u\n",
>> -					node_name, order);
>> -			}
>> -		}
>> -
>> -retry_alloc_baser:
>> -		alloc_pages = (PAGE_ORDER_TO_SIZE(order) / psz);
>> -		if (alloc_pages > GITS_BASER_PAGES_MAX) {
>> -			alloc_pages = GITS_BASER_PAGES_MAX;
>> -			order = get_order(GITS_BASER_PAGES_MAX * psz);
>> -			pr_warn("%s: Device Table too large, reduce its
> page order to %u (%u pages)\n",
>> -				node_name, order, alloc_pages);
>> -		}
>> -
>> -		base = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
> order);
>> -		if (!base) {
>> -			err = -ENOMEM;
>> -			goto out_free;
>> -		}
>> -
>> -		its->tables[i].base = base;
>> -		its->tables[i].order = order;
>> -
>> -retry_baser:
>> -		val = (virt_to_phys(base) 				 |
>> -		       (type << GITS_BASER_TYPE_SHIFT)			 |
>> -		       ((entry_size - 1) << GITS_BASER_ENTRY_SIZE_SHIFT) |
>> -		       cache						 |
>> -		       shr						 |
>> -		       GITS_BASER_VALID);
>> -
>> -		switch (psz) {
>> -		case SZ_4K:
>> -			val |= GITS_BASER_PAGE_SIZE_4K;
>> -			break;
>> -		case SZ_16K:
>> -			val |= GITS_BASER_PAGE_SIZE_16K;
>> -			break;
>> -		case SZ_64K:
>> -			val |= GITS_BASER_PAGE_SIZE_64K;
>> -			break;
>> -		}
>> -
>> -		val |= alloc_pages - 1;
>> -		its->tables[i].val = val;
>> -
>> -		writeq_relaxed(val, its->base + GITS_BASER + i * 8);
>> -		tmp = readq_relaxed(its->base + GITS_BASER + i * 8);
>> -
>> -		if ((val ^ tmp) & GITS_BASER_SHAREABILITY_MASK) {
>> -			/*
>> -			 * Shareability didn't stick. Just use
>> -			 * whatever the read reported, which is likely
>> -			 * to be the only thing this redistributor
>> -			 * supports. If that's zero, make it
>> -			 * non-cacheable as well.
>> -			 */
>> -			shr = tmp & GITS_BASER_SHAREABILITY_MASK;
>> -			if (!shr) {
>> -				cache = GITS_BASER_nC;
>> -				__flush_dcache_area(base,
> PAGE_ORDER_TO_SIZE(order));
>> +				ids = ilog2(PAGE_ORDER_TO_SIZE(order) /
> entry_size);
>> +				pr_warn("ITS@%lx:: Device Table too large,
> reduce ids %u->%u\n",
>> +					its->phys_base, its->device_ids,
> ids);
>>  			}
>> -			goto retry_baser;
>>  		}
>>  
>> -		if ((val ^ tmp) & GITS_BASER_PAGE_SIZE_MASK) {
>> -			/*
>> -			 * Page size didn't stick. Let's try a smaller
>> -			 * size and retry. If we reach 4K, then
>> -			 * something is horribly wrong...
>> -			 */
>> -			free_pages((unsigned long)base, order);
>> -			its->tables[i].base = NULL;
>> -
>> -			switch (psz) {
>> -			case SZ_16K:
>> -				psz = SZ_4K;
>> -				goto retry_alloc_baser;
>> -			case SZ_64K:
>> -				psz = SZ_16K;
>> -				goto retry_alloc_baser;
>> -			}
>> -		}
>> -
>> -		if (val != tmp) {
>> -			pr_err("ITS: %s: GITS_BASER%d doesn't stick: %lx
> %lx\n",
>> -			       node_name, i,
>> -			       (unsigned long) val, (unsigned long) tmp);
>> -			err = -ENXIO;
>> -			goto out_free;
>> +		err = its_baser_setup(its, baser, order, 0);
>> +		if (err < 0) {
>> +			its_free_tables(its);
>> +			return err;
>>  		}
>>  
>> -		pr_info("ITS: allocated %d %s @%lx (psz %dK, shr %d)\n",
>> -			(int)(PAGE_ORDER_TO_SIZE(order) / entry_size),
>> -			its_base_type_string[type],
>> -			(unsigned long)virt_to_phys(base),
>> -			psz / SZ_1K, (int)shr >>
> GITS_BASER_SHAREABILITY_SHIFT);
>> +		/* Update settings which will be used for next BASERn */
>> +		psz = baser->psz;
>> +		cache = baser->val & GITS_BASER_CACHEABILITY_MASK;
>> +		shr = baser->val & GITS_BASER_SHAREABILITY_MASK;
>>  	}
>>  
>>  	return 0;
>> -
>> -out_free:
>> -	its_free_tables(its);
>> -
>> -	return err;
>>  }
>>  
>>  static int its_alloc_collections(struct its_node *its)
>> diff --git a/include/linux/irqchip/arm-gic-v3.h
> b/include/linux/irqchip/arm-gic-v3.h
>> index 9e6fdd3..7f917b9 100644
>> --- a/include/linux/irqchip/arm-gic-v3.h
>> +++ b/include/linux/irqchip/arm-gic-v3.h
>> @@ -204,6 +204,7 @@
>>  #define GITS_BASER_NR_REGS		8
>>  
>>  #define GITS_BASER_VALID		(1UL << 63)
>> +#define GITS_BASER_INDIRECT		(1UL << 62)
>>  #define GITS_BASER_nCnB			(0UL << 59)
>>  #define GITS_BASER_nC			(1UL << 59)
>>  #define GITS_BASER_RaWt			(2UL << 59)
>> @@ -228,6 +229,7 @@
>>  #define GITS_BASER_PAGE_SIZE_64K	(2UL <<
> GITS_BASER_PAGE_SIZE_SHIFT)
>>  #define GITS_BASER_PAGE_SIZE_MASK	(3UL <<
> GITS_BASER_PAGE_SIZE_SHIFT)
>>  #define GITS_BASER_PAGES_MAX		256
>> +#define GITS_BASER_PAGES_SHIFT		(0)
>>  
>>  #define GITS_BASER_TYPE_NONE		0
>>  #define GITS_BASER_TYPE_DEVICE		1
>> @@ -238,6 +240,7 @@
>>  #define GITS_BASER_TYPE_RESERVED6	6
>>  #define GITS_BASER_TYPE_RESERVED7	7
>>  
>> +#define GITS_LVL1_ENTRY_SIZE		(8UL)
> Second patch as well.
Sure,
>>  /*
>>   * ITS commands
>>   */
>
> Thanks,
>
> 	M.

-- 
Shanker Donthineni
Qualcomm Technologies, Inc. on behalf of Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux Foundation Collaborative Project

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH V3 2/2] irqchip/gicv3-its: Implement two-level(indirect) device table support
  2016-06-04  9:09   ` Marc Zyngier
@ 2016-06-04 14:42     ` Shanker Donthineni
  0 siblings, 0 replies; 8+ messages in thread
From: Shanker Donthineni @ 2016-06-04 14:42 UTC (permalink / raw)
  To: Marc Zyngier
  Cc: Philip Elcan, Jason Cooper, Vikram Sethi, linux-kernel,
	Thomas Gleixner, linux-arm-kernel

Hi Marc,

On 06/04/2016 04:09 AM, Marc Zyngier wrote:
> On Mon, 9 May 2016 15:58:26 -0500
> Shanker Donthineni <shankerd@codeaurora.org> wrote:
>
> Hi Shanker,
>
>> Since device IDs are extremely sparse, the single, a.k.a flat table is
>> not sufficient for the following two reasons.
>>
>> 1) According to ARM-GIC spec, ITS hw can access maximum of 256(pages)*
>>    64K(pageszie) bytes. In the best case, it supports upto DEVid=21
>          pagesize
>>    sparse with minimum device table entry size 8bytes.
>>
>> 2) The maximum memory size that is possible without memblock depends on
>>    MAX_ORDER. 4MB on 4K page size kernel with default MAX_ORDER, so it
>>    supports DEVid range 19bits.
>>
>> The two-level device table feature brings us two advantages, the first
>> is a very high possibility of supporting upto 32bit sparse, and the
>> second one is the best utilization of memory allocation.
>>
>> The feature is enabled automatically during driver probe if a single
>> ITS page is not adequate for flat table and the hardware is capable
>> of two-level table walk.
>>
>> Signed-off-by: Shanker Donthineni <shankerd@codeaurora.org>
>> ---
>>
>> Changes since v2:
>>   Fixed a porting bug device 'id' validation check in
> its_alloc_device_table()
>> Changes since v1:
>>   Most of this patch has been rewritten after refactoring
> its_alloc_tables().
>>   Always enable device two-level if the memory requirement is more than
> PAGE_SIZE.
>>   Fixed the coding bug that breaks on the BE machine.
>>   Edited the commit text.
>>
>>  drivers/irqchip/irq-gic-v3-its.c | 97
> +++++++++++++++++++++++++++++++++-------
>>  1 file changed, 80 insertions(+), 17 deletions(-)
>>
>> diff --git a/drivers/irqchip/irq-gic-v3-its.c
> b/drivers/irqchip/irq-gic-v3-its.c
>> index b23e00c..60a1060 100644
>> --- a/drivers/irqchip/irq-gic-v3-its.c
>> +++ b/drivers/irqchip/irq-gic-v3-its.c
>> @@ -938,6 +938,18 @@ retry_baser:
>>  	return 0;
>>  }
>>  
>> +/**
>> + * Find out whether an implemented baser register supports a single,
> flat table
>> + * or a two-level table by reading bit offset at '62' after writing '1'
> to it.
>> + */
>> +static u64 its_baser_check_indirect(struct its_baser *baser)
>> +{
>> +	u64 val = GITS_BASER_InnerShareable | GITS_BASER_WaWb;
>> +
>> +	writeq_relaxed(val | GITS_BASER_INDIRECT, baser->hwreg);
>> +	return (readq_relaxed(baser->hwreg) & GITS_BASER_INDIRECT); 
> That's a bit ugly. You're returning a mask for the indirect bit, and
> treat it either as a boolean or a mask. I'd rather you return a
> boolean, treat as such in most of this code, and only turn it into a
> mask when you compute the GITS_BASER value.
>
>> +}
>> +
>>  static int its_alloc_tables(const char *node_name, struct its_node
> *its)
>>  {
>>  	u64 typer = readq_relaxed(its->base + GITS_TYPER);
>> @@ -964,6 +976,7 @@ static int its_alloc_tables(const char *node_name,
> struct its_node *its)
>>  		u64 entry_size = GITS_BASER_ENTRY_SIZE(val);
>>  		int order = get_order(psz);
>>  		struct its_baser *baser = its->tables + i;
>> +		u64 indirect = 0;
> The scope of this flag is confusingly wide. Once an indirect table has
> been created, all the following tables are indirect too, which is
> definitely not what we want (only the device table should be
> indirected).
Sorry for the confusion: the scope of this variable is per-BASERn, and the flag will not be carried over to the next BASERn entry.

>>  
>>  		if (type == GITS_BASER_TYPE_NONE)
>>  			continue;
>> @@ -977,17 +990,27 @@ static int its_alloc_tables(const char *node_name,
> struct its_node *its)
>>  		 * Allocate as many entries as required to fit the
>>  		 * range of device IDs that the ITS can grok... The ID
>>  		 * space being incredibly sparse, this results in a
>> -		 * massive waste of memory.
>> +		 * massive waste of memory if two-level device table
>> +		 * feature is not supported by hardware.
>>  		 *
>>  		 * For other tables, only allocate a single page.
>>  		 */
>>  		if (type == GITS_BASER_TYPE_DEVICE) {
>> -			/*
>> -			 * 'order' was initialized earlier to the default
> page
>> -			 * granule of the the ITS.  We can't have an
> allocation
>> -			 * smaller than that.  If the requested allocation
>> -			 * is smaller, round up to the default page
> granule.
>> -			 */
>> +			if ((entry_size << ids) > psz)
>> +				indirect =
> its_baser_check_indirect(baser);
>> +
>> +			if (indirect) {
>> +				/*
>> +				 * The size of the lvl2 table is equal to
> ITS
>> +				 * page size which is 'psz'. For computing
> lvl1
>> +				 * table size, subtract ID bits that
> sparse
>> +				 * lvl2 table from 'ids' which is reported
> by
>> +				 * ITS hardware times lvl1 table entry
> size.
>> +				 */
>> +				ids -= ilog2(psz / entry_size);
>> +				entry_size = GITS_LVL1_ENTRY_SIZE;
>> +			}
>> +
>>  			order = max(get_order(entry_size << ids), order);
>>  			if (order >= MAX_ORDER) {
>>  				order = MAX_ORDER - 1;
> This needs some splitting as well. Given that we're giving the
> Device table a special treatment, I think it'd make sense to give it
> its own function that would return the order of the the allocation and
> the indirect flag.
Okay, I'll move this into a new function that handles the device-table-specific code.
 
>> @@ -997,7 +1020,7 @@ static int its_alloc_tables(const char *node_name,
> struct its_node *its)
>>  			}
>>  		}
>>  
>> -		err = its_baser_setup(its, baser, order, 0);
>> +		err = its_baser_setup(its, baser, order, indirect);
>>  		if (err < 0) {
>>  			its_free_tables(its);
>>  			return err;
>> @@ -1187,10 +1210,57 @@ static struct its_baser *its_get_baser(struct
> its_node *its, u32 type)
>>  	return NULL;
>>  }
>>  
>> +static bool its_alloc_device_table(struct its_node *its, u32 dev_id)
>> +{
>> +	struct its_baser *baser;
>> +	struct page *page;
>> +	u32 esz, idx;
>> +	u64 *table;
>> +
>> +	baser = its_get_baser(its, GITS_BASER_TYPE_DEVICE);
>> +
>> +	/* Don't allow device id that exceeds ITS hardware limit */
>> +	if (!baser)
>> +		return (ilog2(dev_id) < its->device_ids);
>> +
>> +	/* Don't allow device id that exceeds single, flat table limit */
>> +	esz = GITS_BASER_ENTRY_SIZE(baser->val);
>> +	if (!(baser->val & GITS_BASER_INDIRECT))
>> +		return (dev_id < (PAGE_ORDER_TO_SIZE(baser->order) /
> esz));
>> +
>> +	/* Compute 1st level table index & check if that exceeds table
> limit */
>> +	idx = dev_id >> ilog2(baser->psz / esz);
>> +	if (idx >= (PAGE_ORDER_TO_SIZE(baser->order) /
> GITS_LVL1_ENTRY_SIZE))
>> +		return false;
>> +
>> +	table = baser->base;
>> +
>> +	/* Allocate memory for 2nd level table */
>> +	if (!table[idx]) {
>> +		page = alloc_pages(GFP_KERNEL | __GFP_ZERO,
> get_order(baser->psz));
>> +		if (!page)
>> +			return false;
>> +
>> +		/* Flush memory to PoC if hardware doesn't support
> coherency */
>> +		if (!(baser->val & GITS_BASER_SHAREABILITY_MASK))
>> +			__flush_dcache_area(page_address(page),
> baser->psz);
>> +
>> +		table[idx] = cpu_to_le64(page_to_phys(page) |
> GITS_BASER_VALID);
>> +
>> +		/* Flush memory to PoC if hardware doesn't support
> coherency */
>
> Please don't use the same comment twice, this is a bit misleading.
> Explain that the first clean/invalidate pushes out the data page, and
> that the second pushes out the pointer to that page.
>
I'll fix.
>> +		if (!(baser->val & GITS_BASER_SHAREABILITY_MASK))
>> +			__flush_dcache_area(table + idx, GITS_LVL1_ENTRY_SIZE);
>> +
>> +		/* Ensure updated table contents are visible to ITS hardware */
>> +		dsb(sy);
>> +	}
>> +
>> +	return true;
>> +}
>> +
>>  static struct its_device *its_create_device(struct its_node *its, u32 dev_id,
>>  					    int nvecs)
>>  {
>> -	struct its_baser *baser;
>>  	struct its_device *dev;
>>  	unsigned long *lpi_map;
>>  	unsigned long flags;
>> @@ -1201,14 +1271,7 @@ static struct its_device *its_create_device(struct its_node *its, u32 dev_id,
>>  	int nr_ites;
>>  	int sz;
>>  
>> -	baser = its_get_baser(its, GITS_BASER_TYPE_DEVICE);
>> -
>> -	/* Don't allow 'dev_id' that exceeds single, flat table limit */
>> -	if (baser) {
>> -		if (dev_id >= (PAGE_ORDER_TO_SIZE(baser->order) /
>> -			      GITS_BASER_ENTRY_SIZE(baser->val)))
>> -			return NULL;
>> -	} else if (ilog2(dev_id) >= its->device_ids)
>> +	if (!its_alloc_device_table(its, dev_id))
>>  		return NULL;
>>  
>>  	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
>
> Thanks,
>
> 	M.

-- 
Shanker Donthineni
Qualcomm Technologies, Inc. on behalf of Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux Foundation Collaborative Project

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2016-06-04 14:42 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-05-09 20:58 [PATCH V3 0/2] Add two-level support to ITS device table Shanker Donthineni
2016-05-09 20:58 ` [PATCH V3 1/2] irqchip/gicv3-its: split its_alloc_tables() into two functions Shanker Donthineni
2016-06-04  8:53   ` Marc Zyngier
2016-06-04 14:30     ` Shanker Donthineni
2016-06-04 11:45   ` Marc Zyngier
2016-05-09 20:58 ` [PATCH V3 2/2] irqchip/gicv3-its: Implement two-level(indirect) device table support Shanker Donthineni
2016-06-04  9:09   ` Marc Zyngier
2016-06-04 14:42     ` Shanker Donthineni

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).