All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 3/3] riscv: rewrite tlb flush for performance improvement
@ 2019-03-07  1:29 Gary Guo
  2019-03-08 14:39 ` Christoph Hellwig
  0 siblings, 1 reply; 11+ messages in thread
From: Gary Guo @ 2019-03-07  1:29 UTC (permalink / raw)
  To: linux-riscv; +Cc: Palmer Dabbelt, Albert Ou

This patch rewrites the logic related to TLB flushing, both to cleanup
the code and to improve performance.

We now use sfence.vma variant with specified ASID and virtual address
whenever possible.  Even though only ASID 0 is used, it still improves
performance by preventing global mappings from being flushed from TLB.

This patch also includes an IPI-based remote TLB shootdown, which is useful
at this stage for testing because BBL/OpenSBI ignores operands of
sbi_remote_sfence_vma_asid and always performs a global TLB flush.
The IPI-based remote TLB shootdown is gated behind RISCV_TLBI_IPI config
and is off by default.

Signed-off-by: Gary Guo <gary@garyguo.net>
---
 arch/riscv/Kconfig                |  40 +++++++++
 arch/riscv/include/asm/pgtable.h  |   2 +-
 arch/riscv/include/asm/tlbflush.h |  82 +++++++++--------
 arch/riscv/mm/Makefile            |   2 +
 arch/riscv/mm/context.c           |   8 +-
 arch/riscv/mm/tlbflush.c          | 144 ++++++++++++++++++++++++++++++
 6 files changed, 239 insertions(+), 39 deletions(-)
 create mode 100644 arch/riscv/mm/tlbflush.c

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index ee833e6f5ccb..8203bec22610 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -196,6 +196,46 @@ config RISCV_ISA_C
 config RISCV_ISA_A
 	def_bool y
 
+
+menu "Virtual memory management"
+
+config RISCV_TLBI_IPI
+	bool "Use IPI instead of SBI for remote TLB shootdown"
+	default n
+	help
+	  Instead of using remote TLB shootdown interfaces provided by SBI,
+	  use IPI to handle remote TLB shootdown within Linux kernel.
+
+	  BBL/OpenSBI are currently ignoring ASID and address range provided
+	  by SBI call argument, and do a full TLB flush instead. This may
+	  negatively impact performance on implementations with page-level
+	  sfence.vma support.
+
+	  If you don't know what to do here, say N.
+
+
+config RISCV_TLBI_MAX_OPS
+	int "Max number of page-level sfence.vma per range TLB flush"
+	range 1 511
+	default 1
+	help
+	  This config specifies how many page-level sfence.vma can the Linux
+	  kernel issue when the kernel needs to flush a range from the TLB.
+	  If the required number of page-level sfence.vma exceeds this limit,
+	  a full sfence.vma is issued.
+
+	  Increasing this number can negatively impact performance on
+	  implementations where sfence.vma's address operand is ignored and
+	  a global TLB flush is always performed.
+
+	  On the other hand, implementations with page-level TLB flush support
+	  can benefit from a larger number.
+
+	  If you don't know what to do here, keep the default value 1.
+
+endmenu
+
+
 menu "supported PMU type"
 	depends on PERF_EVENTS
 
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 16301966d65b..47a8616b9de0 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -279,7 +279,7 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
 	 * Relying on flush_tlb_fix_spurious_fault would suffice, but
 	 * the extra traps reduce performance.  So, eagerly SFENCE.VMA.
 	 */
-	local_flush_tlb_page(address);
+	local_flush_tlb_page(vma, address);
 }
 
 #define __HAVE_ARCH_PTE_SAME
diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h
index 54fee0cadb1e..f254237a3bda 100644
--- a/arch/riscv/include/asm/tlbflush.h
+++ b/arch/riscv/include/asm/tlbflush.h
@@ -1,6 +1,5 @@
 /*
- * Copyright (C) 2009 Chen Liqin <liqin.chen@sunplusct.com>
- * Copyright (C) 2012 Regents of the University of California
+ * Copyright (C) 2019 Gary Guo, University of Cambridge
  *
  *   This program is free software; you can redistribute it and/or
  *   modify it under the terms of the GNU General Public License
@@ -16,7 +15,6 @@
 #define _ASM_RISCV_TLBFLUSH_H
 
 #include <linux/mm_types.h>
-#include <asm/smp.h>
 
 /*
  * Flush entire local TLB.  'sfence.vma' implicitly fences with the instruction
@@ -27,53 +25,63 @@ static inline void local_flush_tlb_all(void)
 	__asm__ __volatile__ ("sfence.vma" : : : "memory");
 }
 
-/* Flush one page from local TLB */
-static inline void local_flush_tlb_page(unsigned long addr)
+static inline void local_flush_tlb_mm(struct mm_struct *mm)
 {
-	__asm__ __volatile__ ("sfence.vma %0" : : "r" (addr) : "memory");
+	/* Flush ASID 0 so that global mappings are not affected */
+	__asm__ __volatile__ ("sfence.vma x0, %0" : : "r" (0) : "memory");
 }
 
-#ifndef CONFIG_SMP
-
-#define flush_tlb_all() local_flush_tlb_all()
-#define flush_tlb_page(vma, addr) local_flush_tlb_page(addr)
+static inline void local_flush_tlb_page(struct vm_area_struct *vma,
+		unsigned long addr)
+{
+	__asm__ __volatile__ ("sfence.vma %0, %1" : : "r" (addr), "r" (0) : "memory");
+}
 
-static inline void flush_tlb_range(struct vm_area_struct *vma,
+static inline void local_flush_tlb_range(struct vm_area_struct *vma,
 		unsigned long start, unsigned long end)
 {
-	local_flush_tlb_all();
+	if (end - start > CONFIG_RISCV_TLBI_MAX_OPS * PAGE_SIZE) {
+		local_flush_tlb_mm(vma->vm_mm);
+		return;
+	}
+
+	while (start < end) {
+		__asm__ __volatile__ ("sfence.vma %0, %1" : : "r" (start), "r" (0) : "memory");
+		start += PAGE_SIZE;
+	}
 }
 
-#define flush_tlb_mm(mm) flush_tlb_all()
-
-#else /* CONFIG_SMP */
+static inline void local_flush_tlb_kernel_range(unsigned long start,
+	unsigned long end)
+{
+	if (end - start > CONFIG_RISCV_TLBI_MAX_OPS * PAGE_SIZE) {
+		local_flush_tlb_all();
+		return;
+	}
+
+	while (start < end) {
+		__asm__ __volatile__ ("sfence.vma %0" : : "r" (start) : "memory");
+		start += PAGE_SIZE;
+	}
+}
 
-#include <asm/sbi.h>
+#ifndef CONFIG_SMP
 
-static inline void remote_sfence_vma(struct cpumask *cmask, unsigned long start,
-				     unsigned long size)
-{
-	struct cpumask hmask;
+#define flush_tlb_all() local_flush_tlb_all()
+#define flush_tlb_mm(mm) local_flush_tlb_mm(mm)
+#define flush_tlb_page(vma, addr) local_flush_tlb_page(vma, addr)
+#define flush_tlb_range(vma, start, end) local_flush_tlb_range(vma, start, end)
+#define flush_tlb_kernel_range(start, end) local_flush_tlb_kernel_range(start, end)
 
-	cpumask_clear(&hmask);
-	riscv_cpuid_to_hartid_mask(cmask, &hmask);
-	sbi_remote_sfence_vma(hmask.bits, start, size);
-}
+#else /* CONFIG_SMP */
 
-#define flush_tlb_all() sbi_remote_sfence_vma(NULL, 0, -1)
-#define flush_tlb_page(vma, addr) flush_tlb_range(vma, addr, 0)
-#define flush_tlb_range(vma, start, end) \
-	remote_sfence_vma(mm_cpumask((vma)->vm_mm), start, (end) - (start))
-#define flush_tlb_mm(mm) \
-	remote_sfence_vma(mm_cpumask(mm), 0, -1)
+extern void flush_tlb_all(void);
+extern void flush_tlb_mm(struct mm_struct *mm);
+extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr);
+extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+		unsigned long end);
+extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
 
 #endif /* CONFIG_SMP */
 
-/* Flush a range of kernel pages */
-static inline void flush_tlb_kernel_range(unsigned long start,
-	unsigned long end)
-{
-	flush_tlb_all();
-}
-
 #endif /* _ASM_RISCV_TLBFLUSH_H */
diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile
index d75b035786d6..7237f79ea0fc 100644
--- a/arch/riscv/mm/Makefile
+++ b/arch/riscv/mm/Makefile
@@ -4,3 +4,5 @@ obj-y += extable.o
 obj-y += ioremap.o
 obj-y += cacheflush.o
 obj-y += context.o
+
+obj-$(CONFIG_SMP) += tlbflush.o
diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c
index 4b9a20135008..ac4d6217c6b0 100644
--- a/arch/riscv/mm/context.c
+++ b/arch/riscv/mm/context.c
@@ -75,7 +75,13 @@ void switch_mm(struct mm_struct *prev,
 	 * privileged ISA 1.10 yet.
 	 */
 	csr_write(sptbr, virt_to_pfn(next->pgd) | SATP_MODE);
-	local_flush_tlb_all();
+
+	/*
+	 * sfence.vma after SATP write. We call it on MM context instead of
+	 * calling local_flush_tlb_all to prevent global mappings from being
+	 * affected.
+	 */
+	local_flush_tlb_mm(next);
 
 	flush_icache_deferred(next);
 }
diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c
new file mode 100644
index 000000000000..76cea33aa9c7
--- /dev/null
+++ b/arch/riscv/mm/tlbflush.c
@@ -0,0 +1,144 @@
+/*
+ * Copyright (C) 2019 Gary Guo, University of Cambridge
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/mm.h>
+#include <asm/sbi.h>
+
+/*
+ * BBL/OpenSBI are currently ignoring ASID and address range provided
+ * by SBI call argument, and do a full TLB flush instead.
+ *
+ * We provide an IPI-based remote shootdown implementation to improve
+ * performance on implementations with page-level sfence.vma, and also to
+ * allow testing of these implementations.
+ */
+
+#ifdef CONFIG_RISCV_TLBI_IPI
+
+struct tlbi {
+	unsigned long start;
+	unsigned long size;
+	unsigned long asid;
+};
+
+static void ipi_remote_sfence_vma(void *info)
+{
+	struct tlbi *data = (struct tlbi *) info;
+	unsigned long start = data->start;
+	unsigned long size = data->size;
+	unsigned long i;
+
+	for (i = 0; i < size; i += PAGE_SIZE) {
+		__asm__ __volatile__ ("sfence.vma %0" : : "r" (start + i) : "memory");
+	}
+}
+
+static void ipi_remote_sfence_vma_asid(void *info)
+{
+	struct tlbi *data = (struct tlbi *) info;
+	unsigned long asid = data->asid;
+	unsigned long start = data->start;
+	unsigned long size = data->size;
+	unsigned long i;
+
+	/* Flush entire MM context */
+	if (size == (unsigned long) -1) {
+		__asm__ __volatile__ ("sfence.vma x0, %0" : : "r" (asid) : "memory");
+		return;
+	}
+
+	for (i = 0; i < size; i += PAGE_SIZE) {
+		__asm__ __volatile__ ("sfence.vma %0, %1" : : "r" (start + i), "r" (asid) : "memory");
+	}
+}
+
+static inline void remote_sfence_vma(unsigned long start, unsigned long size)
+{
+	struct tlbi info = {
+		.start = start,
+		.size = size,
+	};
+	on_each_cpu(ipi_remote_sfence_vma, &info, 1);
+}
+
+static inline void remote_sfence_vma_asid(cpumask_t *mask, unsigned long start,
+		unsigned long size, unsigned long asid)
+{
+	struct tlbi info = {
+		.start = start,
+		.size = size,
+		.asid = asid,
+	};
+	on_each_cpu_mask(mask, ipi_remote_sfence_vma_asid, &info, 1);
+}
+
+#else /* CONFIG_RISCV_TLBI_SBI */
+
+static inline void remote_sfence_vma(unsigned long start, unsigned long size)
+{
+	sbi_remote_sfence_vma(NULL, start, size);
+}
+
+static inline void remote_sfence_vma_asid(cpumask_t *mask, unsigned long start,
+		unsigned long size, unsigned long asid)
+{
+	cpumask_t hmask;
+
+	cpumask_clear(&hmask);
+	riscv_cpuid_to_hartid_mask(mask, &hmask);
+	sbi_remote_sfence_vma_asid(hmask.bits, start, size, asid);
+}
+
+#endif /* CONFIG_RISCV_TLBI_SBI */
+
+void flush_tlb_all(void)
+{
+	sbi_remote_sfence_vma(NULL, 0, -1);
+}
+
+void flush_tlb_mm(struct mm_struct *mm)
+{
+	remote_sfence_vma_asid(mm_cpumask(mm), 0, -1, 0);
+}
+
+void flush_tlb_page(struct vm_area_struct *vma,
+		unsigned long addr)
+{
+	remote_sfence_vma_asid(mm_cpumask(vma->vm_mm), addr, PAGE_SIZE, 0);
+}
+
+
+void flush_tlb_range(struct vm_area_struct *vma,
+		unsigned long start, unsigned long end)
+{
+	if (end - start > CONFIG_RISCV_TLBI_MAX_OPS * PAGE_SIZE) {
+		flush_tlb_mm(vma->vm_mm);
+		return;
+	}
+
+	remote_sfence_vma_asid(mm_cpumask(vma->vm_mm), start, end - start, 0);
+}
+
+void flush_tlb_kernel_range(unsigned long start, unsigned long end)
+{
+	if (end - start > CONFIG_RISCV_TLBI_MAX_OPS * PAGE_SIZE) {
+		flush_tlb_all();
+		return;
+	}
+
+	remote_sfence_vma(start, end - start);
+}
+
-- 
2.17.1


_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH 3/3] riscv: rewrite tlb flush for performance improvement
  2019-03-07  1:29 [PATCH 3/3] riscv: rewrite tlb flush for performance improvement Gary Guo
@ 2019-03-08 14:39 ` Christoph Hellwig
  2019-03-08 15:55   ` Gary Guo
  0 siblings, 1 reply; 11+ messages in thread
From: Christoph Hellwig @ 2019-03-08 14:39 UTC (permalink / raw)
  To: Gary Guo; +Cc: linux-riscv, Palmer Dabbelt, Albert Ou

> +menu "Virtual memory management"
> +
> +config RISCV_TLBI_IPI
> +	bool "Use IPI instead of SBI for remote TLB shootdown"
> +	default n
> +	help
> +	  Instead of using remote TLB shootdown interfaces provided by SBI,
> +	  use IPI to handle remote TLB shootdown within Linux kernel.
> +
> +	  BBL/OpenSBI are currently ignoring ASID and address range provided
> +	  by SBI call argument, and do a full TLB flush instead. This may
> +	  negatively impact performance on implementations with page-level
> +	  sfence.vma support.
> +
> +	  If you don't know what to do here, say N.

Requiring a kconfig here is rather sad.  For now I would just
switch entirely to your non-SBI version as doing SBI calls for this
is rather pointless to start with.  Either we get real architectural
hardware acceleration, or we might as well use IPIs ourselves.

That being said if there are strong arguments to keep the old code
I'd still prefer that to be runtime selectable.

> +
> +config RISCV_TLBI_MAX_OPS
> +	int "Max number of page-level sfence.vma per range TLB flush"
> +	range 1 511
> +	default 1
> +	help
> +	  This config specifies how many page-level sfence.vma can the Linux
> +	  kernel issue when the kernel needs to flush a range from the TLB.
> +	  If the required number of page-level sfence.vma exceeds this limit,
> +	  a full sfence.vma is issued.
> +
> +	  Increasing this number can negatively impact performance on
> +	  implementations where sfence.vma's address operand is ignored and
> +	  a global TLB flush is always performed.
> +
> +	  On the other hand, implementations with page-level TLB flush support
> +	  can benefit from a larger number.
> +
> +	  If you don't know what to do here, keep the default value 1.

Again, I don't think hardcoding this makes any sense.  To make the
setting it needs to be overridable, and preferably provided by the
SBI code in some form (DT entry?).

> index 54fee0cadb1e..f254237a3bda 100644
> --- a/arch/riscv/include/asm/tlbflush.h
> +++ b/arch/riscv/include/asm/tlbflush.h
> @@ -1,6 +1,5 @@
>  /*
> - * Copyright (C) 2009 Chen Liqin <liqin.chen@sunplusct.com>
> - * Copyright (C) 2012 Regents of the University of California
> + * Copyright (C) 2019 Gary Guo, University of Cambridge

Unless you complete rewrite the file it is rather rude to remove
the existing copyright.  There still seem to be a decent amount
of at least comments left from the old codebase.

> +static inline void local_flush_tlb_page(struct vm_area_struct *vma,
> +		unsigned long addr)
> +{
> +	__asm__ __volatile__ ("sfence.vma %0, %1" : : "r" (addr), "r" (0) : "memory");
> +}

Please avoid lines over 80 chars.  Also I find inline assembly much
easier to read if each argument has its own line.

> +#ifndef CONFIG_SMP

If you rewrite this anyway can you switch the order around and use
an ifdef instead of ifndef?

> +extern void flush_tlb_all(void);
> +extern void flush_tlb_mm(struct mm_struct *mm);
> +extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr);
> +extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
> +		unsigned long end);
> +extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);

No need for all the externs.

> diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c
> new file mode 100644
> index 000000000000..76cea33aa9c7
> --- /dev/null
> +++ b/arch/riscv/mm/tlbflush.c
> @@ -0,0 +1,144 @@
> +/*
> + * Copyright (C) 2019 Gary Guo, University of Cambridge
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program.  If not, see <http://www.gnu.org/licenses/>.
> + */

Please use an SPDX tag instead of the license boilerplate.

> +#include <linux/mm.h>
> +#include <asm/sbi.h>
> +
> +/*
> + * BBL/OpenSBI are currently ignoring ASID and address range provided
> + * by SBI call argument, and do a full TLB flush instead.

This is really information for the changelog, not really for the code.

> +static void ipi_remote_sfence_vma(void *info)
> +{
> +	struct tlbi *data = (struct tlbi *) info;

No need for the cast.

> +	if (size == (unsigned long) -1) {

Maybe add a symbolic RISCV_FLUSH_ALL macros for the magic -1?

> +void flush_tlb_page(struct vm_area_struct *vma,
> +		unsigned long addr)

The second argument easily fits onto the first line.

> +void flush_tlb_range(struct vm_area_struct *vma,
> +		unsigned long start, unsigned long end)

Same here.

_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 3/3] riscv: rewrite tlb flush for performance improvement
  2019-03-08 14:39 ` Christoph Hellwig
@ 2019-03-08 15:55   ` Gary Guo
  2019-03-08 16:31     ` Christoph Hellwig
  2019-03-08 16:46     ` Anup Patel
  0 siblings, 2 replies; 11+ messages in thread
From: Gary Guo @ 2019-03-08 15:55 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-riscv, Palmer Dabbelt, Albert Ou

On 08/03/2019 14:39, Christoph Hellwig wrote:
>> +menu "Virtual memory management"
>> +
>> +config RISCV_TLBI_IPI
>> +	bool "Use IPI instead of SBI for remote TLB shootdown"
>> +	default n
>> +	help
>> +	  Instead of using remote TLB shootdown interfaces provided by SBI,
>> +	  use IPI to handle remote TLB shootdown within Linux kernel.
>> +
>> +	  BBL/OpenSBI are currently ignoring ASID and address range provided
>> +	  by SBI call argument, and do a full TLB flush instead. This may
>> +	  negatively impact performance on implementations with page-level
>> +	  sfence.vma support.
>> +
>> +	  If you don't know what to do here, say N.
> 
> Requiring a kconfig here is rather sad.  For now I would just
> switch entirely to your non-SBI version as doing SBI calls for this
> is rather pointless to start with.  Either we get real architectural
> hardware acceleration, or we might as well use IPIs ourselves.
> 

The problem is that technically SBI should hide the details, and we 
should automatically, and transparently benefit from architectural 
hardware acceleration. The only issue is that the SBI isn't doing it 
well at the moment, which to me looks more like an erratum. Once SBI is 
doing its job, we should switch back and use SBI instead. We can change 
to default value to Y first, but I do believe we should still be able to 
use SBI-version if necessary, e.g. to test SBI implementation.


> That being said if there are strong arguments to keep the old code
> I'd still prefer that to be runtime selectable.
> 
>> +
>> +config RISCV_TLBI_MAX_OPS
>> +	int "Max number of page-level sfence.vma per range TLB flush"
>> +	range 1 511
>> +	default 1
>> +	help
>> +	  This config specifies how many page-level sfence.vma can the Linux
>> +	  kernel issue when the kernel needs to flush a range from the TLB.
>> +	  If the required number of page-level sfence.vma exceeds this limit,
>> +	  a full sfence.vma is issued.
>> +
>> +	  Increasing this number can negatively impact performance on
>> +	  implementations where sfence.vma's address operand is ignored and
>> +	  a global TLB flush is always performed.
>> +
>> +	  On the other hand, implementations with page-level TLB flush support
>> +	  can benefit from a larger number.
>> +
>> +	  If you don't know what to do here, keep the default value 1.
> 
> Again, I don't think hardcoding this makes any sense.  To make the
> setting it needs to be overridable, and preferably provided by the
> SBI code in some form (DT entry?).
> 

Currently we have no way to retrieve the ideal size - so I just provide 
a hardcoded constant. What do you suggest for short-term before we have 
a reliable way to retrieve this value?

>> +#ifndef CONFIG_SMP
> 
> If you rewrite this anyway can you switch the order around and use
> an ifdef instead of ifndef?

I did a fast grep on other archs, and it seems that at least for 
CONFIG_SMP, it's more common to put non-SMP part first.

>> +#include <linux/mm.h>
>> +#include <asm/sbi.h>
>> +
>> +/*
>> + * BBL/OpenSBI are currently ignoring ASID and address range provided
>> + * by SBI call argument, and do a full TLB flush instead.
> 
> This is really information for the changelog, not really for the code.
> 
>> +static void ipi_remote_sfence_vma(void *info)
>> +{
>> +	struct tlbi *data = (struct tlbi *) info;
> 
> No need for the cast.

Thanks for pointing out. Writing code for C++ for too long I already had 
muscle memory for the cast :D

>> +	if (size == (unsigned long) -1) {
> 
> Maybe add a symbolic RISCV_FLUSH_ALL macros for the magic -1?

This is just to unify with SBI (currently we also use -1 in SBI-calls). 
Since the argument is unsigned long, from 0 to -1 basically just 
represent the entire address space. Do you think it's better to replace 
all SBI with -1 argument with a macro as well?
_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 3/3] riscv: rewrite tlb flush for performance improvement
  2019-03-08 15:55   ` Gary Guo
@ 2019-03-08 16:31     ` Christoph Hellwig
  2019-03-08 16:50       ` Anup Patel
  2019-03-08 16:46     ` Anup Patel
  1 sibling, 1 reply; 11+ messages in thread
From: Christoph Hellwig @ 2019-03-08 16:31 UTC (permalink / raw)
  To: Gary Guo; +Cc: Christoph Hellwig, linux-riscv, Palmer Dabbelt, Albert Ou

On Fri, Mar 08, 2019 at 03:55:43PM +0000, Gary Guo wrote:
> The problem is that technically SBI should hide the details, and we 
> should automatically, and transparently benefit from architectural 
> hardware acceleration.

But it doesn't.  The SBI effectively requires an additional trap,
which makes the whole thing a little pointless.  I'd much rather
have well working code in the kernel, and if people want to do
hardware based TLB flushing to propose a well defined Supervisor
ABI extension for it.

> Currently we have no way to retrieve the ideal size - so I just provide 
> a hardcoded constant. What do you suggest for short-term before we have 
> a reliable way to retrieve this value?

Kernel boot option.

> >> +#ifndef CONFIG_SMP
> > 
> > If you rewrite this anyway can you switch the order around and use
> > an ifdef instead of ifndef?
> 
> I did a fast grep on other archs, and it seems that at least for 
> CONFIG_SMP, it's more common to put non-SMP part first.

Most of that probably is copy and paste from x86 which had the
non-SMP version only first.  Still not a good idea.

> >> +	if (size == (unsigned long) -1) {
> > 
> > Maybe add a symbolic RISCV_FLUSH_ALL macros for the magic -1?
> 
> This is just to unify with SBI (currently we also use -1 in SBI-calls). 
> Since the argument is unsigned long, from 0 to -1 basically just 
> represent the entire address space. Do you think it's better to replace 
> all SBI with -1 argument with a macro as well?

Note that I'm fine keeping the (unsigned long)-1 as the actual ABI, it
just would nice to give it a symbolic name.

_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 3/3] riscv: rewrite tlb flush for performance improvement
  2019-03-08 15:55   ` Gary Guo
  2019-03-08 16:31     ` Christoph Hellwig
@ 2019-03-08 16:46     ` Anup Patel
  1 sibling, 0 replies; 11+ messages in thread
From: Anup Patel @ 2019-03-08 16:46 UTC (permalink / raw)
  To: Gary Guo
  Cc: Anup Patel, Palmer Dabbelt, Christoph Hellwig, Atish Patra,
	Albert Ou, linux-riscv

On Fri, Mar 8, 2019 at 9:25 PM Gary Guo <gary@garyguo.net> wrote:
>
> On 08/03/2019 14:39, Christoph Hellwig wrote:
> >> +menu "Virtual memory management"
> >> +
> >> +config RISCV_TLBI_IPI
> >> +    bool "Use IPI instead of SBI for remote TLB shootdown"
> >> +    default n
> >> +    help
> >> +      Instead of using remote TLB shootdown interfaces provided by SBI,
> >> +      use IPI to handle remote TLB shootdown within Linux kernel.
> >> +
> >> +      BBL/OpenSBI are currently ignoring ASID and address range provided
> >> +      by SBI call argument, and do a full TLB flush instead. This may
> >> +      negatively impact performance on implementations with page-level
> >> +      sfence.vma support.
> >> +
> >> +      If you don't know what to do here, say N.
> >
> > Requiring a kconfig here is rather sad.  For now I would just
> > switch entirely to your non-SBI version as doing SBI calls for this
> > is rather pointless to start with.  Either we get real architectural
> > hardware acceleration, or we might as well use IPIs ourselves.
> >
>
> The problem is that technically SBI should hide the details, and we
> should automatically, and transparently benefit from architectural
> hardware acceleration. The only issue is that the SBI isn't doing it
> well at the moment, which to me looks more like an erratum. Once SBI is
> doing its job, we should switch back and use SBI instead. We can change
> to default value to Y first, but I do believe we should still be able to
> use SBI-version if necessary, e.g. to test SBI implementation.

Currently OpenSBI is in feature parity with BBL and now we are trying
to do things in a better way. We will certainly implement
SBI_REMOTE_SFENCE_VMA and SBI_REMOTE_SFENCE_VMA_ASID
in a better way instead of flushing everything.

Here's the link to track OpenSBI work:
https://github.com/riscv/opensbi/issues/87

I agree with you SBI calls should hide details of remote SFENCE
so that in-future some platform can have platform specific way to
accelerate remote SFENCE which is abstracted by SBI calls.

There is one more advantage in using SBI calls (apart from above)
for virtualization. Let's say we have a Guest/VM with more than one
VCPUs. It is possible that Guest VCPUs run on same host CPU in
which case the hypervisor can avoid unnecessary IPIs if Guest Linux
is using SBI calls for remote SFENCE. Now if Guest Linux uses its
own IPI based implementation then it will do redundant IPIs when
Guest VCPUs are running on same host CPU.

in summary, we should certainly use SBI calls for remote SFENCE to:
1. Allow SBI calls to hide platform specific acceleration
2. Be more virtualization friendly allowing hypervisor to optimize IPIs

>
>
> > That being said if there are strong arguments to keep the old code
> > I'd still prefer that to be runtime selectable.

We should certainly keep it runtime selectable.

Regards,
Anup

_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 3/3] riscv: rewrite tlb flush for performance improvement
  2019-03-08 16:31     ` Christoph Hellwig
@ 2019-03-08 16:50       ` Anup Patel
  2019-03-08 17:18         ` Christoph Hellwig
  0 siblings, 1 reply; 11+ messages in thread
From: Anup Patel @ 2019-03-08 16:50 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Palmer Dabbelt, Anup Patel, Atish Patra, Albert Ou, Gary Guo,
	linux-riscv

On Fri, Mar 8, 2019 at 10:01 PM Christoph Hellwig <hch@infradead.org> wrote:
>
> On Fri, Mar 08, 2019 at 03:55:43PM +0000, Gary Guo wrote:
> > The problem is that technically SBI should hide the details, and we
> > should automatically, and transparently benefit from architectural
> > hardware acceleration.
>
> But it doesn't.  The SBI effectively requires an additional trap,
> which makes the whole thing a little pointless.  I'd much rather
> have well working code in the kernel, and if people want to do
> hardware based TLB flushing to propose a well defined Supervisor
> ABI extension for it.

For your information, the IPI triggering is also done using SBI call
so we are not saving much by not using SBI calls for remote SFENCE.

If a platform implements remote TLB flushing in HW then OpenSBI
can use that HW mechanism instead of IPIs. For Linux, the platform
remote TLB flush acceleration will be abstracted by SBI calls.

Regards,
Anup

_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 3/3] riscv: rewrite tlb flush for performance improvement
  2019-03-08 16:50       ` Anup Patel
@ 2019-03-08 17:18         ` Christoph Hellwig
  2019-03-10 20:46           ` Anup Patel
  0 siblings, 1 reply; 11+ messages in thread
From: Christoph Hellwig @ 2019-03-08 17:18 UTC (permalink / raw)
  To: Anup Patel
  Cc: Albert Ou, Palmer Dabbelt, Christoph Hellwig, Atish Patra,
	Anup Patel, Gary Guo, linux-riscv

On Fri, Mar 08, 2019 at 10:20:49PM +0530, Anup Patel wrote:
> For your information, the IPI triggering is also done using SBI call
> so we are not saving much by not using SBI calls for remote SFENCE.

Not yet.  But all the usual implementation just implement IPIs by
MMIO writes to the CLINT.  Which could easily be delegated and we'd
save the trap.

_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 3/3] riscv: rewrite tlb flush for performance improvement
  2019-03-08 17:18         ` Christoph Hellwig
@ 2019-03-10 20:46           ` Anup Patel
  2019-03-10 20:49             ` Anup Patel
  0 siblings, 1 reply; 11+ messages in thread
From: Anup Patel @ 2019-03-10 20:46 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Palmer Dabbelt, Anup Patel, Atish Patra, Albert Ou, Gary Guo,
	linux-riscv

On Fri, Mar 8, 2019 at 10:48 PM Christoph Hellwig <hch@infradead.org> wrote:
>
> On Fri, Mar 08, 2019 at 10:20:49PM +0530, Anup Patel wrote:
> > For your information, the IPI triggering is also done using SBI call
> > so we are not saving much by not using SBI calls for remote SFENCE.
>
> Not yet.  But all the usual implementation just implement IPIs by
> MMIO writes to the CLINT.  Which could easily be delegated and we'd
> save the trap.

True but CLINT is not the defacto way triggering IPIs. Other SOCs will
have their own way of triggering IPIs. That's why is it is abstracted via
SBI call.

Regards,
Anup

_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 3/3] riscv: rewrite tlb flush for performance improvement
  2019-03-10 20:46           ` Anup Patel
@ 2019-03-10 20:49             ` Anup Patel
  2019-03-11 15:53               ` Christoph Hellwig
  0 siblings, 1 reply; 11+ messages in thread
From: Anup Patel @ 2019-03-10 20:49 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Palmer Dabbelt, Anup Patel, Atish Patra, Albert Ou, Gary Guo,
	linux-riscv

On Mon, Mar 11, 2019 at 2:16 AM Anup Patel <anup@brainfault.org> wrote:
>
> On Fri, Mar 8, 2019 at 10:48 PM Christoph Hellwig <hch@infradead.org> wrote:
> >
> > On Fri, Mar 08, 2019 at 10:20:49PM +0530, Anup Patel wrote:
> > > For your information, the IPI triggering is also done using SBI call
> > > so we are not saving much by not using SBI calls for remote SFENCE.
> >
> > Not yet.  But all the usual implementation just implement IPIs by
> > MMIO writes to the CLINT.  Which could easily be delegated and we'd
> > save the trap.
>
> True but CLINT is not the defacto way triggering IPIs. Other SOCs will
> have their own way of triggering IPIs. That's why is it is abstracted via
> SBI call.

There are also use-cases to run two separate OSes on same SOC
without virtualization. In such cases, both OSes cannot have access
to CLINT for triggering IPIs.

Regards,
Anup

_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 3/3] riscv: rewrite tlb flush for performance improvement
  2019-03-10 20:49             ` Anup Patel
@ 2019-03-11 15:53               ` Christoph Hellwig
  2019-03-11 16:28                 ` Anup Patel
  0 siblings, 1 reply; 11+ messages in thread
From: Christoph Hellwig @ 2019-03-11 15:53 UTC (permalink / raw)
  To: Anup Patel
  Cc: Albert Ou, Palmer Dabbelt, Christoph Hellwig, Atish Patra,
	Anup Patel, Gary Guo, linux-riscv

On Mon, Mar 11, 2019 at 02:19:54AM +0530, Anup Patel wrote:
> There are also use-cases to run two separate OSes on same SOC
> without virtualization. In such cases, both OSes cannot have access
> to CLINT for triggering IPIs.

And sometimes pigs can fly (no, really!).  But we should optimize
for performance on the hardware we have, not build some ivory towers
for grand architectures of the future.

And with that I don't mean cutting corners and weird micro-optimization,
but to think hard what layering makes sense.

Having to trap into machine mode software to flush TLBs does not make
sense in any normal architecture.

_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 3/3] riscv: rewrite tlb flush for performance improvement
  2019-03-11 15:53               ` Christoph Hellwig
@ 2019-03-11 16:28                 ` Anup Patel
  0 siblings, 0 replies; 11+ messages in thread
From: Anup Patel @ 2019-03-11 16:28 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Palmer Dabbelt, Anup Patel, Atish Patra, Albert Ou, Gary Guo,
	linux-riscv

On Mon, Mar 11, 2019 at 9:23 PM Christoph Hellwig <hch@infradead.org> wrote:
>
> On Mon, Mar 11, 2019 at 02:19:54AM +0530, Anup Patel wrote:
> > There are also use-cases to run two separate OSes on same SOC
> > without virtualization. In such cases, both OSes cannot have access
> > to CLINT for triggering IPIs.
>
> And sometimes pigs can fly (no, really!).  But we should optimize
> for performance on hardware we have not build some ivory towers
> for grand architectures of the future.

IMHO, we should not restrict the use-cases of the Linux RISC-V kernel. The
Linux RISC-V kernel should have a mechanism to do remote TLB flushes
using both SBI calls as well as its own IPIs. We should let users
decide which mechanism of remote TLB flushing they want.

>
> And with that I don't mean cutting corners and weird micro-optimization,
> but to think hard what layering makes sense.
>
> Having to trap into machine mode software to flush TLBs does not make
> sense in any normal architecture.

That's your view of a "normal architecture". How I see this is that RISC-V
is allowing CPU designers to implement their own way of doing remote TLB
flushes, which is a very unique thing.

Regards,
Anup

_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2019-03-11 16:28 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-03-07  1:29 [PATCH 3/3] riscv: rewrite tlb flush for performance improvement Gary Guo
2019-03-08 14:39 ` Christoph Hellwig
2019-03-08 15:55   ` Gary Guo
2019-03-08 16:31     ` Christoph Hellwig
2019-03-08 16:50       ` Anup Patel
2019-03-08 17:18         ` Christoph Hellwig
2019-03-10 20:46           ` Anup Patel
2019-03-10 20:49             ` Anup Patel
2019-03-11 15:53               ` Christoph Hellwig
2019-03-11 16:28                 ` Anup Patel
2019-03-08 16:46     ` Anup Patel

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.