linux-kernel.vger.kernel.org archive mirror
* [PATCH v6] swiotlb: Adjust SWIOTLB bounce buffer size for SEV guests.
@ 2020-11-19 21:42 Ashish Kalra
  2020-11-23 17:06 ` Borislav Petkov
  0 siblings, 1 reply; 13+ messages in thread
From: Ashish Kalra @ 2020-11-19 21:42 UTC (permalink / raw)
  To: konrad.wilk
  Cc: hch, tglx, mingo, bp, hpa, x86, luto, peterz, dave.hansen, iommu,
	linux-kernel, brijesh.singh, Thomas.Lendacky, jon.grimm,
	rientjes

From: Ashish Kalra <ashish.kalra@amd.com>

For SEV, all DMA to and from guest has to use shared (un-encrypted) pages.
SEV uses SWIOTLB to make this happen without requiring changes to device
drivers.  However, depending on workload being run, the default 64MB of
SWIOTLB might not be enough and SWIOTLB may run out of buffers to use
for DMA, resulting in I/O errors and/or performance degradation for
high I/O workloads.

Increase the default size of SWIOTLB for SEV guests using a minimum
value of 128MB and a maximum value of 512MB, determining on amount
of provisioned guest memory.

Using late_initcall() interface to invoke swiotlb_adjust() does not
work as the size adjustment needs to be done before mem_encrypt_init()
and reserve_crashkernel() which use the allocated SWIOTLB buffer size,
hence calling it explicitly from setup_arch().

The SWIOTLB default size adjustment is added as an architecture specific
interface/callback to allow architectures such as those supporting memory
encryption to adjust/expand SWIOTLB size for their use.

v5 fixed build errors and warnings as
Reported-by: kbuild test robot <lkp@intel.com>

Signed-off-by: Ashish Kalra <ashish.kalra@amd.com>
---
 arch/x86/kernel/setup.c   |  2 ++
 arch/x86/mm/mem_encrypt.c | 32 ++++++++++++++++++++++++++++++++
 include/linux/swiotlb.h   |  6 ++++++
 kernel/dma/swiotlb.c      | 24 ++++++++++++++++++++++++
 4 files changed, 64 insertions(+)

diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 3511736fbc74..b073d58dd4a3 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1166,6 +1166,8 @@ void __init setup_arch(char **cmdline_p)
 	if (boot_cpu_has(X86_FEATURE_GBPAGES))
 		hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT);
 
+	swiotlb_adjust();
+
 	/*
 	 * Reserve memory for crash kernel after SRAT is parsed so that it
 	 * won't consume hotpluggable memory.
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index 3f248f0d0e07..c79a0d761db5 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -490,6 +490,38 @@ static void print_mem_encrypt_feature_info(void)
 }
 
 /* Architecture __weak replacement functions */
+unsigned long __init arch_swiotlb_adjust(unsigned long iotlb_default_size)
+{
+	unsigned long size = 0;
+
+	/*
+	 * For SEV, all DMA has to occur via shared/unencrypted pages.
+	 * SEV uses SWOTLB to make this happen without changing device
+	 * drivers. However, depending on the workload being run, the
+	 * default 64MB of SWIOTLB may not be enough & SWIOTLB may
+	 * run out of buffers for DMA, resulting in I/O errors and/or
+	 * performance degradation especially with high I/O workloads.
+	 * Increase the default size of SWIOTLB for SEV guests using
+	 * a minimum value of 128MB and a maximum value of 512MB,
+	 * depending on amount of provisioned guest memory.
+	 */
+	if (sev_active()) {
+		phys_addr_t total_mem = memblock_phys_mem_size();
+
+		if (total_mem <= SZ_1G)
+			size = max(iotlb_default_size, (unsigned long) SZ_128M);
+		else if (total_mem <= SZ_4G)
+			size = max(iotlb_default_size, (unsigned long) SZ_256M);
+		else
+			size = max(iotlb_default_size, (unsigned long) SZ_512M);
+
+		pr_info("SWIOTLB bounce buffer size adjusted to %luMB for SEV platform",
+			size >> 20);
+	}
+
+	return size;
+}
+
 void __init mem_encrypt_init(void)
 {
 	if (!sme_me_mask)
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 046bb94bd4d6..46a693f76f1e 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -33,6 +33,7 @@ extern void swiotlb_init(int verbose);
 int swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose);
 extern unsigned long swiotlb_nr_tbl(void);
 unsigned long swiotlb_size_or_default(void);
+unsigned long __init arch_swiotlb_adjust(unsigned long size);
 extern int swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs);
 extern void __init swiotlb_update_mem_attributes(void);
 
@@ -80,6 +81,7 @@ void __init swiotlb_exit(void);
 unsigned int swiotlb_max_segment(void);
 size_t swiotlb_max_mapping_size(struct device *dev);
 bool is_swiotlb_active(void);
+void __init swiotlb_adjust(void);
 #else
 #define swiotlb_force SWIOTLB_NO_FORCE
 static inline bool is_swiotlb_buffer(phys_addr_t paddr)
@@ -102,6 +104,10 @@ static inline bool is_swiotlb_active(void)
 {
 	return false;
 }
+
+static inline void swiotlb_adjust(void)
+{
+}
 #endif /* CONFIG_SWIOTLB */
 
 extern void swiotlb_print_info(void);
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index c19379fabd20..3be9a19ea0a5 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -163,6 +163,30 @@ unsigned long swiotlb_size_or_default(void)
 	return size ? size : (IO_TLB_DEFAULT_SIZE);
 }
 
+unsigned long __init __weak arch_swiotlb_adjust(unsigned long size)
+{
+	return 0;
+}
+
+void __init swiotlb_adjust(void)
+{
+	unsigned long size;
+
+	/*
+	 * If swiotlb parameter has not been specified, give a chance to
+	 * architectures such as those supporting memory encryption to
+	 * adjust/expand SWIOTLB size for their use.
+	 */
+	if (!io_tlb_nslabs) {
+		size = arch_swiotlb_adjust(IO_TLB_DEFAULT_SIZE);
+		if (size) {
+			size = ALIGN(size, 1 << IO_TLB_SHIFT);
+			io_tlb_nslabs = size >> IO_TLB_SHIFT;
+			io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
+		}
+	}
+}
+
 void swiotlb_print_info(void)
 {
 	unsigned long bytes = io_tlb_nslabs << IO_TLB_SHIFT;
-- 
2.17.1
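
The patch relies on the kernel's __weak override pattern: kernel/dma/swiotlb.c
carries a default arch_swiotlb_adjust() marked __weak, and the strong
definition in arch/x86/mm/mem_encrypt.c replaces it at link time. A minimal
standalone sketch of the same linker behavior (the file names and the
doubling are illustrative, not from the patch):

/* default.c: generic fallback, analogous to kernel/dma/swiotlb.c */
#include <stdio.h>

__attribute__((weak))
unsigned long arch_swiotlb_adjust(unsigned long size)
{
        return size;    /* no adjustment by default */
}

int main(void)
{
        printf("SWIOTLB size: %luMB\n",
               arch_swiotlb_adjust(64UL << 20) >> 20);
        return 0;
}

/* arch.c: strong definition, analogous to arch/x86/mm/mem_encrypt.c.
 * Link it in ("gcc default.c arch.c") and it silently replaces the
 * weak default above, so the program prints 128 instead of 64.
 */
unsigned long arch_swiotlb_adjust(unsigned long size)
{
        return 2 * size;        /* pretend the arch wants a bigger buffer */
}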


* Re: [PATCH v6] swiotlb: Adjust SWIOTLB bounce buffer size for SEV guests.
  2020-11-19 21:42 [PATCH v6] swiotlb: Adjust SWIOTLB bounce buffer size for SEV guests Ashish Kalra
@ 2020-11-23 17:06 ` Borislav Petkov
  2020-11-23 17:56   ` Konrad Rzeszutek Wilk
  0 siblings, 1 reply; 13+ messages in thread
From: Borislav Petkov @ 2020-11-23 17:06 UTC (permalink / raw)
  To: Ashish Kalra
  Cc: konrad.wilk, hch, tglx, mingo, hpa, x86, luto, peterz,
	dave.hansen, iommu, linux-kernel, brijesh.singh, Thomas.Lendacky,
	jon.grimm, rientjes

On Thu, Nov 19, 2020 at 09:42:05PM +0000, Ashish Kalra wrote:
> From: Ashish Kalra <ashish.kalra@amd.com>
> 
> For SEV, all DMA to and from guest has to use shared (un-encrypted) pages.
> SEV uses SWIOTLB to make this happen without requiring changes to device
> drivers.  However, depending on workload being run, the default 64MB of
> SWIOTLB might not be enough and SWIOTLB may run out of buffers to use
> for DMA, resulting in I/O errors and/or performance degradation for
> high I/O workloads.
> 
> Increase the default size of SWIOTLB for SEV guests using a minimum
> value of 128MB and a maximum value of 512MB, determining on amount
> of provisioned guest memory.

That sentence needs massaging.

> Using late_initcall() interface to invoke swiotlb_adjust() does not
> work as the size adjustment needs to be done before mem_encrypt_init()
> and reserve_crashkernel() which use the allocated SWIOTLB buffer size,
> hence calling it explicitly from setup_arch().

"hence call it ... "

> 
> The SWIOTLB default size adjustment is added as an architecture specific

"... is added... " needs to be "Add ..."

> interface/callback to allow architectures such as those supporting memory
> encryption to adjust/expand SWIOTLB size for their use.
> 
> v5 fixed build errors and warnings as
> Reported-by: kbuild test robot <lkp@intel.com>
> 
> Signed-off-by: Ashish Kalra <ashish.kalra@amd.com>
> ---
>  arch/x86/kernel/setup.c   |  2 ++
>  arch/x86/mm/mem_encrypt.c | 32 ++++++++++++++++++++++++++++++++
>  include/linux/swiotlb.h   |  6 ++++++
>  kernel/dma/swiotlb.c      | 24 ++++++++++++++++++++++++
>  4 files changed, 64 insertions(+)
> 
> diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
> index 3511736fbc74..b073d58dd4a3 100644
> --- a/arch/x86/kernel/setup.c
> +++ b/arch/x86/kernel/setup.c
> @@ -1166,6 +1166,8 @@ void __init setup_arch(char **cmdline_p)
>  	if (boot_cpu_has(X86_FEATURE_GBPAGES))
>  		hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT);
>  
> +	swiotlb_adjust();
> +
>  	/*
>  	 * Reserve memory for crash kernel after SRAT is parsed so that it
>  	 * won't consume hotpluggable memory.
> diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
> index 3f248f0d0e07..c79a0d761db5 100644
> --- a/arch/x86/mm/mem_encrypt.c
> +++ b/arch/x86/mm/mem_encrypt.c
> @@ -490,6 +490,38 @@ static void print_mem_encrypt_feature_info(void)
>  }
>  
>  /* Architecture __weak replacement functions */
> +unsigned long __init arch_swiotlb_adjust(unsigned long iotlb_default_size)
> +{
> +	unsigned long size = 0;

	unsigned long size = iotlb_default_size;

> +
> +	/*
> +	 * For SEV, all DMA has to occur via shared/unencrypted pages.
> +	 * SEV uses SWOTLB to make this happen without changing device
> +	 * drivers. However, depending on the workload being run, the
> +	 * default 64MB of SWIOTLB may not be enough & SWIOTLB may
						     ^

Use words pls, not "&".


> +	 * run out of buffers for DMA, resulting in I/O errors and/or
> +	 * performance degradation especially with high I/O workloads.
> +	 * Increase the default size of SWIOTLB for SEV guests using
> +	 * a minimum value of 128MB and a maximum value of 512MB,
> +	 * depending on amount of provisioned guest memory.
> +	 */
> +	if (sev_active()) {
> +		phys_addr_t total_mem = memblock_phys_mem_size();
> +
> +		if (total_mem <= SZ_1G)
> +			size = max(iotlb_default_size, (unsigned long) SZ_128M);
> +		else if (total_mem <= SZ_4G)
> +			size = max(iotlb_default_size, (unsigned long) SZ_256M);
> +		else
> +			size = max(iotlb_default_size, (unsigned long) SZ_512M);
> +
> +		pr_info("SWIOTLB bounce buffer size adjusted to %luMB for SEV platform",

just "... for SEV" - no need for "platform".

...

> diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
> index c19379fabd20..3be9a19ea0a5 100644
> --- a/kernel/dma/swiotlb.c
> +++ b/kernel/dma/swiotlb.c
> @@ -163,6 +163,30 @@ unsigned long swiotlb_size_or_default(void)
>  	return size ? size : (IO_TLB_DEFAULT_SIZE);
>  }
>  
> +unsigned long __init __weak arch_swiotlb_adjust(unsigned long size)
> +{
> +	return 0;

That, of course, needs to return size, not 0.

> +}
> +
> +void __init swiotlb_adjust(void)
> +{
> +	unsigned long size;
> +
> +	/*
> +	 * If swiotlb parameter has not been specified, give a chance to
> +	 * architectures such as those supporting memory encryption to
> +	 * adjust/expand SWIOTLB size for their use.
> +	 */

And when you preset the function-local argument "size" with the size
coming in as the size argument of arch_swiotlb_adjust()...

> +	if (!io_tlb_nslabs) {
> +		size = arch_swiotlb_adjust(IO_TLB_DEFAULT_SIZE);
> +		if (size) {

... you don't have to do if (size) here either but simply use size to
compute io_tlb_nslabs, I'd say.

Thx.

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette

* Re: [PATCH v6] swiotlb: Adjust SWIOTLB bounce buffer size for SEV guests.
  2020-11-23 17:06 ` Borislav Petkov
@ 2020-11-23 17:56   ` Konrad Rzeszutek Wilk
  2020-11-23 18:02     ` Borislav Petkov
  2020-11-23 22:56     ` Ashish Kalra
  0 siblings, 2 replies; 13+ messages in thread
From: Konrad Rzeszutek Wilk @ 2020-11-23 17:56 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Ashish Kalra, hch, tglx, mingo, hpa, x86, luto, peterz,
	dave.hansen, iommu, linux-kernel, brijesh.singh, Thomas.Lendacky,
	jon.grimm, rientjes

On Mon, Nov 23, 2020 at 06:06:47PM +0100, Borislav Petkov wrote:
> On Thu, Nov 19, 2020 at 09:42:05PM +0000, Ashish Kalra wrote:
> > From: Ashish Kalra <ashish.kalra@amd.com>
> > 
> > For SEV, all DMA to and from guest has to use shared (un-encrypted) pages.
> > SEV uses SWIOTLB to make this happen without requiring changes to device
> > drivers.  However, depending on workload being run, the default 64MB of
> > SWIOTLB might not be enough and SWIOTLB may run out of buffers to use
> > for DMA, resulting in I/O errors and/or performance degradation for
> > high I/O workloads.
> > 
> > Increase the default size of SWIOTLB for SEV guests using a minimum
> > value of 128MB and a maximum value of 512MB, determining on amount
> > of provisioned guest memory.
> 
> That sentence needs massaging.
> 
> > Using late_initcall() interface to invoke swiotlb_adjust() does not
> > work as the size adjustment needs to be done before mem_encrypt_init()
> > and reserve_crashkernel() which use the allocated SWIOTLB buffer size,
> > hence calling it explicitly from setup_arch().
> 
> "hence call it ... "
> 
> > 
> > The SWIOTLB default size adjustment is added as an architecture specific
> 
> "... is added... " needs to be "Add ..."
> 
> > interface/callback to allow architectures such as those supporting memory
> > encryption to adjust/expand SWIOTLB size for their use.
> > 
> > v5 fixed build errors and warnings as
> > Reported-by: kbuild test robot <lkp@intel.com>
> > 
> > Signed-off-by: Ashish Kalra <ashish.kalra@amd.com>
> > ---
> >  arch/x86/kernel/setup.c   |  2 ++
> >  arch/x86/mm/mem_encrypt.c | 32 ++++++++++++++++++++++++++++++++
> >  include/linux/swiotlb.h   |  6 ++++++
> >  kernel/dma/swiotlb.c      | 24 ++++++++++++++++++++++++
> >  4 files changed, 64 insertions(+)
> > 
> > diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
> > index 3511736fbc74..b073d58dd4a3 100644
> > --- a/arch/x86/kernel/setup.c
> > +++ b/arch/x86/kernel/setup.c
> > @@ -1166,6 +1166,8 @@ void __init setup_arch(char **cmdline_p)
> >  	if (boot_cpu_has(X86_FEATURE_GBPAGES))
> >  		hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT);
> >  
> > +	swiotlb_adjust();
> > +
> >  	/*
> >  	 * Reserve memory for crash kernel after SRAT is parsed so that it
> >  	 * won't consume hotpluggable memory.
> > diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
> > index 3f248f0d0e07..c79a0d761db5 100644
> > --- a/arch/x86/mm/mem_encrypt.c
> > +++ b/arch/x86/mm/mem_encrypt.c
> > @@ -490,6 +490,38 @@ static void print_mem_encrypt_feature_info(void)
> >  }
> >  
> >  /* Architecture __weak replacement functions */
> > +unsigned long __init arch_swiotlb_adjust(unsigned long iotlb_default_size)
> > +{
> > +	unsigned long size = 0;
> 
> 	unsigned long size = iotlb_default_size;
> 
> > +
> > +	/*
> > +	 * For SEV, all DMA has to occur via shared/unencrypted pages.
> > +	 * SEV uses SWOTLB to make this happen without changing device
> > +	 * drivers. However, depending on the workload being run, the
> > +	 * default 64MB of SWIOTLB may not be enough & SWIOTLB may
> 						     ^
> 
> Use words pls, not "&".
> 
> 
> > +	 * run out of buffers for DMA, resulting in I/O errors and/or
> > +	 * performance degradation especially with high I/O workloads.
> > +	 * Increase the default size of SWIOTLB for SEV guests using
> > +	 * a minimum value of 128MB and a maximum value of 512MB,
> > +	 * depending on amount of provisioned guest memory.
> > +	 */
> > +	if (sev_active()) {
> > +		phys_addr_t total_mem = memblock_phys_mem_size();
> > +
> > +		if (total_mem <= SZ_1G)
> > +			size = max(iotlb_default_size, (unsigned long) SZ_128M);
> > +		else if (total_mem <= SZ_4G)
> > +			size = max(iotlb_default_size, (unsigned long) SZ_256M);

That is eating 128MB for 1GB, aka 12% of the guest memory allocated statically for this.

And for guests that are 2GB, that is 12% until it gets to 3GB when it is 8%
and then 6% at 4GB.

I would prefer this to be based on your memory count, that is 6% of total
memory. And then going forward we can allocate memory _after_ boot and then stitch
the late SWIOTLB pool and allocate on demand.
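
(As a rough sketch of that late-pool idea, built on the
swiotlb_late_init_with_tbl() hook that already exists in the tree; the pool
size, the function name and the missing error/retry handling below are
illustrative only:)

/* Hypothetical: add a bounce pool after boot instead of reserving
 * everything statically in setup_arch().
 */
static int example_late_swiotlb_pool(void)
{
        unsigned long bytes = 64UL << 20;       /* example pool size */
        unsigned long nslabs = bytes >> IO_TLB_SHIFT;
        char *vstart;

        /* Bounce buffers must be DMA-addressable. */
        vstart = (char *)__get_free_pages(GFP_DMA | __GFP_NOWARN,
                                          get_order(bytes));
        if (!vstart)
                return -ENOMEM;

        /* Hand the freshly allocated pool over to SWIOTLB. */
        return swiotlb_late_init_with_tbl(vstart, nslabs);
}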



> > +		else
> > +			size = max(iotlb_default_size, (unsigned long) SZ_512M);
> > +
> > +		pr_info("SWIOTLB bounce buffer size adjusted to %luMB for SEV platform",
> 
> just "... for SEV" - no need for "platform".
> 
> ...
> 
> > diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
> > index c19379fabd20..3be9a19ea0a5 100644
> > --- a/kernel/dma/swiotlb.c
> > +++ b/kernel/dma/swiotlb.c
> > @@ -163,6 +163,30 @@ unsigned long swiotlb_size_or_default(void)
> >  	return size ? size : (IO_TLB_DEFAULT_SIZE);
> >  }
> >  
> > +unsigned long __init __weak arch_swiotlb_adjust(unsigned long size)
> > +{
> > +	return 0;
> 
> That, of course, needs to return size, not 0.

This is not going to work for TDX. I think having a registration
to SWIOTLB to have this function would be better going forward.

As in there will be a swiotlb_register_adjuster() which AMD SEV
code can call at start, also TDX can do it (and other platforms).


> 
> > +}
> > +
> > +void __init swiotlb_adjust(void)
> > +{
> > +	unsigned long size;
> > +
> > +	/*
> > +	 * If swiotlb parameter has not been specified, give a chance to
> > +	 * architectures such as those supporting memory encryption to
> > +	 * adjust/expand SWIOTLB size for their use.
> > +	 */
> 
> And when you preset the function-local argument "size" with the size
> coming in as the size argument of arch_swiotlb_adjust()...
> 
> > +	if (!io_tlb_nslabs) {
> > +		size = arch_swiotlb_adjust(IO_TLB_DEFAULT_SIZE);
> > +		if (size) {
> 
> ... you don't have to do if (size) here either but simply use size to
> compute io_tlb_nslabs, I'd say.
> 
> Thx.
> 
> -- 
> Regards/Gruss,
>     Boris.
> 
> https://people.kernel.org/tglx/notes-about-netiquette

* Re: [PATCH v6] swiotlb: Adjust SWIOTLB bounce buffer size for SEV guests.
  2020-11-23 17:56   ` Konrad Rzeszutek Wilk
@ 2020-11-23 18:02     ` Borislav Petkov
  2020-11-23 18:43       ` Konrad Rzeszutek Wilk
  2020-11-23 22:56     ` Ashish Kalra
  1 sibling, 1 reply; 13+ messages in thread
From: Borislav Petkov @ 2020-11-23 18:02 UTC (permalink / raw)
  To: Konrad Rzeszutek Wilk
  Cc: Ashish Kalra, hch, tglx, mingo, hpa, x86, luto, peterz,
	dave.hansen, iommu, linux-kernel, brijesh.singh, Thomas.Lendacky,
	jon.grimm, rientjes

On Mon, Nov 23, 2020 at 12:56:32PM -0500, Konrad Rzeszutek Wilk wrote:
> This is not going to work for TDX. I think having a registration
> to SWIOTLB to have this function would be better going forward.
> 
> As in there will be a swiotlb_register_adjuster() which AMD SEV
> code can call at start, also TDX can do it (and other platforms).

Oh do tell. It doesn't need to adjust size?

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette

* Re: [PATCH v6] swiotlb: Adjust SWIOTLB bounce buffer size for SEV guests.
  2020-11-23 18:02     ` Borislav Petkov
@ 2020-11-23 18:43       ` Konrad Rzeszutek Wilk
  2020-11-24  9:00         ` Borislav Petkov
  0 siblings, 1 reply; 13+ messages in thread
From: Konrad Rzeszutek Wilk @ 2020-11-23 18:43 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Ashish Kalra, hch, tglx, mingo, hpa, x86, luto, peterz,
	dave.hansen, iommu, linux-kernel, brijesh.singh, Thomas.Lendacky,
	jon.grimm, rientjes

On Mon, Nov 23, 2020 at 07:02:15PM +0100, Borislav Petkov wrote:
> On Mon, Nov 23, 2020 at 12:56:32PM -0500, Konrad Rzeszutek Wilk wrote:
> > This is not going to work for TDX. I think having a registration
> > to SWIOTLB to have this function would be better going forward.
> > 
> > As in there will be a swiotlb_register_adjuster() which AMD SEV
> > code can call at start, also TDX can do it (and other platforms).
> 
> Oh do tell. It doesn't need to adjust size?

I am assuming that TDX is going to have the same exact issue that 
AMD SEV will have.

Are you recommending to have a unified x86-specific callback
where we check if it:

 - CPUID_AMD_SEV or CPUID_INTEL_TDX is set, and
 - No vIOMMU present, then we adjust the size?
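
(Spelled out, such a unified check might look roughly like the sketch
below; sev_active() exists today, while tdx_active() and the vIOMMU test
are placeholders for code that does not exist yet:)

unsigned long __init arch_swiotlb_adjust(unsigned long iotlb_default_size)
{
        /* tdx_active() is hypothetical; no TDX guest support upstream. */
        if (!sev_active() /* && !tdx_active() */)
                return iotlb_default_size;

        /* A "no vIOMMU present" check would go here, since with a
         * vIOMMU the guest may not need an enlarged bounce buffer.
         */

        return max(iotlb_default_size, (unsigned long)SZ_256M); /* example */
}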

> 
> -- 
> Regards/Gruss,
>     Boris.
> 
> https://people.kernel.org/tglx/notes-about-netiquette

* Re: [PATCH v6] swiotlb: Adjust SWIOTLB bounce buffer size for SEV guests.
  2020-11-23 17:56   ` Konrad Rzeszutek Wilk
  2020-11-23 18:02     ` Borislav Petkov
@ 2020-11-23 22:56     ` Ashish Kalra
  2020-11-24  9:04       ` Borislav Petkov
  2020-11-24 23:46       ` Ashish Kalra
  1 sibling, 2 replies; 13+ messages in thread
From: Ashish Kalra @ 2020-11-23 22:56 UTC (permalink / raw)
  To: Konrad Rzeszutek Wilk
  Cc: Borislav Petkov, hch, tglx, mingo, hpa, x86, luto, peterz,
	dave.hansen, iommu, linux-kernel, brijesh.singh, Thomas.Lendacky,
	jon.grimm, rientjes

Hello Konrad,

On Mon, Nov 23, 2020 at 12:56:32PM -0500, Konrad Rzeszutek Wilk wrote:
> On Mon, Nov 23, 2020 at 06:06:47PM +0100, Borislav Petkov wrote:
> > On Thu, Nov 19, 2020 at 09:42:05PM +0000, Ashish Kalra wrote:
> > > From: Ashish Kalra <ashish.kalra@amd.com>
> > > 
> > > For SEV, all DMA to and from guest has to use shared (un-encrypted) pages.
> > > SEV uses SWIOTLB to make this happen without requiring changes to device
> > > drivers.  However, depending on workload being run, the default 64MB of
> > > SWIOTLB might not be enough and SWIOTLB may run out of buffers to use
> > > for DMA, resulting in I/O errors and/or performance degradation for
> > > high I/O workloads.
> > > 
> > > Increase the default size of SWIOTLB for SEV guests using a minimum
> > > value of 128MB and a maximum value of 512MB, determining on amount
> > > of provisioned guest memory.
> > 
> > That sentence needs massaging.
> > 
> > > Using late_initcall() interface to invoke swiotlb_adjust() does not
> > > work as the size adjustment needs to be done before mem_encrypt_init()
> > > and reserve_crashkernel() which use the allocated SWIOTLB buffer size,
> > > hence calling it explicitly from setup_arch().
> > 
> > "hence call it ... "
> > 
> > > 
> > > The SWIOTLB default size adjustment is added as an architecture specific
> > 
> > "... is added... " needs to be "Add ..."
> > 
> > > interface/callback to allow architectures such as those supporting memory
> > > encryption to adjust/expand SWIOTLB size for their use.
> > > 
> > > v5 fixed build errors and warnings as
> > > Reported-by: kbuild test robot <lkp@intel.com>
> > > 
> > > Signed-off-by: Ashish Kalra <ashish.kalra@amd.com>
> > > ---
> > >  arch/x86/kernel/setup.c   |  2 ++
> > >  arch/x86/mm/mem_encrypt.c | 32 ++++++++++++++++++++++++++++++++
> > >  include/linux/swiotlb.h   |  6 ++++++
> > >  kernel/dma/swiotlb.c      | 24 ++++++++++++++++++++++++
> > >  4 files changed, 64 insertions(+)
> > > 
> > > diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
> > > index 3511736fbc74..b073d58dd4a3 100644
> > > --- a/arch/x86/kernel/setup.c
> > > +++ b/arch/x86/kernel/setup.c
> > > @@ -1166,6 +1166,8 @@ void __init setup_arch(char **cmdline_p)
> > >  	if (boot_cpu_has(X86_FEATURE_GBPAGES))
> > >  		hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT);
> > >  
> > > +	swiotlb_adjust();
> > > +
> > >  	/*
> > >  	 * Reserve memory for crash kernel after SRAT is parsed so that it
> > >  	 * won't consume hotpluggable memory.
> > > diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
> > > index 3f248f0d0e07..c79a0d761db5 100644
> > > --- a/arch/x86/mm/mem_encrypt.c
> > > +++ b/arch/x86/mm/mem_encrypt.c
> > > @@ -490,6 +490,38 @@ static void print_mem_encrypt_feature_info(void)
> > >  }
> > >  
> > >  /* Architecture __weak replacement functions */
> > > +unsigned long __init arch_swiotlb_adjust(unsigned long iotlb_default_size)
> > > +{
> > > +	unsigned long size = 0;
> > 
> > 	unsigned long size = iotlb_default_size;
> > 
> > > +
> > > +	/*
> > > +	 * For SEV, all DMA has to occur via shared/unencrypted pages.
> > > +	 * SEV uses SWOTLB to make this happen without changing device
> > > +	 * drivers. However, depending on the workload being run, the
> > > +	 * default 64MB of SWIOTLB may not be enough & SWIOTLB may
> > 						     ^
> > 
> > Use words pls, not "&".
> > 
> > 
> > > +	 * run out of buffers for DMA, resulting in I/O errors and/or
> > > +	 * performance degradation especially with high I/O workloads.
> > > +	 * Increase the default size of SWIOTLB for SEV guests using
> > > +	 * a minimum value of 128MB and a maximum value of 512MB,
> > > +	 * depending on amount of provisioned guest memory.
> > > +	 */
> > > +	if (sev_active()) {
> > > +		phys_addr_t total_mem = memblock_phys_mem_size();
> > > +
> > > +		if (total_mem <= SZ_1G)
> > > +			size = max(iotlb_default_size, (unsigned long) SZ_128M);
> > > +		else if (total_mem <= SZ_4G)
> > > +			size = max(iotlb_default_size, (unsigned long) SZ_256M);
> 
> That is eating 128MB for 1GB, aka 12% of the guest memory allocated statically for this.
> 
> And for guests that are 2GB, that is 12% until it gets to 3GB when it is 8%
> and then 6% at 4GB.
> 
> I would prefer this to be based on your memory count, that is 6% of total
> memory. And then going forward we can allocate memory _after_ boot and then stitch
> the late SWIOTLB pool and allocate on demand.
> 
> 
Ok. 

As I mentioned earlier, the patch was initially based on using a % of guest memory,
as below:

+#define SEV_ADJUST_SWIOTLB_SIZE_PERCENT        5
+#define SEV_ADJUST_SWIOTLB_SIZE_MAX    (1UL << 30)
...
...
+       if (sev_active() && !io_tlb_nslabs) {
+               unsigned long total_mem = get_num_physpages() << PAGE_SHIFT;
+
+               default_size = total_mem *
+                       SEV_ADJUST_SWIOTLB_SIZE_PERCENT / 100;
+
+               default_size = ALIGN(default_size, 1 << IO_TLB_SHIFT);
+
+               default_size = clamp_val(default_size, IO_TLB_DEFAULT_SIZE,
+                       SEV_ADJUST_SWIOTLB_SIZE_MAX);
+       }

So a similar logic can be applied here.
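
(For concreteness, with the 5% factor and that clamp, and IO_TLB_DEFAULT_SIZE
at 64MB: a 1GB guest computes ~51MB and is clamped up to 64MB, a 4GB guest
gets ~205MB, an 8GB guest ~410MB, and guests of 20GB and above hit the 1GB
SEV_ADJUST_SWIOTLB_SIZE_MAX cap.)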

> 
> > > +		else
> > > +			size = max(iotlb_default_size, (unsigned long) SZ_512M);
> > > +
> > > +		pr_info("SWIOTLB bounce buffer size adjusted to %luMB for SEV platform",
> > 
> > just "... for SEV" - no need for "platform".
> > 
> > ...
> > 
> > > diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
> > > index c19379fabd20..3be9a19ea0a5 100644
> > > --- a/kernel/dma/swiotlb.c
> > > +++ b/kernel/dma/swiotlb.c
> > > @@ -163,6 +163,30 @@ unsigned long swiotlb_size_or_default(void)
> > >  	return size ? size : (IO_TLB_DEFAULT_SIZE);
> > >  }
> > >  
> > > +unsigned long __init __weak arch_swiotlb_adjust(unsigned long size)
> > > +{
> > > +	return 0;
> > 
> > That, of course, needs to return size, not 0.
> 
> This is not going to work for TDX. I think having a registration
> to SWIOTLB to have this function would be better going forward.
> 
> As in there will be a swiotlb_register_adjuster() which AMD SEV
> code can call at start, also TDX can do it (and other platforms).
> 

The question is: how does mem_encrypt_init() work?

That uses a similar logic to arch_swiotlb_adjust(), as a "__weak"
function, and I am sure it will also need added support for TDX.
Can't both arch_swiotlb_adjust() and mem_encrypt_init() have specific
checks for the active AMD/Intel memory encryption technology and act
accordingly, as mem_encrypt_init() currently checks for
sev_active()?

init/main.c: 

void __init __weak mem_encrypt_init(void) { }

start_kernel()
{
   ..
   mem_encrypt_init();
   ..
}

arch/x86/mm/mem_encrypt.c: 
    
/* Architecture __weak replacement functions */

void __init mem_encrypt_init(void)
{
        if (!sme_me_mask)
                return;

        /* Call into SWIOTLB to update the SWIOTLB DMA buffers */
        swiotlb_update_mem_attributes();

        /*
         * With SEV, we need to unroll the rep string I/O instructions.
         */
        if (sev_active())
                static_branch_enable(&sev_enable_key);
...
...

Thanks,
Ashish

> > 
> > > +}
> > > +
> > > +void __init swiotlb_adjust(void)
> > > +{
> > > +	unsigned long size;
> > > +
> > > +	/*
> > > +	 * If swiotlb parameter has not been specified, give a chance to
> > > +	 * architectures such as those supporting memory encryption to
> > > +	 * adjust/expand SWIOTLB size for their use.
> > > +	 */
> > 
> > And when you preset the function-local argument "size" with the size
> > coming in as the size argument of arch_swiotlb_adjust()...
> > 
> > > +	if (!io_tlb_nslabs) {
> > > +		size = arch_swiotlb_adjust(IO_TLB_DEFAULT_SIZE);
> > > +		if (size) {
> > 
> > ... you don't have to do if (size) here either but simply use size to
> > compute io_tlb_nslabs, I'd say.
> > 
> > Thx.
> > 
> > -- 
> > Regards/Gruss,
> >     Boris.
> > 
> > https://people.kernel.org/tglx/notes-about-netiquette

* Re: [PATCH v6] swiotlb: Adjust SWIOTLB bounce buffer size for SEV guests.
  2020-11-23 18:43       ` Konrad Rzeszutek Wilk
@ 2020-11-24  9:00         ` Borislav Petkov
  0 siblings, 0 replies; 13+ messages in thread
From: Borislav Petkov @ 2020-11-24  9:00 UTC (permalink / raw)
  To: Konrad Rzeszutek Wilk
  Cc: Ashish Kalra, hch, tglx, mingo, hpa, x86, luto, peterz,
	dave.hansen, iommu, linux-kernel, brijesh.singh, Thomas.Lendacky,
	jon.grimm, rientjes

On Mon, Nov 23, 2020 at 01:43:27PM -0500, Konrad Rzeszutek Wilk wrote:
> I am assuming that TDX is going to have the same exact issue that 
> AMD SEV will have.
> 
> Are you recommending to have a unified x86-specific callback
> where we check if it:
> 
>  - CPUID_AMD_SEV or CPUID_INTEL_TDX is set, and
>  - No vIOMMU present, then we adjust the size?

I'm thinking: do it correctly right now, and when TDX appears on the
horizon requesting that this be adjusted, then change it. Like we always do.

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v6] swiotlb: Adjust SWIOTLB bounce buffer size for SEV guests.
  2020-11-23 22:56     ` Ashish Kalra
@ 2020-11-24  9:04       ` Borislav Petkov
  2020-11-24  9:25         ` Kalra, Ashish
  2020-11-24 23:46       ` Ashish Kalra
  1 sibling, 1 reply; 13+ messages in thread
From: Borislav Petkov @ 2020-11-24  9:04 UTC (permalink / raw)
  To: Ashish Kalra
  Cc: Konrad Rzeszutek Wilk, hch, tglx, mingo, hpa, x86, luto, peterz,
	dave.hansen, iommu, linux-kernel, brijesh.singh, Thomas.Lendacky,
	jon.grimm, rientjes

On Mon, Nov 23, 2020 at 10:56:31PM +0000, Ashish Kalra wrote:
> As i mentioned earlier, the patch was initially based on using a % of
> guest memory,

Can you figure out how much the guest memory is and then allocate a
percentage?

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette

* Re: [PATCH v6] swiotlb: Adjust SWIOTLB bounce buffer size for SEV guests.
  2020-11-24  9:04       ` Borislav Petkov
@ 2020-11-24  9:25         ` Kalra, Ashish
  2020-11-24  9:38           ` Borislav Petkov
  0 siblings, 1 reply; 13+ messages in thread
From: Kalra, Ashish @ 2020-11-24  9:25 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Konrad Rzeszutek Wilk, hch, tglx, mingo, hpa, x86, luto, peterz,
	dave.hansen, iommu, linux-kernel, Singh, Brijesh, Lendacky,
	Thomas, Grimm, Jon, rientjes



> On Nov 24, 2020, at 3:04 AM, Borislav Petkov <bp@alien8.de> wrote:
> 
> On Mon, Nov 23, 2020 at 10:56:31PM +0000, Ashish Kalra wrote:
>> As i mentioned earlier, the patch was initially based on using a % of
>> guest memory,
> 
> Can you figure out how much the guest memory is and then allocate a
> percentage?

But what will be the criteria to figure out this percentage?

As I mentioned earlier, this can be made as complicated as possible by adding all kinds of heuristics but without any predictable performance gain.

Or it can be kept simple by using a static percentage value.

Thanks,
Ashish

> -- 
> Regards/Gruss,
>    Boris.
> 
> https://people.kernel.org/tglx/notes-about-netiquette

* Re: [PATCH v6] swiotlb: Adjust SWIOTLB bounce buffer size for SEV guests.
  2020-11-24  9:25         ` Kalra, Ashish
@ 2020-11-24  9:38           ` Borislav Petkov
  2020-11-24  9:43             ` Kalra, Ashish
  0 siblings, 1 reply; 13+ messages in thread
From: Borislav Petkov @ 2020-11-24  9:38 UTC (permalink / raw)
  To: Kalra, Ashish
  Cc: Konrad Rzeszutek Wilk, hch, tglx, mingo, hpa, x86, luto, peterz,
	dave.hansen, iommu, linux-kernel, Singh, Brijesh, Lendacky,
	Thomas, Grimm, Jon, rientjes

On Tue, Nov 24, 2020 at 09:25:06AM +0000, Kalra, Ashish wrote:
> But what will be the criteria to figure out this percentage?
>
> As I mentioned earlier, this can be made as complicated as possible by
> adding all kinds of heuristics but without any predictable performance
> gain.
>
> Or it can be kept simple by using a static percentage value.

Yes, static percentage number based on the guest memory. X% of the guest
memory is used for SWIOTLB.

Since you use sev_active(), it means the size computation is done in the
guest so that SWIOTLB size is per-guest. Yes?

If so, you can simply take, say, 5% of the guest memory's size and use
that for SWIOTLB buffers. Or 6 or X or whatever.

Makes sense?

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette

* Re: [PATCH v6] swiotlb: Adjust SWIOTLB bounce buffer size for SEV guests.
  2020-11-24  9:38           ` Borislav Petkov
@ 2020-11-24  9:43             ` Kalra, Ashish
  0 siblings, 0 replies; 13+ messages in thread
From: Kalra, Ashish @ 2020-11-24  9:43 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Konrad Rzeszutek Wilk, hch, tglx, mingo, hpa, x86, luto, peterz,
	dave.hansen, iommu, linux-kernel, Singh, Brijesh, Lendacky,
	Thomas, Grimm, Jon, rientjes



> On Nov 24, 2020, at 3:38 AM, Borislav Petkov <bp@alien8.de> wrote:
> 
> On Tue, Nov 24, 2020 at 09:25:06AM +0000, Kalra, Ashish wrote:
>> But what will be the criteria to figure out this percentage?
>> 
>> As I mentioned earlier, this can be made as complicated as possible by
>> adding all kinds of heuristics but without any predictable performance
>> gain.
>> 
>> Or it can be kept simple by using a static percentage value.
> 
> Yes, static percentage number based on the guest memory. X% of the guest
> memory is used for SWIOTLB.
> 
> Since you use sev_active(), it means the size computation is done in the
> guest so that SWIOTLB size is per-guest. Yes?

Yes

> 
> If so, you can simply take, say, 5% of the guest memory's size and use
> that for SWIOTLB buffers. Or 6 or X or whatever.
> 
> Makes sense?

Sure it does.

Thanks,
Ashish

> 
> -- 
> Regards/Gruss,
>    Boris.
> 
> https://people.kernel.org/tglx/notes-about-netiquette

* Re: [PATCH v6] swiotlb: Adjust SWIOTLB bounce buffer size for SEV guests.
  2020-11-23 22:56     ` Ashish Kalra
  2020-11-24  9:04       ` Borislav Petkov
@ 2020-11-24 23:46       ` Ashish Kalra
  2020-12-01 20:42         ` Konrad Rzeszutek Wilk
  1 sibling, 1 reply; 13+ messages in thread
From: Ashish Kalra @ 2020-11-24 23:46 UTC (permalink / raw)
  To: Konrad Rzeszutek Wilk
  Cc: Borislav Petkov, hch, tglx, mingo, hpa, x86, luto, peterz,
	dave.hansen, iommu, linux-kernel, brijesh.singh, Thomas.Lendacky,
	jon.grimm, rientjes

Hello Konrad, 

On Mon, Nov 23, 2020 at 10:56:31PM +0000, Ashish Kalra wrote:
> Hello Konrad,
> 
> On Mon, Nov 23, 2020 at 12:56:32PM -0500, Konrad Rzeszutek Wilk wrote:
> > On Mon, Nov 23, 2020 at 06:06:47PM +0100, Borislav Petkov wrote:
> > > On Thu, Nov 19, 2020 at 09:42:05PM +0000, Ashish Kalra wrote:
> > > > From: Ashish Kalra <ashish.kalra@amd.com>
> > > > 
> > > > For SEV, all DMA to and from guest has to use shared (un-encrypted) pages.
> > > > SEV uses SWIOTLB to make this happen without requiring changes to device
> > > > drivers.  However, depending on workload being run, the default 64MB of
> > > > SWIOTLB might not be enough and SWIOTLB may run out of buffers to use
> > > > for DMA, resulting in I/O errors and/or performance degradation for
> > > > high I/O workloads.
> > > > 
> > > > Increase the default size of SWIOTLB for SEV guests using a minimum
> > > > value of 128MB and a maximum value of 512MB, determining on amount
> > > > of provisioned guest memory.
> > > 
> > > That sentence needs massaging.
> > > 
> > > > Using late_initcall() interface to invoke swiotlb_adjust() does not
> > > > work as the size adjustment needs to be done before mem_encrypt_init()
> > > > and reserve_crashkernel() which use the allocated SWIOTLB buffer size,
> > > > hence calling it explicitly from setup_arch().
> > > 
> > > "hence call it ... "
> > > 
> > > > 
> > > > The SWIOTLB default size adjustment is added as an architecture specific
> > > 
> > > "... is added... " needs to be "Add ..."
> > > 
> > > > interface/callback to allow architectures such as those supporting memory
> > > > encryption to adjust/expand SWIOTLB size for their use.
> > > > 
> > > > v5 fixed build errors and warnings as
> > > > Reported-by: kbuild test robot <lkp@intel.com>
> > > > 
> > > > Signed-off-by: Ashish Kalra <ashish.kalra@amd.com>
> > > > ---
> > > >  arch/x86/kernel/setup.c   |  2 ++
> > > >  arch/x86/mm/mem_encrypt.c | 32 ++++++++++++++++++++++++++++++++
> > > >  include/linux/swiotlb.h   |  6 ++++++
> > > >  kernel/dma/swiotlb.c      | 24 ++++++++++++++++++++++++
> > > >  4 files changed, 64 insertions(+)
> > > > 
> > > > diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
> > > > index 3511736fbc74..b073d58dd4a3 100644
> > > > --- a/arch/x86/kernel/setup.c
> > > > +++ b/arch/x86/kernel/setup.c
> > > > @@ -1166,6 +1166,8 @@ void __init setup_arch(char **cmdline_p)
> > > >  	if (boot_cpu_has(X86_FEATURE_GBPAGES))
> > > >  		hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT);
> > > >  
> > > > +	swiotlb_adjust();
> > > > +
> > > >  	/*
> > > >  	 * Reserve memory for crash kernel after SRAT is parsed so that it
> > > >  	 * won't consume hotpluggable memory.
> > > > diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
> > > > index 3f248f0d0e07..c79a0d761db5 100644
> > > > --- a/arch/x86/mm/mem_encrypt.c
> > > > +++ b/arch/x86/mm/mem_encrypt.c
> > > > @@ -490,6 +490,38 @@ static void print_mem_encrypt_feature_info(void)
> > > >  }
> > > >  
> > > >  /* Architecture __weak replacement functions */
> > > > +unsigned long __init arch_swiotlb_adjust(unsigned long iotlb_default_size)
> > > > +{
> > > > +	unsigned long size = 0;
> > > 
> > > 	unsigned long size = iotlb_default_size;
> > > 
> > > > +
> > > > +	/*
> > > > +	 * For SEV, all DMA has to occur via shared/unencrypted pages.
> > > > +	 * SEV uses SWOTLB to make this happen without changing device
> > > > +	 * drivers. However, depending on the workload being run, the
> > > > +	 * default 64MB of SWIOTLB may not be enough & SWIOTLB may
> > > 						     ^
> > > 
> > > Use words pls, not "&".
> > > 
> > > 
> > > > +	 * run out of buffers for DMA, resulting in I/O errors and/or
> > > > +	 * performance degradation especially with high I/O workloads.
> > > > +	 * Increase the default size of SWIOTLB for SEV guests using
> > > > +	 * a minimum value of 128MB and a maximum value of 512MB,
> > > > +	 * depending on amount of provisioned guest memory.
> > > > +	 */
> > > > +	if (sev_active()) {
> > > > +		phys_addr_t total_mem = memblock_phys_mem_size();
> > > > +
> > > > +		if (total_mem <= SZ_1G)
> > > > +			size = max(iotlb_default_size, (unsigned long) SZ_128M);
> > > > +		else if (total_mem <= SZ_4G)
> > > > +			size = max(iotlb_default_size, (unsigned long) SZ_256M);
> > 
> > That is eating 128MB for 1GB, aka 12% of the guest memory allocated statically for this.
> > 
> > And for guests that are 2GB, that is 12% until it gets to 3GB when it is 8%
> > and then 6% at 4GB.
> > 
> > I would prefer this to be based on your memory count, that is 6% of total
> > memory. And then going forward we can allocate memory _after_ boot and then stitch
> > the late SWIOTLB pool and allocate on demand.
> > 
> > 
> Ok. 
> 
> As i mentioned earlier, the patch was initially based on using a % of guest memory,
> as below:
> 
> +#define SEV_ADJUST_SWIOTLB_SIZE_PERCENT        5
> +#define SEV_ADJUST_SWIOTLB_SIZE_MAX    (1UL << 30)
> ...
> ...
> +       if (sev_active() && !io_tlb_nslabs) {
> +               unsigned long total_mem = get_num_physpages() <<
> + PAGE_SHIFT;
> +
> +               default_size = total_mem *
> +                       SEV_ADJUST_SWIOTLB_SIZE_PERCENT / 100;
> +
> +               default_size = ALIGN(default_size, 1 << IO_TLB_SHIFT);
> +
> +               default_size = clamp_val(default_size, IO_TLB_DEFAULT_SIZE,
> +                       SEV_ADJUST_SWIOTLB_SIZE_MAX);
> +       }
> 
> So a similar logic can be applied here.
> 
> > 
> > > > +		else
> > > > +			size = max(iotlb_default_size, (unsigned long) SZ_512M);
> > > > +
> > > > +		pr_info("SWIOTLB bounce buffer size adjusted to %luMB for SEV platform",
> > > 
> > > just "... for SEV" - no need for "platform".
> > > 
> > > ...
> > > 
> > > > diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
> > > > index c19379fabd20..3be9a19ea0a5 100644
> > > > --- a/kernel/dma/swiotlb.c
> > > > +++ b/kernel/dma/swiotlb.c
> > > > @@ -163,6 +163,30 @@ unsigned long swiotlb_size_or_default(void)
> > > >  	return size ? size : (IO_TLB_DEFAULT_SIZE);
> > > >  }
> > > >  
> > > > +unsigned long __init __weak arch_swiotlb_adjust(unsigned long size)
> > > > +{
> > > > +	return 0;
> > > 
> > > That, of course, needs to return size, not 0.
> > 
> > This is not going to work for TDX. I think having a registration
> > to SWIOTLB to have this function would be better going forward.
> > 
> > As in there will be a swiotlb_register_adjuster() which AMD SEV
> > code can call at start, also TDX can do it (and other platforms).
> > 
> 
> The question is: how does mem_encrypt_init() work?
> 
> That uses a similar logic to arch_swiotlb_adjust(), as a "__weak"
> function, and I am sure it will also need added support for TDX.
> Can't both arch_swiotlb_adjust() and mem_encrypt_init() have specific
> checks for the active AMD/Intel memory encryption technology and act
> accordingly, as mem_encrypt_init() currently checks for
> sev_active()?
> 
> init/main.c: 
> 
> void __init __weak mem_encrypt_init(void) { }
> 
> start_kernel()
> {
>    ..
>    mem_encrypt_init();
>    ..
> }
> 
> arch/x86/mm/mem_encrypt.c: 
>     
> /* Architecture __weak replacement functions */
> 
> void __init mem_encrypt_init(void)
> {
>         if (!sme_me_mask)
>                 return;
> 
>         /* Call into SWIOTLB to update the SWIOTLB DMA buffers */
>         swiotlb_update_mem_attributes();
> 
>         /*
>          * With SEV, we need to unroll the rep string I/O instructions.
>          */
>         if (sev_active())
>                 static_branch_enable(&sev_enable_key);
> ...
> ...
> 

Your thoughts on this ?

Thanks,
Ashish

> 
> > > 
> > > > +}
> > > > +
> > > > +void __init swiotlb_adjust(void)
> > > > +{
> > > > +	unsigned long size;
> > > > +
> > > > +	/*
> > > > +	 * If swiotlb parameter has not been specified, give a chance to
> > > > +	 * architectures such as those supporting memory encryption to
> > > > +	 * adjust/expand SWIOTLB size for their use.
> > > > +	 */
> > > 
> > > And when you preset the function-local argument "size" with the size
> > > coming in as the size argument of arch_swiotlb_adjust()...
> > > 
> > > > +	if (!io_tlb_nslabs) {
> > > > +		size = arch_swiotlb_adjust(IO_TLB_DEFAULT_SIZE);
> > > > +		if (size) {
> > > 
> > > ... you don't have to do if (size) here either but simply use size to
> > > compute io_tlb_nslabs, I'd say.
> > > 
> > > Thx.
> > > 
> > > -- 
> > > Regards/Gruss,
> > >     Boris.
> > > 
> > > https://people.kernel.org/tglx/notes-about-netiquette

* Re: [PATCH v6] swiotlb: Adjust SWIOTLB bounce buffer size for SEV guests.
  2020-11-24 23:46       ` Ashish Kalra
@ 2020-12-01 20:42         ` Konrad Rzeszutek Wilk
  0 siblings, 0 replies; 13+ messages in thread
From: Konrad Rzeszutek Wilk @ 2020-12-01 20:42 UTC (permalink / raw)
  To: Ashish Kalra
  Cc: Borislav Petkov, hch, tglx, mingo, hpa, x86, luto, peterz,
	dave.hansen, iommu, linux-kernel, brijesh.singh, Thomas.Lendacky,
	jon.grimm, rientjes

On Tue, Nov 24, 2020 at 11:46:22PM +0000, Ashish Kalra wrote:
> Hello Konrad, 
> 
> On Mon, Nov 23, 2020 at 10:56:31PM +0000, Ashish Kalra wrote:
> > Hello Konrad,
> > 
> > On Mon, Nov 23, 2020 at 12:56:32PM -0500, Konrad Rzeszutek Wilk wrote:
> > > On Mon, Nov 23, 2020 at 06:06:47PM +0100, Borislav Petkov wrote:
> > > > On Thu, Nov 19, 2020 at 09:42:05PM +0000, Ashish Kalra wrote:
> > > > > From: Ashish Kalra <ashish.kalra@amd.com>
> > > > > 
> > > > > For SEV, all DMA to and from guest has to use shared (un-encrypted) pages.
> > > > > SEV uses SWIOTLB to make this happen without requiring changes to device
> > > > > drivers.  However, depending on workload being run, the default 64MB of
> > > > > SWIOTLB might not be enough and SWIOTLB may run out of buffers to use
> > > > > for DMA, resulting in I/O errors and/or performance degradation for
> > > > > high I/O workloads.
> > > > > 
> > > > > Increase the default size of SWIOTLB for SEV guests using a minimum
> > > > > value of 128MB and a maximum value of 512MB, determining on amount
> > > > > of provisioned guest memory.
> > > > 
> > > > That sentence needs massaging.
> > > > 
> > > > > Using late_initcall() interface to invoke swiotlb_adjust() does not
> > > > > work as the size adjustment needs to be done before mem_encrypt_init()
> > > > > and reserve_crashkernel() which use the allocated SWIOTLB buffer size,
> > > > > hence calling it explicitly from setup_arch().
> > > > 
> > > > "hence call it ... "
> > > > 
> > > > > 
> > > > > The SWIOTLB default size adjustment is added as an architecture specific
> > > > 
> > > > "... is added... " needs to be "Add ..."
> > > > 
> > > > > interface/callback to allow architectures such as those supporting memory
> > > > > encryption to adjust/expand SWIOTLB size for their use.
> > > > > 
> > > > > v5 fixed build errors and warnings as
> > > > > Reported-by: kbuild test robot <lkp@intel.com>
> > > > > 
> > > > > Signed-off-by: Ashish Kalra <ashish.kalra@amd.com>
> > > > > ---
> > > > >  arch/x86/kernel/setup.c   |  2 ++
> > > > >  arch/x86/mm/mem_encrypt.c | 32 ++++++++++++++++++++++++++++++++
> > > > >  include/linux/swiotlb.h   |  6 ++++++
> > > > >  kernel/dma/swiotlb.c      | 24 ++++++++++++++++++++++++
> > > > >  4 files changed, 64 insertions(+)
> > > > > 
> > > > > diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
> > > > > index 3511736fbc74..b073d58dd4a3 100644
> > > > > --- a/arch/x86/kernel/setup.c
> > > > > +++ b/arch/x86/kernel/setup.c
> > > > > @@ -1166,6 +1166,8 @@ void __init setup_arch(char **cmdline_p)
> > > > >  	if (boot_cpu_has(X86_FEATURE_GBPAGES))
> > > > >  		hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT);
> > > > >  
> > > > > +	swiotlb_adjust();
> > > > > +
> > > > >  	/*
> > > > >  	 * Reserve memory for crash kernel after SRAT is parsed so that it
> > > > >  	 * won't consume hotpluggable memory.
> > > > > diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
> > > > > index 3f248f0d0e07..c79a0d761db5 100644
> > > > > --- a/arch/x86/mm/mem_encrypt.c
> > > > > +++ b/arch/x86/mm/mem_encrypt.c
> > > > > @@ -490,6 +490,38 @@ static void print_mem_encrypt_feature_info(void)
> > > > >  }
> > > > >  
> > > > >  /* Architecture __weak replacement functions */
> > > > > +unsigned long __init arch_swiotlb_adjust(unsigned long iotlb_default_size)
> > > > > +{
> > > > > +	unsigned long size = 0;
> > > > 
> > > > 	unsigned long size = iotlb_default_size;
> > > > 
> > > > > +
> > > > > +	/*
> > > > > +	 * For SEV, all DMA has to occur via shared/unencrypted pages.
> > > > > +	 * SEV uses SWOTLB to make this happen without changing device
> > > > > +	 * drivers. However, depending on the workload being run, the
> > > > > +	 * default 64MB of SWIOTLB may not be enough & SWIOTLB may
> > > > 						     ^
> > > > 
> > > > Use words pls, not "&".
> > > > 
> > > > 
> > > > > +	 * run out of buffers for DMA, resulting in I/O errors and/or
> > > > > +	 * performance degradation especially with high I/O workloads.
> > > > > +	 * Increase the default size of SWIOTLB for SEV guests using
> > > > > +	 * a minimum value of 128MB and a maximum value of 512MB,
> > > > > +	 * depending on amount of provisioned guest memory.
> > > > > +	 */
> > > > > +	if (sev_active()) {
> > > > > +		phys_addr_t total_mem = memblock_phys_mem_size();
> > > > > +
> > > > > +		if (total_mem <= SZ_1G)
> > > > > +			size = max(iotlb_default_size, (unsigned long) SZ_128M);
> > > > > +		else if (total_mem <= SZ_4G)
> > > > > +			size = max(iotlb_default_size, (unsigned long) SZ_256M);
> > > 
> > > That is eating 128MB for 1GB, aka 12% of the guest memory allocated statically for this.
> > > 
> > > And for guests that are 2GB, that is 12% until it gets to 3GB when it is 8%
> > > and then 6% at 4GB.
> > > 
> > > I would prefer this to be based on your memory count, that is 6% of total
> > > memory. And then going forward we can allocate memory _after_ boot and then stitch
> > > the late SWIOTLB pool and allocate on demand.
> > > 
> > > 
> > Ok. 
> > 
> > As i mentioned earlier, the patch was initially based on using a % of guest memory,
> > as below:
> > 
> > +#define SEV_ADJUST_SWIOTLB_SIZE_PERCENT        5
> > +#define SEV_ADJUST_SWIOTLB_SIZE_MAX    (1UL << 30)
> > ...
> > ...
> > +       if (sev_active() && !io_tlb_nslabs) {
> > +               unsigned long total_mem = get_num_physpages() << PAGE_SHIFT;
> > +
> > +               default_size = total_mem *
> > +                       SEV_ADJUST_SWIOTLB_SIZE_PERCENT / 100;
> > +
> > +               default_size = ALIGN(default_size, 1 << IO_TLB_SHIFT);
> > +
> > +               default_size = clamp_val(default_size, IO_TLB_DEFAULT_SIZE,
> > +                       SEV_ADJUST_SWIOTLB_SIZE_MAX);
> > +       }
> > 
> > So a similar logic can be applied here.
> > 
> > > 
> > > > > +		else
> > > > > +			size = max(iotlb_default_size, (unsigned long) SZ_512M);
> > > > > +
> > > > > +		pr_info("SWIOTLB bounce buffer size adjusted to %luMB for SEV platform",
> > > > 
> > > > just "... for SEV" - no need for "platform".
> > > > 
> > > > ...
> > > > 
> > > > > diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
> > > > > index c19379fabd20..3be9a19ea0a5 100644
> > > > > --- a/kernel/dma/swiotlb.c
> > > > > +++ b/kernel/dma/swiotlb.c
> > > > > @@ -163,6 +163,30 @@ unsigned long swiotlb_size_or_default(void)
> > > > >  	return size ? size : (IO_TLB_DEFAULT_SIZE);
> > > > >  }
> > > > >  
> > > > > +unsigned long __init __weak arch_swiotlb_adjust(unsigned long size)
> > > > > +{
> > > > > +	return 0;
> > > > 
> > > > That, of course, needs to return size, not 0.
> > > 
> > > This is not going to work for TDX. I think having a registration
> > > to SWIOTLB to have this function would be better going forward.
> > > 
> > > As in there will be a swiotlb_register_adjuster() which AMD SEV
> > > code can call at start, also TDX can do it (and other platforms).
> > > 
> > 
> > The question is: how does mem_encrypt_init() work?
> > 
> > That uses a similar logic to arch_swiotlb_adjust(), as a "__weak"
> > function, and I am sure it will also need added support for TDX.
> > Can't both arch_swiotlb_adjust() and mem_encrypt_init() have specific
> > checks for the active AMD/Intel memory encryption technology and act
> > accordingly, as mem_encrypt_init() currently checks for
> > sev_active()?
> > 
> > init/main.c: 
> > 
> > void __init __weak mem_encrypt_init(void) { }
> > 
> > start_kernel()
> > {
> >    ..
> >    mem_encrypt_init();
> >    ..
> > }
> > 
> > arch/x86/mm/mem_encrypt.c: 
> >     
> > /* Architecture __weak replacement functions */
> > 
> > void __init mem_encrypt_init(void)
> > {
> >         if (!sme_me_mask)
> >                 return;
> > 
> >         /* Call into SWIOTLB to update the SWIOTLB DMA buffers */
> >         swiotlb_update_mem_attributes();
> > 
> >         /*
> >          * With SEV, we need to unroll the rep string I/O instructions.
> >          */
> >         if (sev_active())
> >                 static_branch_enable(&sev_enable_key);
> > ...
> > ...
> > 
> 
> Your thoughts on this ?

That looks quite sensible. 
> 
> Thanks,
> Ashish
> 
> > 
> > > > 
> > > > > +}
> > > > > +
> > > > > +void __init swiotlb_adjust(void)
> > > > > +{
> > > > > +	unsigned long size;
> > > > > +
> > > > > +	/*
> > > > > +	 * If swiotlb parameter has not been specified, give a chance to
> > > > > +	 * architectures such as those supporting memory encryption to
> > > > > +	 * adjust/expand SWIOTLB size for their use.
> > > > > +	 */
> > > > 
> > > > And when you preset the function-local argument "size" with the size
> > > > coming in as the size argument of arch_swiotlb_adjust()...
> > > > 
> > > > > +	if (!io_tlb_nslabs) {
> > > > > +		size = arch_swiotlb_adjust(IO_TLB_DEFAULT_SIZE);
> > > > > +		if (size) {
> > > > 
> > > > ... you don't have to do if (size) here either but simply use size to
> > > > compute io_tlb_nslabs, I'd say.
> > > > 
> > > > Thx.
> > > > 
> > > > -- 
> > > > Regards/Gruss,
> > > >     Boris.
> > > > 
> > > > https://people.kernel.org/tglx/notes-about-netiquette
