linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* Re: [RFC 01/13] iommu: Introduce bind_guest_stage API
       [not found] ` <1535026656-8450-2-git-send-email-eric.auger@redhat.com>
@ 2018-08-23 15:25   ` Auger Eric
  2018-08-31 13:11     ` Jean-Philippe Brucker
       [not found]   ` <A2975661238FB949B60364EF0F2C257439CCE9EE@SHSMSX104.ccr.corp.intel.com>
  1 sibling, 1 reply; 21+ messages in thread
From: Auger Eric @ 2018-08-23 15:25 UTC (permalink / raw)
  To: eric.auger.pro, iommu, linux-kernel, kvm, kvmarm, joro,
	alex.williamson, jean-philippe.brucker, jacob.jun.pan, yi.l.liu,
	will.deacon, robin.murphy
  Cc: marc.zyngier, christoffer.dall, peter.maydell

Hi,

On 08/23/2018 02:17 PM, Eric Auger wrote:
> From: Jacob Pan <jacob.jun.pan@linux.intel.com>
> 
> In virtualization use case, when a guest is assigned
> a PCI host device, protected by a virtual IOMMU on a guest,
> the physical IOMMU must be programmed to be consistent with
> the guest mappings. If the physical IOMMU supports two
> translation stages it makes sense to program guest mappings
> onto the first stage while to host owns the stage 2.
> 
> In that case, it is mandated to trap on guest configuration
> settings and pass those to the physical iommu driver.
> 
> This patch adds a new API to the iommu subsystem that allows
> to bind and unbind the guest configuration data to the host.
> 
> A generic iommu_guest_stage_config struct is introduced in
> a new iommu.h uapi header. This is going to be used by the VFIO
> user API. We foresee at least 2 specialization of this struct,
> for PASID table passing and ARM SMMUv3.
> 
> Signed-off-by: Jean-Philippe Brucker <jean-philippe.brucker@arm.com>
> Signed-off-by: Liu, Yi L <yi.l.liu@linux.intel.com>
> Signed-off-by: Ashok Raj <ashok.raj@intel.com>
> Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
> Signed-off-by: Eric Auger <eric.auger@redhat.com>
> 
> ---
> 
> This patch generalizes the API introduced by Jacob & co-authors in
> https://lwn.net/Articles/754331/
> ---
>  drivers/iommu/iommu.c      | 19 +++++++++++++++
>  include/linux/iommu.h      | 23 ++++++++++++++++++
>  include/uapi/linux/iommu.h | 59 ++++++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 101 insertions(+)
>  create mode 100644 include/uapi/linux/iommu.h
> 
> diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
> index 63b3756..5156172 100644
> --- a/drivers/iommu/iommu.c
> +++ b/drivers/iommu/iommu.c
> @@ -1326,6 +1326,25 @@ int iommu_attach_device(struct iommu_domain *domain, struct device *dev)
>  }
>  EXPORT_SYMBOL_GPL(iommu_attach_device);
>  
> +int iommu_bind_guest_stage(struct iommu_domain *domain, struct device *dev,
> +			   struct iommu_guest_stage_config *cfg)
> +{
> +	if (unlikely(!domain->ops->bind_guest_stage))
> +		return -ENODEV;
> +
> +	return domain->ops->bind_guest_stage(domain, dev, cfg);
> +}
> +EXPORT_SYMBOL_GPL(iommu_bind_guest_stage);
> +
> +void iommu_unbind_guest_stage(struct iommu_domain *domain, struct device *dev)
> +{
> +	if (unlikely(!domain->ops->unbind_guest_stage))
> +		return;
> +
> +	domain->ops->unbind_guest_stage(domain, dev);
> +}
> +EXPORT_SYMBOL_GPL(iommu_unbind_guest_stage);
> +
>  static void __iommu_detach_device(struct iommu_domain *domain,
>  				  struct device *dev)
>  {
> diff --git a/include/linux/iommu.h b/include/linux/iommu.h
> index 19938ee..aec853d 100644
> --- a/include/linux/iommu.h
> +++ b/include/linux/iommu.h
> @@ -25,6 +25,7 @@
>  #include <linux/errno.h>
>  #include <linux/err.h>
>  #include <linux/of.h>
> +#include <uapi/linux/iommu.h>
>  
>  #define IOMMU_READ	(1 << 0)
>  #define IOMMU_WRITE	(1 << 1)
> @@ -187,6 +188,8 @@ struct iommu_resv_region {
>   * @domain_get_windows: Return the number of windows for a domain
>   * @of_xlate: add OF master IDs to iommu grouping
>   * @pgsize_bitmap: bitmap of all possible supported page sizes
> + * @bind_guest_stage: program the guest stage configuration
> + * @unbind_guest_stage: unbind the guest stage and restore defaults
>   */
>  struct iommu_ops {
>  	bool (*capable)(enum iommu_cap);
> @@ -235,6 +238,11 @@ struct iommu_ops {
>  	int (*of_xlate)(struct device *dev, struct of_phandle_args *args);
>  	bool (*is_attach_deferred)(struct iommu_domain *domain, struct device *dev);
>  
> +	int (*bind_guest_stage)(struct iommu_domain *domain, struct device *dev,
> +				struct iommu_guest_stage_config *cfg);
> +	void (*unbind_guest_stage)(struct iommu_domain *domain,
> +				   struct device *dev);
> +
>  	unsigned long pgsize_bitmap;
>  };
>  
> @@ -296,6 +304,10 @@ extern int iommu_attach_device(struct iommu_domain *domain,
>  			       struct device *dev);
>  extern void iommu_detach_device(struct iommu_domain *domain,
>  				struct device *dev);
> +extern int iommu_bind_guest_stage(struct iommu_domain *domain,
> +		struct device *dev, struct iommu_guest_stage_config *cfg);
> +extern void iommu_unbind_guest_stage(struct iommu_domain *domain,
> +				     struct device *dev);
>  extern struct iommu_domain *iommu_get_domain_for_dev(struct device *dev);
>  extern int iommu_map(struct iommu_domain *domain, unsigned long iova,
>  		     phys_addr_t paddr, size_t size, int prot);
> @@ -696,6 +708,17 @@ const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode)
>  	return NULL;
>  }
>  
> +static inline
> +int iommu_bind_guest_stage(struct iommu_domain *domain, struct device *dev,
> +			   struct iommu_guest_stage_config *cfg)
> +{
> +	return -ENODEV;
> +}
> +static inline
> +void iommu_unbind_guest_stage(struct iommu_domain *domain, struct device *dev)
> +{
> +}
> +
>  #endif /* CONFIG_IOMMU_API */
>  
>  #endif /* __LINUX_IOMMU_H */
> diff --git a/include/uapi/linux/iommu.h b/include/uapi/linux/iommu.h
> new file mode 100644
> index 0000000..2d1593c
> --- /dev/null
> +++ b/include/uapi/linux/iommu.h
> @@ -0,0 +1,59 @@
> +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
> +/*
> + * IOMMU user API definitions
> + *
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +
> +#ifndef _UAPI_IOMMU_H
> +#define _UAPI_IOMMU_H
> +
> +#include <linux/types.h>
> +
> +/**
> + * PASID table data used to bind guest PASID table to the host IOMMU. This will
> + * enable guest managed first level page tables.
> + * @version: for future extensions and identification of the data format
> + * @bytes: size of this structure
> + * @base_ptr:	PASID table pointer
> + * @pasid_bits:	number of bits supported in the guest PASID table, must be less
> + *		or equal than the host supported PASID size.
> + */
> +struct iommu_pasid_table_config {
> +	__u32 version;
> +#define PASID_TABLE_CFG_VERSION_1 1
> +	__u32 bytes;
> +	__u64 base_ptr;
> +	__u8 pasid_bits;
> +};
> +
> +/**
> + * Stream Table Entry S1 info
> + * @disabled: the smmu is disabled
> + * @bypassed: stage 1 is bypassed
> + * @aborted: aborted
> + * @cdptr_dma: GPA of the Context Descriptor
> + * @asid: address space identified associated to the CD
> + */
> +struct iommu_smmu_s1_config {
> +	__u8 disabled;
> +	__u8 bypassed;
> +	__u8 aborted;
> +	__u64 cdptr_dma;
> +	__u16 asid;
This asid field is a leftover. asid is part of the CD (owned by the
guest) and cannot be conveyed through this API. What is relevant is to
pass is the guest asid_bits (vSMMU IDR1 info), to be compared with the
host one. So we end up having something similar to the base_ptr and
pasid_bits of iommu_pasid_table_config.

Otherwise, the other fields (disable, bypassed, aborted) can be packed
as masks in a _u8 flag.

Anyway this API still is at the stage of proof of concept. At least it
shows the similarities between that use case and the PASID one and
should encourage to discuss about the relevance to have a unified API to
pass the guest stage info.

Thanks

Eric
> +};
> +
> +struct iommu_guest_stage_config {
> +#define PASID_TABLE	(1 << 0)
> +#define SMMUV3_S1_CFG	(1 << 1)
> +	__u32 flags;
> +	union {
> +		struct iommu_pasid_table_config pasidt;
> +		struct iommu_smmu_s1_config smmu_s1;
> +	};
> +};
> +
> +#endif /* _UAPI_IOMMU_H */
> 

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC 01/13] iommu: Introduce bind_guest_stage API
       [not found]   ` <A2975661238FB949B60364EF0F2C257439CCE9EE@SHSMSX104.ccr.corp.intel.com>
@ 2018-08-24 13:20     ` Auger Eric
  0 siblings, 0 replies; 21+ messages in thread
From: Auger Eric @ 2018-08-24 13:20 UTC (permalink / raw)
  To: Liu, Yi L, "eric.auger.pro, eric.auger, iommu, linux-kernel,
	kvm, kvmarm, joro, alex.williamson, jean-philippe.brucker,
	jacob.jun.pan, yi.l.liu"@linux.intel.com, eric.auger, iommu,
	linux-kernel, kvm, kvmarm, joro, alex.williamson,
	jean-philippe.brucker, jacob.jun.pan, yi.l.liu",
	will.deacon, robin.murphy
  Cc: marc.zyngier, peter.maydell, christoffer.dall

Hi Yi Liu,

On 08/24/2018 02:53 PM, Liu, Yi L wrote:
> Hi Eric,
> 
>> From: iommu-bounces@lists.linux-foundation.org [mailto:iommu-
>> bounces@lists.linux-foundation.org] On Behalf Of Eric Auger
>> Sent: Thursday, August 23, 2018 8:17 PM
>> Subject: [RFC 01/13] iommu: Introduce bind_guest_stage API
>>
>> From: Jacob Pan <jacob.jun.pan@linux.intel.com>
>>
>> In virtualization use case, when a guest is assigned
>> a PCI host device, protected by a virtual IOMMU on a guest,
>> the physical IOMMU must be programmed to be consistent with
>> the guest mappings. If the physical IOMMU supports two
>> translation stages it makes sense to program guest mappings
>> onto the first stage while to host owns the stage 2.
>>
>> In that case, it is mandated to trap on guest configuration
>> settings and pass those to the physical iommu driver.
>>
>> This patch adds a new API to the iommu subsystem that allows
>> to bind and unbind the guest configuration data to the host.
>>
>> A generic iommu_guest_stage_config struct is introduced in
>> a new iommu.h uapi header. This is going to be used by the VFIO
>> user API. We foresee at least 2 specialization of this struct,
>> for PASID table passing and ARM SMMUv3.
>>
>> Signed-off-by: Jean-Philippe Brucker <jean-philippe.brucker@arm.com>
>> Signed-off-by: Liu, Yi L <yi.l.liu@linux.intel.com>
>> Signed-off-by: Ashok Raj <ashok.raj@intel.com>
>> Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
>> Signed-off-by: Eric Auger <eric.auger@redhat.com>
>>
>> ---
>>
>> This patch generalizes the API introduced by Jacob & co-authors in
>> https://lwn.net/Articles/754331/
>> ---
>>  drivers/iommu/iommu.c      | 19 +++++++++++++++
>>  include/linux/iommu.h      | 23 ++++++++++++++++++
>>  include/uapi/linux/iommu.h | 59
>> ++++++++++++++++++++++++++++++++++++++++++++++
>>  3 files changed, 101 insertions(+)
>>  create mode 100644 include/uapi/linux/iommu.h
>>
>> diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
>> index 63b3756..5156172 100644
>> --- a/drivers/iommu/iommu.c
>> +++ b/drivers/iommu/iommu.c
>> @@ -1326,6 +1326,25 @@ int iommu_attach_device(struct iommu_domain
>> *domain, struct device *dev)
>>  }
>>  EXPORT_SYMBOL_GPL(iommu_attach_device);
>>
>> +int iommu_bind_guest_stage(struct iommu_domain *domain, struct device *dev,
>> +			   struct iommu_guest_stage_config *cfg)
>> +{
>> +	if (unlikely(!domain->ops->bind_guest_stage))
>> +		return -ENODEV;
>> +
>> +	return domain->ops->bind_guest_stage(domain, dev, cfg);
>> +}
>> +EXPORT_SYMBOL_GPL(iommu_bind_guest_stage);
>> +
>> +void iommu_unbind_guest_stage(struct iommu_domain *domain, struct device
>> *dev)
>> +{
>> +	if (unlikely(!domain->ops->unbind_guest_stage))
>> +		return;
>> +
>> +	domain->ops->unbind_guest_stage(domain, dev);
>> +}
>> +EXPORT_SYMBOL_GPL(iommu_unbind_guest_stage);
>> +
>>  static void __iommu_detach_device(struct iommu_domain *domain,
>>  				  struct device *dev)
>>  {
>> diff --git a/include/linux/iommu.h b/include/linux/iommu.h
>> index 19938ee..aec853d 100644
>> --- a/include/linux/iommu.h
>> +++ b/include/linux/iommu.h
>> @@ -25,6 +25,7 @@
>>  #include <linux/errno.h>
>>  #include <linux/err.h>
>>  #include <linux/of.h>
>> +#include <uapi/linux/iommu.h>
>>
>>  #define IOMMU_READ	(1 << 0)
>>  #define IOMMU_WRITE	(1 << 1)
>> @@ -187,6 +188,8 @@ struct iommu_resv_region {
>>   * @domain_get_windows: Return the number of windows for a domain
>>   * @of_xlate: add OF master IDs to iommu grouping
>>   * @pgsize_bitmap: bitmap of all possible supported page sizes
>> + * @bind_guest_stage: program the guest stage configuration
>> + * @unbind_guest_stage: unbind the guest stage and restore defaults
>>   */
>>  struct iommu_ops {
>>  	bool (*capable)(enum iommu_cap);
>> @@ -235,6 +238,11 @@ struct iommu_ops {
>>  	int (*of_xlate)(struct device *dev, struct of_phandle_args *args);
>>  	bool (*is_attach_deferred)(struct iommu_domain *domain, struct device
>> *dev);
>>
>> +	int (*bind_guest_stage)(struct iommu_domain *domain, struct device *dev,
>> +				struct iommu_guest_stage_config *cfg);
>> +	void (*unbind_guest_stage)(struct iommu_domain *domain,
>> +				   struct device *dev);
>> +
>>  	unsigned long pgsize_bitmap;
>>  };
>>
>> @@ -296,6 +304,10 @@ extern int iommu_attach_device(struct iommu_domain
>> *domain,
>>  			       struct device *dev);
>>  extern void iommu_detach_device(struct iommu_domain *domain,
>>  				struct device *dev);
>> +extern int iommu_bind_guest_stage(struct iommu_domain *domain,
>> +		struct device *dev, struct iommu_guest_stage_config *cfg);
>> +extern void iommu_unbind_guest_stage(struct iommu_domain *domain,
>> +				     struct device *dev);
>>  extern struct iommu_domain *iommu_get_domain_for_dev(struct device *dev);
>>  extern int iommu_map(struct iommu_domain *domain, unsigned long iova,
>>  		     phys_addr_t paddr, size_t size, int prot);
>> @@ -696,6 +708,17 @@ const struct iommu_ops *iommu_ops_from_fwnode(struct
>> fwnode_handle *fwnode)
>>  	return NULL;
>>  }
>>
>> +static inline
>> +int iommu_bind_guest_stage(struct iommu_domain *domain, struct device *dev,
>> +			   struct iommu_guest_stage_config *cfg)
>> +{
>> +	return -ENODEV;
>> +}
>> +static inline
>> +void iommu_unbind_guest_stage(struct iommu_domain *domain, struct device
>> *dev)
>> +{
>> +}
>> +
>>  #endif /* CONFIG_IOMMU_API */
>>
>>  #endif /* __LINUX_IOMMU_H */
>> diff --git a/include/uapi/linux/iommu.h b/include/uapi/linux/iommu.h
>> new file mode 100644
>> index 0000000..2d1593c
>> --- /dev/null
>> +++ b/include/uapi/linux/iommu.h
>> @@ -0,0 +1,59 @@
>> +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
>> +/*
>> + * IOMMU user API definitions
>> + *
>> + *
>> + * This program is free software; you can redistribute it and/or modify
>> + * it under the terms of the GNU General Public License version 2 as
>> + * published by the Free Software Foundation.
>> + */
>> +
>> +#ifndef _UAPI_IOMMU_H
>> +#define _UAPI_IOMMU_H
>> +
>> +#include <linux/types.h>
>> +
>> +/**
>> + * PASID table data used to bind guest PASID table to the host IOMMU. This will
>> + * enable guest managed first level page tables.
>> + * @version: for future extensions and identification of the data format
>> + * @bytes: size of this structure
>> + * @base_ptr:	PASID table pointer
>> + * @pasid_bits:	number of bits supported in the guest PASID table, must be
>> less
>> + *		or equal than the host supported PASID size.
>> + */
>> +struct iommu_pasid_table_config {
>> +	__u32 version;
>> +#define PASID_TABLE_CFG_VERSION_1 1
>> +	__u32 bytes;
>> +	__u64 base_ptr;
>> +	__u8 pasid_bits;
>> +};
>> +
>> +/**
>> + * Stream Table Entry S1 info
>> + * @disabled: the smmu is disabled
>> + * @bypassed: stage 1 is bypassed
>> + * @aborted: aborted
>> + * @cdptr_dma: GPA of the Context Descriptor
>> + * @asid: address space identified associated to the CD
>> + */
>> +struct iommu_smmu_s1_config {
>> +	__u8 disabled;
>> +	__u8 bypassed;
>> +	__u8 aborted;
>> +	__u64 cdptr_dma;
> 
> If I'm getting it correctly, so with cdptr_dma configed on
> host, hardware can access guest's CD table when nested
> stage is setup on host. Is it? If yes, I think it is similar with
> the base_ptr field in struct iommu_pasid_table_config.

Yes you're right, this is similar to base_ptr.

Thanks

Eric
> 
>> +	__u16 asid;
>> +};
>> +
>> +struct iommu_guest_stage_config {
>> +#define PASID_TABLE	(1 << 0)
>> +#define SMMUV3_S1_CFG	(1 << 1)
>> +	__u32 flags;
>> +	union {
>> +		struct iommu_pasid_table_config pasidt;
>> +		struct iommu_smmu_s1_config smmu_s1;
>> +	};
>> +};
>> +
>> +#endif /* _UAPI_IOMMU_H */
> 
> Thanks,
> Yi Liu
> 

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC 01/13] iommu: Introduce bind_guest_stage API
  2018-08-23 15:25   ` [RFC 01/13] iommu: Introduce bind_guest_stage API Auger Eric
@ 2018-08-31 13:11     ` Jean-Philippe Brucker
  2018-08-31 13:52       ` Auger Eric
  0 siblings, 1 reply; 21+ messages in thread
From: Jean-Philippe Brucker @ 2018-08-31 13:11 UTC (permalink / raw)
  To: Auger Eric, eric.auger.pro, iommu, linux-kernel, kvm, kvmarm,
	joro, alex.williamson, jacob.jun.pan, yi.l.liu, will.deacon,
	robin.murphy
  Cc: marc.zyngier, peter.maydell, christoffer.dall

Hi Eric,

On 23/08/18 16:25, Auger Eric wrote:
>> +int iommu_bind_guest_stage(struct iommu_domain *domain, struct device *dev,
>> +			   struct iommu_guest_stage_config *cfg)

About the name change from iommu_bind_pasid_table: is the intent to
reuse this API for SMMUv2, which supports nested but not PASID? Seems
like a good idea but "iommu_bind_table" may be better since "stage" is
only used by Arm.

>> +/**
>> + * PASID table data used to bind guest PASID table to the host IOMMU. This will
>> + * enable guest managed first level page tables.
>> + * @version: for future extensions and identification of the data format
>> + * @bytes: size of this structure
>> + * @base_ptr:	PASID table pointer
>> + * @pasid_bits:	number of bits supported in the guest PASID table, must be less
>> + *		or equal than the host supported PASID size.
>> + */
>> +struct iommu_pasid_table_config {
>> +	__u32 version;
>> +#define PASID_TABLE_CFG_VERSION_1 1
>> +	__u32 bytes;
>> +	__u64 base_ptr;
>> +	__u8 pasid_bits;
>> +};
>> +
>> +/**
>> + * Stream Table Entry S1 info
>> + * @disabled: the smmu is disabled
>> + * @bypassed: stage 1 is bypassed
>> + * @aborted: aborted
>> + * @cdptr_dma: GPA of the Context Descriptor
>> + * @asid: address space identified associated to the CD
>> + */
>> +struct iommu_smmu_s1_config {
>> +	__u8 disabled;
>> +	__u8 bypassed;
>> +	__u8 aborted;
>> +	__u64 cdptr_dma;
>> +	__u16 asid;
> This asid field is a leftover. asid is part of the CD (owned by the
> guest) and cannot be conveyed through this API. What is relevant is to
> pass is the guest asid_bits (vSMMU IDR1 info), to be compared with the
> host one. So we end up having something similar to the base_ptr and
> pasid_bits of iommu_pasid_table_config.

That part seems strange. Userspace wouldn't try arbitrary ASID sizes
until one fits, especially since ASID size isn't the only parameter:
there will be SSID, IOVA, IPA sizes, HTTU features and I guess any other
SMMU_IDRx bit that corresponds to a field in the CD or STE. Doesn't
vSMMU need to tailor its IDR registers to whatever is supported by the
host SMMU, in order to use nested translation?

Before creating the virtual ID registers, userspace will likely want to
know more about the physical IOMMU, by querying the kernel. I wrote
something about that interface recently:
https://www.spinics.net/lists/iommu/msg28039.html

And if you provide this interface, checking the parameters again in
BIND_GUEST_STAGE isn't very useful, it only tells you that userspace
read your values. Once the guest writes the CD, it could still use the
wrong ASID size or some other invalid config. That would be caught by
the SMMU walker and cause a C_BAD_CD error report.

> Otherwise, the other fields (disable, bypassed, aborted) can be packed
> as masks in a _u8 flag.

What's the difference between aborted and disabled? I'm also not sure we
should give such fine tweaks to userspace, since the STE code is already
pretty complicated and has to deal with several state transitions.
Existing behavior (1) (2) (5) probably needs to be preserved:

(1) Initially no container is set, s1-translate s2-bypass with the
default domain (non-VFIO)
(2) A VFIO_TYPE1_NESTING_IOMMU container is set, s1-bypass s2-translate
(3) BIND_GUEST_STAGE, s1-translate s2-translate
(4) UNBIND_GUEST_STAGE, s1-bypass s2-translate
(5) Remove container, s1-translate s2-bypass with the default domain

That said, when instantiating a vSMMU, QEMU may want to switch from (2)
to an intermediate "s1-abort s2-translate" state. At the moment with
virtio-iommu I only create the stage-2 mappings at (3). This ensures
that transactions abort until the guest configures the vIOMMU and it
keeps the host driver simple, but it has the downside of pinning guest
memory lazily (necessary with virtio-iommu since the guest falls back to
the map/unmap interface, which doesn't pin all memory upfront, if it
can't support nested).

> Anyway this API still is at the stage of proof of concept. At least it
> shows the similarities between that use case and the PASID one and
> should encourage to discuss about the relevance to have a unified API to
> pass the guest stage info.
Other required fields in the BIND_GUEST_STAGE ioctl are at least s1dss,
s1cdmax and s1fmt. It's not for sanity-check, they describe the guest
configuration. The guest selects a PASID table format (1-level, 2-level
4k/64k) which is written into STE.s1fmt. It also selects the behavior of
requests-without-pasid, which is written into STE.s1dss. s1cdmax
corresponds to pasid_bits.

Thanks,
Jean

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC 02/13] iommu: Introduce tlb_invalidate API
       [not found] ` <1535026656-8450-3-git-send-email-eric.auger@redhat.com>
@ 2018-08-31 13:17   ` Jean-Philippe Brucker
  2018-08-31 14:07     ` Auger Eric
  0 siblings, 1 reply; 21+ messages in thread
From: Jean-Philippe Brucker @ 2018-08-31 13:17 UTC (permalink / raw)
  To: Eric Auger, "eric.auger.pro, eric.auger, iommu, linux-kernel,
	kvm, kvmarm, joro, alex.williamson, jean-philippe.brucker,
	jacob.jun.pan, yi.l.liu",
	will.deacon, robin.murphy
  Cc: marc.zyngier, peter.maydell, christoffer.dall

On 23/08/18 13:17, Eric Auger wrote:
> +/**
> + * Translation cache invalidation information, contains generic IOMMU
> + * data which can be parsed based on model ID by model specific drivers.
> + * Since the invalidation of second level page tables are included in the
> + * unmap operation, this info is only applicable to the first level
> + * translation caches, i.e. DMA request with PASID.
> + *
> + * @granularity:	requested invalidation granularity, type dependent
> + * @size:		2^size of 4K pages, 0 for 4k, 9 for 2MB, etc.
> + * @nr_pages:		number of pages to invalidate
> + * @pasid:		processor address space ID value per PCI spec.
> + * @addr:		page address to be invalidated
> + * @flags		IOMMU_INVALIDATE_ADDR_LEAF: leaf paging entries
> + *			IOMMU_INVALIDATE_GLOBAL_PAGE: global pages
> + *
> + */
> +struct iommu_tlb_invalidate_info {
> +	struct iommu_tlb_invalidate_hdr	hdr;
> +	enum iommu_inv_granularity	granularity;
> +	__u32		flags;
> +#define IOMMU_INVALIDATE_ADDR_LEAF	(1 << 0)
> +#define IOMMU_INVALIDATE_GLOBAL_PAGE	(1 << 1)
> +	__u8		size;
> +	__u64		nr_pages;
> +	__u32		pasid;
> +	__u64		addr;
> +};
>  #endif /* _UAPI_IOMMU_H */

Since the ioctl will be used to combine invalidations (invalidate both
ATC and TLB with a single call), we need an additional ASID field for
the SMMU - ATC is invalidated by PASID, TLB by ASID. I used to call it
"tag", but I'm leaning towards "arch_id" now
(http://www.linux-arm.org/git?p=linux-jpb.git;a=commitdiff;h=40fdef74816dd8d8d113100b9e0162fab4cec28d)

Thanks,
Jean

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC 09/13] iommu/smmuv3: Get prepared for nested stage support
       [not found] ` <1535026656-8450-10-git-send-email-eric.auger@redhat.com>
@ 2018-08-31 13:20   ` Jean-Philippe Brucker
  2018-08-31 14:11     ` Auger Eric
  0 siblings, 1 reply; 21+ messages in thread
From: Jean-Philippe Brucker @ 2018-08-31 13:20 UTC (permalink / raw)
  To: Eric Auger, "eric.auger.pro, eric.auger, iommu, linux-kernel,
	kvm, kvmarm, joro, alex.williamson, jean-philippe.brucker,
	jacob.jun.pan, yi.l.liu",
	will.deacon, robin.murphy
  Cc: marc.zyngier, peter.maydell, christoffer.dall

On 23/08/18 13:17, Eric Auger wrote:
>  	if (ste->s1_cfg) {
> -		BUG_ON(ste_live);

Scary! :) The current code assumes that it can make modifications to the
STE in any order and enable translation after a sync. So far I haven't
been able to find anything that violates this rule in the spec: "If
software modifies the structure while it is valid, it must not allow the
structure to enter an invalid intermediate state." So maybe it's fine,
though to be safe I would have started with disabling the STE
(http://www.linux-arm.org/git?p=linux-jpb.git;a=commitdiff;h=936e49f923e101c061269eadd5fa43fef819d2e9)

Thanks,
Jean

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC 01/13] iommu: Introduce bind_guest_stage API
  2018-08-31 13:11     ` Jean-Philippe Brucker
@ 2018-08-31 13:52       ` Auger Eric
  2018-09-03 12:19         ` Jean-Philippe Brucker
  2018-09-04  7:57         ` Tian, Kevin
  0 siblings, 2 replies; 21+ messages in thread
From: Auger Eric @ 2018-08-31 13:52 UTC (permalink / raw)
  To: Jean-Philippe Brucker, eric.auger.pro, iommu, linux-kernel, kvm,
	kvmarm, joro, alex.williamson, jacob.jun.pan, yi.l.liu,
	will.deacon, robin.murphy
  Cc: marc.zyngier, peter.maydell, christoffer.dall

Hi Jean-Philippe,

On 08/31/2018 03:11 PM, Jean-Philippe Brucker wrote:
> Hi Eric,
> 
> On 23/08/18 16:25, Auger Eric wrote:
>>> +int iommu_bind_guest_stage(struct iommu_domain *domain, struct device *dev,
>>> +			   struct iommu_guest_stage_config *cfg)
> 
> About the name change from iommu_bind_pasid_table: is the intent to
> reuse this API for SMMUv2, which supports nested but not PASID? Seems
> like a good idea but "iommu_bind_table" may be better since "stage" is
> only used by Arm.

At the moment I don't target SMUv2 but just SMMUv3. My focus was on
nested stage enablement without enabling the multi-CD feature (PASID),
whish is not supported by the QEMU vSMMUv3. Afterwards I realized that
basically we are pointing to a CD or PASID table and that's about the
same. I don't have a strong opinion on the name, iommu_bind_guest_table
or iommu_bind_pasid_table would be fine with me. Indeed "stage" is ARM
vocable (level for Intel?)
> 
>>> +/**
>>> + * PASID table data used to bind guest PASID table to the host IOMMU. This will
>>> + * enable guest managed first level page tables.
>>> + * @version: for future extensions and identification of the data format
>>> + * @bytes: size of this structure
>>> + * @base_ptr:	PASID table pointer
>>> + * @pasid_bits:	number of bits supported in the guest PASID table, must be less
>>> + *		or equal than the host supported PASID size.
>>> + */
>>> +struct iommu_pasid_table_config {
>>> +	__u32 version;
>>> +#define PASID_TABLE_CFG_VERSION_1 1
>>> +	__u32 bytes;
>>> +	__u64 base_ptr;
>>> +	__u8 pasid_bits;
>>> +};
>>> +
>>> +/**
>>> + * Stream Table Entry S1 info
>>> + * @disabled: the smmu is disabled
>>> + * @bypassed: stage 1 is bypassed
>>> + * @aborted: aborted
>>> + * @cdptr_dma: GPA of the Context Descriptor
>>> + * @asid: address space identified associated to the CD
>>> + */
>>> +struct iommu_smmu_s1_config {
>>> +	__u8 disabled;
>>> +	__u8 bypassed;
>>> +	__u8 aborted;
>>> +	__u64 cdptr_dma;
>>> +	__u16 asid;
>> This asid field is a leftover. asid is part of the CD (owned by the
>> guest) and cannot be conveyed through this API. What is relevant is to
>> pass is the guest asid_bits (vSMMU IDR1 info), to be compared with the
>> host one. So we end up having something similar to the base_ptr and
>> pasid_bits of iommu_pasid_table_config.
> 
> That part seems strange. Userspace wouldn't try arbitrary ASID sizes
> until one fits, especially since ASID size isn't the only parameter:
> there will be SSID, IOVA, IPA sizes, HTTU features and I guess any other
> SMMU_IDRx bit that corresponds to a field in the CD or STE. Doesn't
> vSMMU need to tailor its IDR registers to whatever is supported by the
> host SMMU, in order to use nested translation?
Yes I agree this is needed anyway.
> 
> Before creating the virtual ID registers, userspace will likely want to
> know more about the physical IOMMU, by querying the kernel. I wrote
> something about that interface recently:
> https://www.spinics.net/lists/iommu/msg28039.html

Ah OK I missed that part of the discussion. About the sysfs API, we
devised one in the past for reserved_regions. I thought it would be
useful for QEMU to identify them but eventually Alex pushed to create a
VFIO API instead to make things more self-contained. We would need to
double check with him.
> 
> And if you provide this interface, checking the parameters again in
> BIND_GUEST_STAGE isn't very useful, it only tells you that userspace
> read your values. Once the guest writes the CD, it could still use the
> wrong ASID size or some other invalid config. That would be caught by
> the SMMU walker and cause a C_BAD_CD error report.

Yes actually if there is some mismatch we might get BAD_STE/BAD_CD
events. This might be another way to handle things. I did not integrate
the fault API yet. This exercise is need to understand how we can catch
things at QEMU level.
> 
>> Otherwise, the other fields (disable, bypassed, aborted) can be packed
>> as masks in a _u8 flag.
> 
> What's the difference between aborted and disabled? I'm also not sure we
> should give such fine tweaks to userspace, since the STE code is already
> pretty complicated and has to deal with several state transitions.
> Existing behavior (1) (2) (5) probably needs to be preserved:
> 
> (1) Initially no container is set, s1-translate s2-bypass with the
> default domain (non-VFIO)
> (2) A VFIO_TYPE1_NESTING_IOMMU container is set, s1-bypass s2-translate
> (3) BIND_GUEST_STAGE, s1-translate s2-translate
> (4) UNBIND_GUEST_STAGE, s1-bypass s2-translate
> (5) Remove container, s1-translate s2-bypass with the default domain
> 
> That said, when instantiating a vSMMU, QEMU may want to switch from (2)
> to an intermediate "s1-abort s2-translate" state. At the moment with
> virtio-iommu I only create the stage-2 mappings at (3). This ensures
> that transactions abort until the guest configures the vIOMMU and it
> keeps the host driver simple, but it has the downside of pinning guest
> memory lazily (necessary with virtio-iommu since the guest falls back to
> the map/unmap interface, which doesn't pin all memory upfront, if it
> can't support nested).

For now, I have not made any assumptions here and just made sure I was
passing the S1 Config part. I would be tempted to think that only bypass
is requested and abort/disabled could be implemented by the user by
tearing the binding down, as suggested by Jacob.
> 
>> Anyway this API still is at the stage of proof of concept. At least it
>> shows the similarities between that use case and the PASID one and
>> should encourage to discuss about the relevance to have a unified API to
>> pass the guest stage info.
> Other required fields in the BIND_GUEST_STAGE ioctl are at least s1dss,
> s1cdmax and s1fmt. It's not for sanity-check, they describe the guest
> configuration. The guest selects a PASID table format (1-level, 2-level
> 4k/64k) which is written into STE.s1fmt. It also selects the behavior of
> requests-without-pasid, which is written into STE.s1dss. s1cdmax
> corresponds to pasid_bits.
Agreed. in vSMMU I only support S1CDMax =0 so I did not get into those
troubles.

Do we agree here we can get rid of the struct device * parameter?

Thanks

Eric

> 
> Thanks,
> Jean
> 

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC 02/13] iommu: Introduce tlb_invalidate API
  2018-08-31 13:17   ` [RFC 02/13] iommu: Introduce tlb_invalidate API Jean-Philippe Brucker
@ 2018-08-31 14:07     ` Auger Eric
  2018-09-03 12:28       ` Jean-Philippe Brucker
  0 siblings, 1 reply; 21+ messages in thread
From: Auger Eric @ 2018-08-31 14:07 UTC (permalink / raw)
  To: Jean-Philippe Brucker, eric.auger.pro, iommu, linux-kernel, kvm,
	kvmarm, joro, alex.williamson, jacob.jun.pan, yi.l.liu",
	will.deacon, robin.murphy
  Cc: marc.zyngier, peter.maydell, christoffer.dall

Hi Jean-Philippe,

On 08/31/2018 03:17 PM, Jean-Philippe Brucker wrote:
> On 23/08/18 13:17, Eric Auger wrote:
>> +/**
>> + * Translation cache invalidation information, contains generic IOMMU
>> + * data which can be parsed based on model ID by model specific drivers.
>> + * Since the invalidation of second level page tables are included in the
>> + * unmap operation, this info is only applicable to the first level
>> + * translation caches, i.e. DMA request with PASID.
>> + *
>> + * @granularity:	requested invalidation granularity, type dependent
>> + * @size:		2^size of 4K pages, 0 for 4k, 9 for 2MB, etc.
>> + * @nr_pages:		number of pages to invalidate
>> + * @pasid:		processor address space ID value per PCI spec.
>> + * @addr:		page address to be invalidated
>> + * @flags		IOMMU_INVALIDATE_ADDR_LEAF: leaf paging entries
>> + *			IOMMU_INVALIDATE_GLOBAL_PAGE: global pages
>> + *
>> + */
>> +struct iommu_tlb_invalidate_info {
>> +	struct iommu_tlb_invalidate_hdr	hdr;
>> +	enum iommu_inv_granularity	granularity;
>> +	__u32		flags;
>> +#define IOMMU_INVALIDATE_ADDR_LEAF	(1 << 0)
>> +#define IOMMU_INVALIDATE_GLOBAL_PAGE	(1 << 1)
>> +	__u8		size;
>> +	__u64		nr_pages;
>> +	__u32		pasid;
>> +	__u64		addr;
>> +};
>>  #endif /* _UAPI_IOMMU_H */
> 
> Since the ioctl will be used to combine invalidations (invalidate both
> ATC and TLB with a single call), we need an additional ASID field for
> the SMMU - ATC is invalidated by PASID, TLB by ASID. I used to call it
> "tag", but I'm leaning towards "arch_id" now
> (http://www.linux-arm.org/git?p=linux-jpb.git;a=commitdiff;h=40fdef74816dd8d8d113100b9e0162fab4cec28d)

I aknowledge I am not crystal clear about that. for a given iommu_domain
don't you have a single asid. Can't you retrieve the asid from the
iommu_domain/arm_smmu_domain/arm_smmu_s1_cfg/arm_smmu_ctx_desc.asid?
Here again I am confused bout the dual iommu_domain/struct device
parameters.

I have another trouble while doing the QEMU integration.
When the guests does an NH_ALL, this propagates an invalidation on the
whole IPA range and we must discriminate that from regular NH_VA calls.
How would you encode the NH_ALL with this API?

Besides I discover you work on virtio-iommu/stage2 enablement ;-)

Thanks

Eric
> 
> Thanks,
> Jean
> 

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC 09/13] iommu/smmuv3: Get prepared for nested stage support
  2018-08-31 13:20   ` [RFC 09/13] iommu/smmuv3: Get prepared for nested stage support Jean-Philippe Brucker
@ 2018-08-31 14:11     ` Auger Eric
  2018-09-03 12:29       ` Jean-Philippe Brucker
  0 siblings, 1 reply; 21+ messages in thread
From: Auger Eric @ 2018-08-31 14:11 UTC (permalink / raw)
  To: Jean-Philippe Brucker, "eric.auger.pro, iommu, linux-kernel,
	kvm, kvmarm, joro, alex.williamson, jacob.jun.pan, yi.l.liu",
	will.deacon, robin.murphy
  Cc: marc.zyngier, peter.maydell, christoffer.dall

Hi Jean-Philippe,

On 08/31/2018 03:20 PM, Jean-Philippe Brucker wrote:
> On 23/08/18 13:17, Eric Auger wrote:
>>  	if (ste->s1_cfg) {
>> -		BUG_ON(ste_live);
> 
> Scary! :) The current code assumes that it can make modifications to the
> STE in any order and enable translation after a sync. So far I haven't
> been able to find anything that violates this rule in the spec: "If
> software modifies the structure while it is valid, it must not allow the
> structure to enter an invalid intermediate state." So maybe it's fine,
> though to be safe I would have started with disabling the STE
> (http://www.linux-arm.org/git?p=linux-jpb.git;a=commitdiff;h=936e49f923e101c061269eadd5fa43fef819d2e9)

Yep this works with my setup but I was waiting for such kind of comments
to turn this prototype into something more "production level" ;-) Did
you send anything upstream, related to this branch?

Thanks

Eric
> 
> Thanks,
> Jean
> 

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC 01/13] iommu: Introduce bind_guest_stage API
  2018-08-31 13:52       ` Auger Eric
@ 2018-09-03 12:19         ` Jean-Philippe Brucker
  2018-09-04  7:57         ` Tian, Kevin
  1 sibling, 0 replies; 21+ messages in thread
From: Jean-Philippe Brucker @ 2018-09-03 12:19 UTC (permalink / raw)
  To: Auger Eric, eric.auger.pro, iommu, linux-kernel, kvm, kvmarm,
	joro, alex.williamson, jacob.jun.pan, yi.l.liu, will.deacon,
	robin.murphy
  Cc: marc.zyngier, peter.maydell, christoffer.dall

On 31/08/2018 14:52, Auger Eric wrote:
> Do we agree here we can get rid of the struct device * parameter?

That's fine by me, in my opinion the bind operation should only be on
the domain, like map/unmap. For the invalidation however, I think we
need to keep the device as an optional parameter, because the guest may
want to invalidate the ATC (DTLB) of a single device.

Thanks,
Jean

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC 02/13] iommu: Introduce tlb_invalidate API
  2018-08-31 14:07     ` Auger Eric
@ 2018-09-03 12:28       ` Jean-Philippe Brucker
  2018-09-03 12:41         ` Auger Eric
  0 siblings, 1 reply; 21+ messages in thread
From: Jean-Philippe Brucker @ 2018-09-03 12:28 UTC (permalink / raw)
  To: Auger Eric, eric.auger.pro, iommu, linux-kernel, kvm, kvmarm,
	joro, alex.williamson, jacob.jun.pan, yi.l.liu",
	will.deacon, robin.murphy
  Cc: marc.zyngier, peter.maydell, christoffer.dall

On 31/08/2018 15:07, Auger Eric wrote:
>> Since the ioctl will be used to combine invalidations (invalidate both
>> ATC and TLB with a single call), we need an additional ASID field for
>> the SMMU - ATC is invalidated by PASID, TLB by ASID. I used to call it
>> "tag", but I'm leaning towards "arch_id" now
>> (http://www.linux-arm.org/git?p=linux-jpb.git;a=commitdiff;h=40fdef74816dd8d8d113100b9e0162fab4cec28d)
> 
> I aknowledge I am not crystal clear about that. for a given iommu_domain
> don't you have a single asid. Can't you retrieve the asid from the
> iommu_domain/arm_smmu_domain/arm_smmu_s1_cfg/arm_smmu_ctx_desc.asid?
> Here again I am confused bout the dual iommu_domain/struct device
> parameters.

In nested mode, ASIDs are allocated by the guest and written into the CD
table. Even if there is a single CD it will still be private to the
guest. When receiving the invalidation, the host could walk the CD
tables to retrieve the ASID, but it's not guaranteed to be here anymore:
the guest could well clear a CD before sending the invalidate command.

> I have another trouble while doing the QEMU integration.
> When the guests does an NH_ALL, this propagates an invalidation on the
> whole IPA range and we must discriminate that from regular NH_VA calls.
> How would you encode the NH_ALL with this API?

I think that translates to an invalidate-all for the domain:

struct tlb_iommu_invalidate_info info = {
	.hdr.type = IOMMU_INV_TYPE_TLB,
	.granularity = IOMMU_INV_GRANU_DOMAIN_ALL_PASID,
};

Reading the spec again, I though the API was missing a way to encode
TLBI_NH_VAA, invalidate a range for all ASIDs. Although it feels
contrived, we could represent it with the following:

struct tlb_iommu_invalidate_info info = {
	.hdr.type = IOMMU_INV_TYPE_TLB,
	.granularity = IOMMU_INV_GRANU_PAGE_PASID,
	.flags = IOMMU_INVALIDATE_GLOBAL_PAGE,
	.addr = ...
};

Thanks,
Jean

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC 09/13] iommu/smmuv3: Get prepared for nested stage support
  2018-08-31 14:11     ` Auger Eric
@ 2018-09-03 12:29       ` Jean-Philippe Brucker
  2018-09-03 12:48         ` Auger Eric
  0 siblings, 1 reply; 21+ messages in thread
From: Jean-Philippe Brucker @ 2018-09-03 12:29 UTC (permalink / raw)
  To: Auger Eric, "eric.auger.pro, iommu, linux-kernel, kvm,
	kvmarm, joro, alex.williamson, jacob.jun.pan, yi.l.liu",
	will.deacon, robin.murphy
  Cc: marc.zyngier, peter.maydell, christoffer.dall

On 31/08/2018 15:11, Auger Eric wrote:
> Yep this works with my setup but I was waiting for such kind of comments
> to turn this prototype into something more "production level" ;-) Did
> you send anything upstream, related to this branch?

No I didn't have time to clean it up yet, and don't know when I'll be
able to send it out. With host SVA and virtio-iommu, I have enough
patches in flight to keep me busy for a while :) But you can obviously
pick patches from that branch if they help.

Thanks,
Jean

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC 02/13] iommu: Introduce tlb_invalidate API
  2018-09-03 12:28       ` Jean-Philippe Brucker
@ 2018-09-03 12:41         ` Auger Eric
  2018-09-03 13:41           ` Jean-Philippe Brucker
  0 siblings, 1 reply; 21+ messages in thread
From: Auger Eric @ 2018-09-03 12:41 UTC (permalink / raw)
  To: Jean-Philippe Brucker, eric.auger.pro, iommu, linux-kernel, kvm,
	kvmarm, joro, alex.williamson, jacob.jun.pan, yi.l.liu",
	will.deacon, robin.murphy
  Cc: marc.zyngier, peter.maydell, christoffer.dall

Hi Jean-Philippe,
On 09/03/2018 02:28 PM, Jean-Philippe Brucker wrote:
> On 31/08/2018 15:07, Auger Eric wrote:
>>> Since the ioctl will be used to combine invalidations (invalidate both
>>> ATC and TLB with a single call), we need an additional ASID field for
>>> the SMMU - ATC is invalidated by PASID, TLB by ASID. I used to call it
>>> "tag", but I'm leaning towards "arch_id" now
>>> (http://www.linux-arm.org/git?p=linux-jpb.git;a=commitdiff;h=40fdef74816dd8d8d113100b9e0162fab4cec28d)
>>
>> I aknowledge I am not crystal clear about that. for a given iommu_domain
>> don't you have a single asid. Can't you retrieve the asid from the
>> iommu_domain/arm_smmu_domain/arm_smmu_s1_cfg/arm_smmu_ctx_desc.asid?
>> Here again I am confused bout the dual iommu_domain/struct device
>> parameters.
> 
> In nested mode, ASIDs are allocated by the guest and written into the CD
> table. Even if there is a single CD it will still be private to the
> guest. When receiving the invalidation, the host could walk the CD
> tables to retrieve the ASID, but it's not guaranteed to be here anymore:
> the guest could well clear a CD before sending the invalidate command.

That's fully correct. I messed up at the beginning with this asid and
that's perfectly true the asid needs to be passed. I will respin
accordingly.

> 
>> I have another trouble while doing the QEMU integration.
>> When the guests does an NH_ALL, this propagates an invalidation on the
>> whole IPA range and we must discriminate that from regular NH_VA calls.
>> How would you encode the NH_ALL with this API?
> 
> I think that translates to an invalidate-all for the domain:
> 
> struct tlb_iommu_invalidate_info info = {
> 	.hdr.type = IOMMU_INV_TYPE_TLB,
> 	.granularity = IOMMU_INV_GRANU_DOMAIN_ALL_PASID,
OK
> };
> 
> Reading the spec again, I though the API was missing a way to encode
> TLBI_NH_VAA, invalidate a range for all ASIDs. Although it feels
> contrived, we could represent it with the following:
> 
> struct tlb_iommu_invalidate_info info = {
> 	.hdr.type = IOMMU_INV_TYPE_TLB,
> 	.granularity = IOMMU_INV_GRANU_PAGE_PASID,
> 	.flags = IOMMU_INVALIDATE_GLOBAL_PAGE,
> 	.addr = ...
OK

Also what about CMD_CFI_CD(_ALL) propagation. Is it an
IOMMU_INV_TYPE_PASID invalidation?

Thanks

Eric
> };
> 
> Thanks,
> Jean
> 

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC 09/13] iommu/smmuv3: Get prepared for nested stage support
  2018-09-03 12:29       ` Jean-Philippe Brucker
@ 2018-09-03 12:48         ` Auger Eric
  0 siblings, 0 replies; 21+ messages in thread
From: Auger Eric @ 2018-09-03 12:48 UTC (permalink / raw)
  To: Jean-Philippe Brucker, eric.auger.pro, iommu, linux-kernel, kvm,
	kvmarm, joro, alex.williamson, jacob.jun.pan, yi.l.liu",
	will.deacon, robin.murphy
  Cc: marc.zyngier, peter.maydell, christoffer.dall

Hi Jean-Philippe,

On 09/03/2018 02:29 PM, Jean-Philippe Brucker wrote:
> On 31/08/2018 15:11, Auger Eric wrote:
>> Yep this works with my setup but I was waiting for such kind of comments
>> to turn this prototype into something more "production level" ;-) Did
>> you send anything upstream, related to this branch?
> 
> No I didn't have time to clean it up yet, and don't know when I'll be
> able to send it out. With host SVA and virtio-iommu, I have enough
> patches in flight to keep me busy for a while :) But you can obviously
> pick patches from that branch if they help.

I understand ;-) I will update the uapi according to our last
discussions and I will look at your patches, especially smmu-v3 changes,
adding your Sob when relevant.

Thanks

Eric
> 
> Thanks,
> Jean
> 

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC 02/13] iommu: Introduce tlb_invalidate API
  2018-09-03 12:41         ` Auger Eric
@ 2018-09-03 13:41           ` Jean-Philippe Brucker
  0 siblings, 0 replies; 21+ messages in thread
From: Jean-Philippe Brucker @ 2018-09-03 13:41 UTC (permalink / raw)
  To: Auger Eric, eric.auger.pro, iommu, linux-kernel, kvm, kvmarm,
	joro, alex.williamson, jacob.jun.pan, yi.l.liu",
	will.deacon, robin.murphy
  Cc: marc.zyngier, peter.maydell, christoffer.dall

On 03/09/2018 13:41, Auger Eric wrote:
> Also what about CMD_CFI_CD(_ALL) propagation. Is it an
> IOMMU_INV_TYPE_PASID invalidation?

Yes, INV_TYPE_PASID corresponds to the config invalidation. CMD_CFGI_CD
is granule INV_GRANU_PASID_SEL and CMD_CFGI_CD_ALL is granule
INV_GRANU_ALL_PASID

Thanks,
Jean


^ permalink raw reply	[flat|nested] 21+ messages in thread

* RE: [RFC 01/13] iommu: Introduce bind_guest_stage API
  2018-08-31 13:52       ` Auger Eric
  2018-09-03 12:19         ` Jean-Philippe Brucker
@ 2018-09-04  7:57         ` Tian, Kevin
  2018-09-04  8:10           ` Auger Eric
  1 sibling, 1 reply; 21+ messages in thread
From: Tian, Kevin @ 2018-09-04  7:57 UTC (permalink / raw)
  To: Auger Eric, Jean-Philippe Brucker, eric.auger.pro, iommu,
	linux-kernel, kvm, kvmarm, joro, alex.williamson, jacob.jun.pan,
	yi.l.liu, will.deacon, robin.murphy
  Cc: marc.zyngier, peter.maydell, christoffer.dall

> From: Auger Eric
> Sent: Friday, August 31, 2018 9:52 PM
> 
> Hi Jean-Philippe,
> 
> On 08/31/2018 03:11 PM, Jean-Philippe Brucker wrote:
> > Hi Eric,
> >
> > On 23/08/18 16:25, Auger Eric wrote:
> >>> +int iommu_bind_guest_stage(struct iommu_domain *domain, struct
> device *dev,
> >>> +			   struct iommu_guest_stage_config *cfg)
> >
> > About the name change from iommu_bind_pasid_table: is the intent to
> > reuse this API for SMMUv2, which supports nested but not PASID? Seems
> > like a good idea but "iommu_bind_table" may be better since "stage" is
> > only used by Arm.
> 
> At the moment I don't target SMUv2 but just SMMUv3. My focus was on
> nested stage enablement without enabling the multi-CD feature (PASID),
> whish is not supported by the QEMU vSMMUv3. Afterwards I realized that
> basically we are pointing to a CD or PASID table and that's about the
> same. I don't have a strong opinion on the name, iommu_bind_guest_table
> or iommu_bind_pasid_table would be fine with me. Indeed "stage" is ARM
> vocable (level for Intel?)

Intel uses first level/second level. 

iommu_bind_table is a bit confusing. what should people take table as?
there is PASID table. there is also page table linked in each stage/level. and
maybe other tables in vendor-specific definition.

to me iommu_bind_pasid_table is still clearer. anyway in other places
we've used pasid explicitly in vfio/iommu APIs, then it should be general
enough to represent various implementations.

Thanks
Kevin




^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC 01/13] iommu: Introduce bind_guest_stage API
  2018-09-04  7:57         ` Tian, Kevin
@ 2018-09-04  8:10           ` Auger Eric
  2018-09-04  8:34             ` Tian, Kevin
  0 siblings, 1 reply; 21+ messages in thread
From: Auger Eric @ 2018-09-04  8:10 UTC (permalink / raw)
  To: Tian, Kevin, Jean-Philippe Brucker, eric.auger.pro, iommu,
	linux-kernel, kvm, kvmarm, joro, alex.williamson, jacob.jun.pan,
	yi.l.liu, will.deacon, robin.murphy
  Cc: marc.zyngier, peter.maydell, christoffer.dall

Hi Kevin,
On 09/04/2018 09:57 AM, Tian, Kevin wrote:
>> From: Auger Eric
>> Sent: Friday, August 31, 2018 9:52 PM
>>
>> Hi Jean-Philippe,
>>
>> On 08/31/2018 03:11 PM, Jean-Philippe Brucker wrote:
>>> Hi Eric,
>>>
>>> On 23/08/18 16:25, Auger Eric wrote:
>>>>> +int iommu_bind_guest_stage(struct iommu_domain *domain, struct
>> device *dev,
>>>>> +			   struct iommu_guest_stage_config *cfg)
>>>
>>> About the name change from iommu_bind_pasid_table: is the intent to
>>> reuse this API for SMMUv2, which supports nested but not PASID? Seems
>>> like a good idea but "iommu_bind_table" may be better since "stage" is
>>> only used by Arm.
>>
>> At the moment I don't target SMUv2 but just SMMUv3. My focus was on
>> nested stage enablement without enabling the multi-CD feature (PASID),
>> whish is not supported by the QEMU vSMMUv3. Afterwards I realized that
>> basically we are pointing to a CD or PASID table and that's about the
>> same. I don't have a strong opinion on the name, iommu_bind_guest_table
>> or iommu_bind_pasid_table would be fine with me. Indeed "stage" is ARM
>> vocable (level for Intel?)
> 
> Intel uses first level/second level. 
> 
> iommu_bind_table is a bit confusing. what should people take table as?
> there is PASID table. there is also page table linked in each stage/level. and
> maybe other tables in vendor-specific definition.
> 
> to me iommu_bind_pasid_table is still clearer. anyway in other places
> we've used pasid explicitly in vfio/iommu APIs, then it should be general
> enough to represent various implementations.

Fine for me.

However I I would suggest to rename the original iommu_sva_invalidate
into something that is SVA unrelated. iommu_tlb_invalidate is not OK as
this API also is used to invalidate context caches - which are not
iotlbs -. What about iommu_cache_invalidate?

At least we must clarify that this API can be used for something else
than SVA enablement.

Thanks

Eric
> 
> Thanks
> Kevin
> 
> 
> 

^ permalink raw reply	[flat|nested] 21+ messages in thread

* RE: [RFC 01/13] iommu: Introduce bind_guest_stage API
  2018-09-04  8:10           ` Auger Eric
@ 2018-09-04  8:34             ` Tian, Kevin
  2018-09-04  8:41               ` Auger Eric
  0 siblings, 1 reply; 21+ messages in thread
From: Tian, Kevin @ 2018-09-04  8:34 UTC (permalink / raw)
  To: Auger Eric, Jean-Philippe Brucker, eric.auger.pro, iommu,
	linux-kernel, kvm, kvmarm, joro, alex.williamson, jacob.jun.pan,
	yi.l.liu, will.deacon, robin.murphy
  Cc: marc.zyngier, peter.maydell, christoffer.dall

> From: Auger Eric
> Sent: Tuesday, September 4, 2018 4:11 PM
> 
> Hi Kevin,
> On 09/04/2018 09:57 AM, Tian, Kevin wrote:
> >> From: Auger Eric
> >> Sent: Friday, August 31, 2018 9:52 PM
> >>
> >> Hi Jean-Philippe,
> >>
> >> On 08/31/2018 03:11 PM, Jean-Philippe Brucker wrote:
> >>> Hi Eric,
> >>>
> >>> On 23/08/18 16:25, Auger Eric wrote:
> >>>>> +int iommu_bind_guest_stage(struct iommu_domain *domain,
> struct
> >> device *dev,
> >>>>> +			   struct iommu_guest_stage_config *cfg)
> >>>
> >>> About the name change from iommu_bind_pasid_table: is the intent to
> >>> reuse this API for SMMUv2, which supports nested but not PASID?
> Seems
> >>> like a good idea but "iommu_bind_table" may be better since "stage" is
> >>> only used by Arm.
> >>
> >> At the moment I don't target SMUv2 but just SMMUv3. My focus was on
> >> nested stage enablement without enabling the multi-CD feature (PASID),
> >> whish is not supported by the QEMU vSMMUv3. Afterwards I realized
> that
> >> basically we are pointing to a CD or PASID table and that's about the
> >> same. I don't have a strong opinion on the name,
> iommu_bind_guest_table
> >> or iommu_bind_pasid_table would be fine with me. Indeed "stage" is
> ARM
> >> vocable (level for Intel?)
> >
> > Intel uses first level/second level.
> >
> > iommu_bind_table is a bit confusing. what should people take table as?
> > there is PASID table. there is also page table linked in each stage/level.
> and
> > maybe other tables in vendor-specific definition.
> >
> > to me iommu_bind_pasid_table is still clearer. anyway in other places
> > we've used pasid explicitly in vfio/iommu APIs, then it should be general
> > enough to represent various implementations.
> 
> Fine for me.
> 
> However I I would suggest to rename the original iommu_sva_invalidate
> into something that is SVA unrelated. iommu_tlb_invalidate is not OK as
> this API also is used to invalidate context caches - which are not
> iotlbs -. What about iommu_cache_invalidate?
> 
> At least we must clarify that this API can be used for something else
> than SVA enablement.
> 

Agree. using SVA is limiting.

I also agree that iommu_cache_invalidate is better, though I don't think
you want to pass guest context cache invalidation to host. that information
is fully under host control. :-)

Thanks
Kevin

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC 01/13] iommu: Introduce bind_guest_stage API
  2018-09-04  8:34             ` Tian, Kevin
@ 2018-09-04  8:41               ` Auger Eric
  2018-09-04  8:43                 ` Tian, Kevin
  2018-09-04  9:53                 ` Jean-Philippe Brucker
  0 siblings, 2 replies; 21+ messages in thread
From: Auger Eric @ 2018-09-04  8:41 UTC (permalink / raw)
  To: Tian, Kevin, Jean-Philippe Brucker, eric.auger.pro, iommu,
	linux-kernel, kvm, kvmarm, joro, alex.williamson, jacob.jun.pan,
	yi.l.liu, will.deacon, robin.murphy
  Cc: marc.zyngier, peter.maydell, christoffer.dall

Hi Kevin,

On 09/04/2018 10:34 AM, Tian, Kevin wrote:
>> From: Auger Eric
>> Sent: Tuesday, September 4, 2018 4:11 PM
>>
>> Hi Kevin,
>> On 09/04/2018 09:57 AM, Tian, Kevin wrote:
>>>> From: Auger Eric
>>>> Sent: Friday, August 31, 2018 9:52 PM
>>>>
>>>> Hi Jean-Philippe,
>>>>
>>>> On 08/31/2018 03:11 PM, Jean-Philippe Brucker wrote:
>>>>> Hi Eric,
>>>>>
>>>>> On 23/08/18 16:25, Auger Eric wrote:
>>>>>>> +int iommu_bind_guest_stage(struct iommu_domain *domain,
>> struct
>>>> device *dev,
>>>>>>> +			   struct iommu_guest_stage_config *cfg)
>>>>>
>>>>> About the name change from iommu_bind_pasid_table: is the intent to
>>>>> reuse this API for SMMUv2, which supports nested but not PASID?
>> Seems
>>>>> like a good idea but "iommu_bind_table" may be better since "stage" is
>>>>> only used by Arm.
>>>>
>>>> At the moment I don't target SMUv2 but just SMMUv3. My focus was on
>>>> nested stage enablement without enabling the multi-CD feature (PASID),
>>>> whish is not supported by the QEMU vSMMUv3. Afterwards I realized
>> that
>>>> basically we are pointing to a CD or PASID table and that's about the
>>>> same. I don't have a strong opinion on the name,
>> iommu_bind_guest_table
>>>> or iommu_bind_pasid_table would be fine with me. Indeed "stage" is
>> ARM
>>>> vocable (level for Intel?)
>>>
>>> Intel uses first level/second level.
>>>
>>> iommu_bind_table is a bit confusing. what should people take table as?
>>> there is PASID table. there is also page table linked in each stage/level.
>> and
>>> maybe other tables in vendor-specific definition.
>>>
>>> to me iommu_bind_pasid_table is still clearer. anyway in other places
>>> we've used pasid explicitly in vfio/iommu APIs, then it should be general
>>> enough to represent various implementations.
>>
>> Fine for me.
>>
>> However I I would suggest to rename the original iommu_sva_invalidate
>> into something that is SVA unrelated. iommu_tlb_invalidate is not OK as
>> this API also is used to invalidate context caches - which are not
>> iotlbs -. What about iommu_cache_invalidate?
>>
>> At least we must clarify that this API can be used for something else
>> than SVA enablement.
>>
> 
> Agree. using SVA is limiting.
> 
> I also agree that iommu_cache_invalidate is better, though I don't think
> you want to pass guest context cache invalidation to host. that information
> is fully under host control. :-)

I think the confusion comes from the different terminology used in VTD
and ARM SMMU spec.

Your PASID table ~ ARM SMMU Context Descriptor (CD) table
Your Root Entry/Context Entry ~ ARM SMMU Stream Table Entry (STE)

So I meant guesr invalidates its Context Descriptor cache. He "owns"
those. Host owns the STE.

Thanks

Eric

> 
> Thanks
> Kevin
> 

^ permalink raw reply	[flat|nested] 21+ messages in thread

* RE: [RFC 01/13] iommu: Introduce bind_guest_stage API
  2018-09-04  8:41               ` Auger Eric
@ 2018-09-04  8:43                 ` Tian, Kevin
  2018-09-04  9:53                 ` Jean-Philippe Brucker
  1 sibling, 0 replies; 21+ messages in thread
From: Tian, Kevin @ 2018-09-04  8:43 UTC (permalink / raw)
  To: Auger Eric, Jean-Philippe Brucker, eric.auger.pro, iommu,
	linux-kernel, kvm, kvmarm, joro, alex.williamson, jacob.jun.pan,
	yi.l.liu, will.deacon, robin.murphy
  Cc: marc.zyngier, peter.maydell, christoffer.dall

> From: Auger Eric
> Sent: Tuesday, September 4, 2018 4:41 PM
> 
> Hi Kevin,
> 
> On 09/04/2018 10:34 AM, Tian, Kevin wrote:
> >> From: Auger Eric
> >> Sent: Tuesday, September 4, 2018 4:11 PM
> >>
> >> Hi Kevin,
> >> On 09/04/2018 09:57 AM, Tian, Kevin wrote:
> >>>> From: Auger Eric
> >>>> Sent: Friday, August 31, 2018 9:52 PM
> >>>>
> >>>> Hi Jean-Philippe,
> >>>>
> >>>> On 08/31/2018 03:11 PM, Jean-Philippe Brucker wrote:
> >>>>> Hi Eric,
> >>>>>
> >>>>> On 23/08/18 16:25, Auger Eric wrote:
> >>>>>>> +int iommu_bind_guest_stage(struct iommu_domain *domain,
> >> struct
> >>>> device *dev,
> >>>>>>> +			   struct iommu_guest_stage_config *cfg)
> >>>>>
> >>>>> About the name change from iommu_bind_pasid_table: is the intent
> to
> >>>>> reuse this API for SMMUv2, which supports nested but not PASID?
> >> Seems
> >>>>> like a good idea but "iommu_bind_table" may be better since "stage"
> is
> >>>>> only used by Arm.
> >>>>
> >>>> At the moment I don't target SMUv2 but just SMMUv3. My focus was
> on
> >>>> nested stage enablement without enabling the multi-CD feature
> (PASID),
> >>>> whish is not supported by the QEMU vSMMUv3. Afterwards I realized
> >> that
> >>>> basically we are pointing to a CD or PASID table and that's about the
> >>>> same. I don't have a strong opinion on the name,
> >> iommu_bind_guest_table
> >>>> or iommu_bind_pasid_table would be fine with me. Indeed "stage" is
> >> ARM
> >>>> vocable (level for Intel?)
> >>>
> >>> Intel uses first level/second level.
> >>>
> >>> iommu_bind_table is a bit confusing. what should people take table as?
> >>> there is PASID table. there is also page table linked in each stage/level.
> >> and
> >>> maybe other tables in vendor-specific definition.
> >>>
> >>> to me iommu_bind_pasid_table is still clearer. anyway in other places
> >>> we've used pasid explicitly in vfio/iommu APIs, then it should be
> general
> >>> enough to represent various implementations.
> >>
> >> Fine for me.
> >>
> >> However I I would suggest to rename the original iommu_sva_invalidate
> >> into something that is SVA unrelated. iommu_tlb_invalidate is not OK as
> >> this API also is used to invalidate context caches - which are not
> >> iotlbs -. What about iommu_cache_invalidate?
> >>
> >> At least we must clarify that this API can be used for something else
> >> than SVA enablement.
> >>
> >
> > Agree. using SVA is limiting.
> >
> > I also agree that iommu_cache_invalidate is better, though I don't think
> > you want to pass guest context cache invalidation to host. that
> information
> > is fully under host control. :-)
> 
> I think the confusion comes from the different terminology used in VTD
> and ARM SMMU spec.
> 
> Your PASID table ~ ARM SMMU Context Descriptor (CD) table
> Your Root Entry/Context Entry ~ ARM SMMU Stream Table Entry (STE)
> 
> So I meant guesr invalidates its Context Descriptor cache. He "owns"
> those. Host owns the STE.
> 

yes, then it makes sense.

Thanks
Kevin

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC 01/13] iommu: Introduce bind_guest_stage API
  2018-09-04  8:41               ` Auger Eric
  2018-09-04  8:43                 ` Tian, Kevin
@ 2018-09-04  9:53                 ` Jean-Philippe Brucker
  2018-09-05  0:36                   ` Tian, Kevin
  1 sibling, 1 reply; 21+ messages in thread
From: Jean-Philippe Brucker @ 2018-09-04  9:53 UTC (permalink / raw)
  To: Auger Eric, Tian, Kevin, eric.auger.pro, iommu, linux-kernel,
	kvm, kvmarm, joro, alex.williamson, jacob.jun.pan, yi.l.liu,
	Will Deacon, Robin Murphy
  Cc: Marc Zyngier, peter.maydell, Christoffer Dall

On 04/09/2018 09:41, Auger Eric wrote:
> I think the confusion comes from the different terminology used in VTD
> and ARM SMMU spec.
> 
> Your PASID table ~ ARM SMMU Context Descriptor (CD) table
> Your Root Entry/Context Entry ~ ARM SMMU Stream Table Entry (STE)

In past discussions we used "PASID table (entry)" and "device context"
respectively. For clarity we should probably stick to those names unless
we discuss arch-specific patches

Thanks,
Jean

^ permalink raw reply	[flat|nested] 21+ messages in thread

* RE: [RFC 01/13] iommu: Introduce bind_guest_stage API
  2018-09-04  9:53                 ` Jean-Philippe Brucker
@ 2018-09-05  0:36                   ` Tian, Kevin
  0 siblings, 0 replies; 21+ messages in thread
From: Tian, Kevin @ 2018-09-05  0:36 UTC (permalink / raw)
  To: Jean-Philippe Brucker, Auger Eric, eric.auger.pro, iommu,
	linux-kernel, kvm, kvmarm, joro, alex.williamson, jacob.jun.pan,
	yi.l.liu, Will Deacon, Robin Murphy
  Cc: Marc Zyngier, peter.maydell, Christoffer Dall

> From: Jean-Philippe Brucker [mailto:jean-philippe.brucker@arm.com]
> Sent: Tuesday, September 4, 2018 5:53 PM
> 
> On 04/09/2018 09:41, Auger Eric wrote:
> > I think the confusion comes from the different terminology used in VTD
> > and ARM SMMU spec.
> >
> > Your PASID table ~ ARM SMMU Context Descriptor (CD) table
> > Your Root Entry/Context Entry ~ ARM SMMU Stream Table Entry (STE)
> 
> In past discussions we used "PASID table (entry)" and "device context"
> respectively. For clarity we should probably stick to those names unless
> we discuss arch-specific patches
> 

yes, that is a good abstraction. let's try to use them in related discussions
and patch series.

Thanks
Kevin

^ permalink raw reply	[flat|nested] 21+ messages in thread

end of thread, other threads:[~2018-09-05  0:36 UTC | newest]

Thread overview: 21+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <1535026656-8450-1-git-send-email-eric.auger@redhat.com>
     [not found] ` <1535026656-8450-2-git-send-email-eric.auger@redhat.com>
2018-08-23 15:25   ` [RFC 01/13] iommu: Introduce bind_guest_stage API Auger Eric
2018-08-31 13:11     ` Jean-Philippe Brucker
2018-08-31 13:52       ` Auger Eric
2018-09-03 12:19         ` Jean-Philippe Brucker
2018-09-04  7:57         ` Tian, Kevin
2018-09-04  8:10           ` Auger Eric
2018-09-04  8:34             ` Tian, Kevin
2018-09-04  8:41               ` Auger Eric
2018-09-04  8:43                 ` Tian, Kevin
2018-09-04  9:53                 ` Jean-Philippe Brucker
2018-09-05  0:36                   ` Tian, Kevin
     [not found]   ` <A2975661238FB949B60364EF0F2C257439CCE9EE@SHSMSX104.ccr.corp.intel.com>
2018-08-24 13:20     ` Auger Eric
     [not found] ` <1535026656-8450-3-git-send-email-eric.auger@redhat.com>
2018-08-31 13:17   ` [RFC 02/13] iommu: Introduce tlb_invalidate API Jean-Philippe Brucker
2018-08-31 14:07     ` Auger Eric
2018-09-03 12:28       ` Jean-Philippe Brucker
2018-09-03 12:41         ` Auger Eric
2018-09-03 13:41           ` Jean-Philippe Brucker
     [not found] ` <1535026656-8450-10-git-send-email-eric.auger@redhat.com>
2018-08-31 13:20   ` [RFC 09/13] iommu/smmuv3: Get prepared for nested stage support Jean-Philippe Brucker
2018-08-31 14:11     ` Auger Eric
2018-09-03 12:29       ` Jean-Philippe Brucker
2018-09-03 12:48         ` Auger Eric

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).