All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jason Gunthorpe <jgg@nvidia.com>
To: Eric Auger <eric.auger@redhat.com>
Cc: bpf@vger.kernel.org, Jonathan Corbet <corbet@lwn.net>,
	David Woodhouse <dwmw2@infradead.org>,
	iommu@lists.linux.dev, Joerg Roedel <joro@8bytes.org>,
	Kevin Tian <kevin.tian@intel.com>,
	linux-doc@vger.kernel.org, linux-kselftest@vger.kernel.org,
	llvm@lists.linux.dev, Nathan Chancellor <nathan@kernel.org>,
	Nick Desaulniers <ndesaulniers@google.com>,
	Miguel Ojeda <ojeda@kernel.org>,
	Robin Murphy <robin.murphy@arm.com>,
	Shuah Khan <shuah@kernel.org>,
	Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>,
	Tom Rix <trix@redhat.com>, Will Deacon <will@kernel.org>,
	Anthony Krowiak <akrowiak@linux.ibm.com>,
	Alex Williamson <alex.williamson@redhat.com>,
	Bagas Sanjaya <bagasdotme@gmail.com>,
	Lu Baolu <baolu.lu@linux.intel.com>,
	Chaitanya Kulkarni <chaitanyak@nvidia.com>,
	Cornelia Huck <cohuck@redhat.com>,
	Daniel Jordan <daniel.m.jordan@oracle.com>,
	David Gibson <david@gibson.dropbear.id.au>,
	Eric Farman <farman@linux.ibm.com>,
	Jason Wang <jasowang@redhat.com>,
	Jean-Philippe Brucker <jean-philippe@linaro.org>,
	Jason Herne <jjherne@linux.ibm.com>,
	Joao Martins <joao.m.martins@oracle.com>,
	kvm@vger.kernel.org, Lixiao Yang <lixiao.yang@intel.com>,
	Matthew Rosato <mjrosato@linux.ibm.com>,
	"Michael S. Tsirkin" <mst@redhat.com>,
	Nicolin Chen <nicolinc@nvidia.com>,
	Halil Pasic <pasic@linux.ibm.com>,
	Niklas Schnelle <schnelle@linux.ibm.com>,
	Shameerali Kolothum Thodi <shameerali.kolothum.thodi@huawei.com>,
	Yi Liu <yi.l.liu@intel.com>, Keqian Zhu <zhukeqian1@huawei.com>
Subject: Re: [PATCH v5 11/19] iommufd: IOCTLs for the io_pagetable
Date: Mon, 28 Nov 2022 14:27:06 -0400	[thread overview]
Message-ID: <Y4T9ejjPETS3TPx7@nvidia.com> (raw)
In-Reply-To: <16bcfd63-2803-8000-7725-b42cd05061fa@redhat.com>

On Sun, Nov 27, 2022 at 06:49:29PM +0100, Eric Auger wrote:

> > +static int iommufd_ioas_load_iovas(struct rb_root_cached *itree,
> > +				   struct iommu_iova_range __user *ranges,
> > +				   u32 num)
> > +{
> > +	u32 i;
> > +
> > +	for (i = 0; i != num; i++) {

> shouldn't it be < ?

It is logically equivalent

> > +int iommufd_ioas_allow_iovas(struct iommufd_ucmd *ucmd)
> > +{
> > +	struct iommu_ioas_allow_iovas *cmd = ucmd->cmd;
> > +	struct rb_root_cached allowed_iova = RB_ROOT_CACHED;
> > +	struct interval_tree_node *node;
> > +	struct iommufd_ioas *ioas;
> > +	struct io_pagetable *iopt;
> > +	int rc = 0;
> > +
> > +	if (cmd->__reserved)
> > +		return -EOPNOTSUPP;
> > +
> > +	ioas = iommufd_get_ioas(ucmd, cmd->ioas_id);
> > +	if (IS_ERR(ioas))
> > +		return PTR_ERR(ioas);
> > +	iopt = &ioas->iopt;
> > +
> > +	rc = iommufd_ioas_load_iovas(&allowed_iova,
> > +				     u64_to_user_ptr(cmd->allowed_iovas),
> > +				     cmd->num_iovas);
> > +	if (rc)
> > +		goto out_free;
> > +
> > +	rc = iopt_set_allow_iova(iopt, &allowed_iova);
> Please can you add a comment about why you need to proceed in 2 steps,
> ie. add the ranges in a first tree and then 'swap' to the
> iopt->allowed_tree (and eventually delete the first tree)?

Sure

	/*
	 * We want the allowed tree update to be atomic, so we have to keep the
	 * original nodes around, and keep track of the new nodes as we allocate
	 * memory for them. The simplest solution is to have a new/old tree and
	 * then swap new for old. On success we free the old tree, on failure we
	 * free the new tree.
	 */

> > +static int conv_iommu_prot(u32 map_flags)
> > +{
> > +	int iommu_prot;
> > +
> > +	/*
> > +	 * We provide no manual cache coherency ioctls to userspace and most
> > +	 * architectures make the CPU ops for cache flushing privileged.
> > +	 * Therefore we require the underlying IOMMU to support CPU coherent
> > +	 * operation. Support for IOMMU_CACHE is enforced by the
> > +	 * IOMMU_CAP_CACHE_COHERENCY test during bind.
> > +	 */
> > +	iommu_prot = IOMMU_CACHE;
> at init?

done

> > +int iommufd_ioas_map(struct iommufd_ucmd *ucmd)
> > +{
> > +	struct iommu_ioas_map *cmd = ucmd->cmd;
> > +	struct iommufd_ioas *ioas;
> > +	unsigned int flags = 0;
> > +	unsigned long iova;
> > +	int rc;
> > +
> > +	if ((cmd->flags &
> > +	     ~(IOMMU_IOAS_MAP_FIXED_IOVA | IOMMU_IOAS_MAP_WRITEABLE |
> > +	       IOMMU_IOAS_MAP_READABLE)) ||
> > +	    cmd->__reserved)
> > +		return -EOPNOTSUPP;
> > +	if (cmd->iova >= ULONG_MAX || cmd->length >= ULONG_MAX)
> > +		return -EOVERFLOW;
> > +
> > +	ioas = iommufd_get_ioas(ucmd, cmd->ioas_id);
> > +	if (IS_ERR(ioas))
> > +		return PTR_ERR(ioas);
> > +
> > +	if (!(cmd->flags & IOMMU_IOAS_MAP_FIXED_IOVA))
> > +		flags = IOPT_ALLOC_IOVA;
> > +	iova = cmd->iova;
> can be done either at initialization or only if MAP_FIXED_IOVA.

Done


> > +int iommufd_option_rlimit_mode(struct iommu_option *cmd,
> > +			       struct iommufd_ctx *ictx)
> > +{
> *object_id and __reserved should be checked as per the uapi doc*

Ohh, yes, thanks:

@@ -317,6 +322,9 @@ int iommufd_ioas_unmap(struct iommufd_ucmd *ucmd)
 int iommufd_option_rlimit_mode(struct iommu_option *cmd,
                               struct iommufd_ctx *ictx)
 {
+       if (cmd->object_id)
+               return -EOPNOTSUPP;
+
        if (cmd->op == IOMMU_OPTION_OP_GET) {
                cmd->val64 = ictx->account_mode == IOPT_PAGES_ACCOUNT_MM;
                return 0;
diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c
index de5cc01023c0c5..bcb463e581009c 100644
--- a/drivers/iommu/iommufd/main.c
+++ b/drivers/iommu/iommufd/main.c
@@ -215,6 +215,9 @@ static int iommufd_option(struct iommufd_ucmd *ucmd)
        struct iommu_option *cmd = ucmd->cmd;
        int rc;
 
+       if (cmd->__reserved)
+               return -EOPNOTSUPP;
+
        switch (cmd->option_id) {
        case IOMMU_OPTION_RLIMIT_MODE:
                rc = iommufd_option_rlimit_mode(cmd, ucmd->ictx);

> > +/**
> > + * struct iommu_ioas_iova_ranges - ioctl(IOMMU_IOAS_IOVA_RANGES)
> > + * @size: sizeof(struct iommu_ioas_iova_ranges)
> > + * @ioas_id: IOAS ID to read ranges from
> > + * @num_iovas: Input/Output total number of ranges in the IOAS
> > + * @__reserved: Must be 0
> > + * @allowed_iovas: Pointer to the output array of struct iommu_iova_range
> > + * @out_iova_alignment: Minimum alignment required for mapping IOVA
> > + *
> > + * Query an IOAS for ranges of allowed IOVAs. Mapping IOVA outside these ranges
> > + * is not allowed. num_iovas will be set to the total number of iovas and
> > + * the allowed_iovas[] will be filled in as space permits.
> > + *
> > + * The allowed ranges are dependent on the HW path the DMA operation takes, and
> > + * can change during the lifetime of the IOAS. A fresh empty IOAS will have a
> > + * full range, and each attached device will narrow the ranges based on that
> > + * device's HW restrictions. Detatching a device can widen the ranges. Userspace
> detaching
> > + * should query ranges after every attach/detatch to know what IOVAs are valid
> detach

Done

> > + * for mapping.
> > + *
> > + * On input num_iovas is the length of the allowed_iovas array. On output it is
> > + * the total number of iovas filled in. The ioctl will return -EMSGSIZE and set
> > + * num_iovas to the required value if num_iovas is too small. In this case the
> > + * caller should allocate a larger output array and re-issue the ioctl.
> > + */
> > +struct iommu_ioas_iova_ranges {
> > +	__u32 size;
> > +	__u32 ioas_id;
> > +	__u32 num_iovas;
> > +	__u32 __reserved;
> > +	__aligned_u64 allowed_iovas;
> > +	__aligned_u64 out_iova_alignment;
> document @out_iova_alignment?

 * out_iova_alignment returns the minimum IOVA alignment that can be given
 * to IOMMU_IOAS_MAP/COPY. IOVAs must satisfy:
 *   starting_iova % out_iova_alignment == 0
 *   (starting_iova + length) % out_iova_alignment == 0
 * out_iova_alignment can be 1, indicating any IOVA is allowed. It cannot
 * be higher than the system PAGE_SIZE.

> > +/**
> > + * struct iommu_ioas_map - ioctl(IOMMU_IOAS_MAP)
> > + * @size: sizeof(struct iommu_ioas_map)
> > + * @flags: Combination of enum iommufd_ioas_map_flags
> > + * @ioas_id: IOAS ID to change the mapping of
> > + * @__reserved: Must be 0
> > + * @user_va: Userspace pointer to start mapping from
> > + * @length: Number of bytes to map
> > + * @iova: IOVA the mapping was placed at. If IOMMU_IOAS_MAP_FIXED_IOVA is set
> > + *        then this must be provided as input.
> > + *
> > + * Set an IOVA mapping from a user pointer. If FIXED_IOVA is specified then the
> > + * mapping will be established at iova, otherwise a suitable location based on
> > + * the reserved and allowed lists will be automatically selected and returned in
> > + * iova.
> You do not mention anything about the fact the IOCTL cannot be called
> twice for a given @user_va w/ FIXED_IOVA
> Referring to VFIO_DMA_MAP_FLAG_VADDR.

 * If IOMMU_IOAS_MAP_FIXED_IOVA is specified then the iova range must currently
 * be unused; existing IOVA cannot be replaced.

> > +/**
> > + * struct iommu_ioas_copy - ioctl(IOMMU_IOAS_COPY)
> > + * @size: sizeof(struct iommu_ioas_copy)
> > + * @flags: Combination of enum iommufd_ioas_map_flags
> > + * @dst_ioas_id: IOAS ID to change the mapping of
> > + * @src_ioas_id: IOAS ID to copy from
> > + * @length: Number of bytes to copy and map
> > + * @dst_iova: IOVA the mapping was placed at. If IOMMU_IOAS_MAP_FIXED_IOVA is
> > + *            set then this must be provided as input.
> > + * @src_iova: IOVA to start the copy
> > + *
> > + * Copy an already existing mapping from src_ioas_id and establish it in
> > + * dst_ioas_id. The src iova/length must exactly match a range used with
> > + * IOMMU_IOAS_MAP.
> > + *
> > + * This may be used to efficiently clone a subset of an IOAS to another, or as a
> > + * kind of 'cache' to speed up mapping. Copy has an effciency advantage over
> efficiency
> > + * establishing equivalent new mappings, as internal resources are shared, and
> > + * the kernel will pin the user memory only once.
> > + */
> > +struct iommu_ioas_copy {
> > +	__u32 size;
> > +	__u32 flags;
> > +	__u32 dst_ioas_id;
> > +	__u32 src_ioas_id;
> is src_ioas_id == dst_ioas_id allowed?

Yes

> > +/**
> > + * struct iommu_option - iommu option multiplexer
> > + * @size: sizeof(struct iommu_option)
> > + * @option_id: One of enum iommufd_option
> > + * @op: One of enum iommufd_option_ops
> > + * @__reserved: Must be 0
> > + * @object_id: ID of the object if required
> > + * @val64: Option value to set or value returned on get
> > + *
> > + * Change a simple option value. This multiplexor allows controlling a options
> s/a options/options

Done

Thanks,
Jason

  parent reply	other threads:[~2022-11-28 18:27 UTC|newest]

Thread overview: 59+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-11-16 21:00 [PATCH v5 00/19] IOMMUFD Generic interface Jason Gunthorpe
2022-11-16 21:00 ` [PATCH v5 01/19] iommu: Add IOMMU_CAP_ENFORCE_CACHE_COHERENCY Jason Gunthorpe
2022-11-23  8:30   ` Yi Liu
2022-11-23 16:56     ` Jason Gunthorpe
2022-11-16 21:00 ` [PATCH v5 02/19] iommu: Add device-centric DMA ownership interfaces Jason Gunthorpe
2022-11-16 21:00 ` [PATCH v5 03/19] interval-tree: Add a utility to iterate over spans in an interval tree Jason Gunthorpe
2022-11-16 21:00 ` [PATCH v5 04/19] scripts/kernel-doc: support EXPORT_SYMBOL_NS_GPL() with -export Jason Gunthorpe
2022-11-16 21:00 ` [PATCH v5 05/19] iommufd: Document overview of iommufd Jason Gunthorpe
2022-11-18  9:06   ` Eric Auger
2022-11-30 15:06   ` Binbin Wu
2022-12-01  0:08     ` Jason Gunthorpe
2022-11-16 21:00 ` [PATCH v5 06/19] iommufd: File descriptor, context, kconfig and makefiles Jason Gunthorpe
2022-11-18 16:27   ` Eric Auger
2022-11-18 20:23     ` Jason Gunthorpe
2022-11-25  8:43       ` Eric Auger
2022-11-16 21:00 ` [PATCH v5 07/19] kernel/user: Allow user::locked_vm to be usable for iommufd Jason Gunthorpe
2022-11-18  9:08   ` Eric Auger
2022-11-18  9:09   ` Eric Auger
2022-11-18 16:28   ` Eric Auger
2022-11-18 20:25     ` Jason Gunthorpe
2022-11-16 21:00 ` [PATCH v5 08/19] iommufd: PFN handling for iopt_pages Jason Gunthorpe
2022-11-18  2:24   ` Tian, Kevin
2022-11-18  2:27     ` Jason Gunthorpe
2022-11-16 21:00 ` [PATCH v5 09/19] iommufd: Algorithms for PFN storage Jason Gunthorpe
2022-11-16 21:00 ` [PATCH v5 10/19] iommufd: Data structure to provide IOVA to PFN mapping Jason Gunthorpe
2022-11-18  2:55   ` Tian, Kevin
2022-11-16 21:00 ` [PATCH v5 11/19] iommufd: IOCTLs for the io_pagetable Jason Gunthorpe
2022-11-27 17:49   ` Eric Auger
2022-11-28  9:05     ` Tian, Kevin
2022-11-28 18:11       ` Jason Gunthorpe
2022-11-28 18:27     ` Jason Gunthorpe [this message]
2022-11-28 20:09       ` Eric Auger
2022-11-16 21:00 ` [PATCH v5 12/19] iommufd: Add a HW pagetable object Jason Gunthorpe
2022-11-27 15:12   ` Eric Auger
2022-11-16 21:00 ` [PATCH v5 13/19] iommufd: Add kAPI toward external drivers for physical devices Jason Gunthorpe
2022-11-27 21:13   ` Eric Auger
2022-11-28  0:14     ` Jason Gunthorpe
2022-11-28 10:55       ` Eric Auger
2022-11-28 13:20         ` Jason Gunthorpe
2022-11-28 14:17           ` Eric Auger
2022-11-29  1:09             ` Jason Gunthorpe
2022-11-16 21:00 ` [PATCH v5 14/19] iommufd: Add kAPI toward external drivers for kernel access Jason Gunthorpe
2022-11-28 15:48   ` Eric Auger
2022-11-28 18:56     ` Jason Gunthorpe
2022-12-06 20:40       ` Jason Gunthorpe
2022-11-16 21:00 ` [PATCH v5 15/19] iommufd: vfio container FD ioctl compatibility Jason Gunthorpe
2022-11-18  2:58   ` Tian, Kevin
2022-11-18 15:22     ` Jason Gunthorpe
2022-11-23  1:33       ` Tian, Kevin
2022-11-23  4:31         ` Jason Wang
2022-11-23 13:03         ` Jason Gunthorpe
2022-11-24  5:23           ` Tian, Kevin
2022-11-28 17:53   ` Eric Auger
2022-11-28 19:37     ` Jason Gunthorpe
2022-11-28 20:54       ` Eric Auger
2022-11-16 21:00 ` [PATCH v5 16/19] iommufd: Add kernel support for testing iommufd Jason Gunthorpe
2022-11-16 21:00 ` [PATCH v5 17/19] iommufd: Add some fault injection points Jason Gunthorpe
2022-11-16 21:00 ` [PATCH v5 18/19] iommufd: Add additional invariant assertions Jason Gunthorpe
2022-11-16 21:00 ` [PATCH v5 19/19] iommufd: Add a selftest Jason Gunthorpe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=Y4T9ejjPETS3TPx7@nvidia.com \
    --to=jgg@nvidia.com \
    --cc=akrowiak@linux.ibm.com \
    --cc=alex.williamson@redhat.com \
    --cc=bagasdotme@gmail.com \
    --cc=baolu.lu@linux.intel.com \
    --cc=bpf@vger.kernel.org \
    --cc=chaitanyak@nvidia.com \
    --cc=cohuck@redhat.com \
    --cc=corbet@lwn.net \
    --cc=daniel.m.jordan@oracle.com \
    --cc=david@gibson.dropbear.id.au \
    --cc=dwmw2@infradead.org \
    --cc=eric.auger@redhat.com \
    --cc=farman@linux.ibm.com \
    --cc=iommu@lists.linux.dev \
    --cc=jasowang@redhat.com \
    --cc=jean-philippe@linaro.org \
    --cc=jjherne@linux.ibm.com \
    --cc=joao.m.martins@oracle.com \
    --cc=joro@8bytes.org \
    --cc=kevin.tian@intel.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-kselftest@vger.kernel.org \
    --cc=lixiao.yang@intel.com \
    --cc=llvm@lists.linux.dev \
    --cc=mjrosato@linux.ibm.com \
    --cc=mst@redhat.com \
    --cc=nathan@kernel.org \
    --cc=ndesaulniers@google.com \
    --cc=nicolinc@nvidia.com \
    --cc=ojeda@kernel.org \
    --cc=pasic@linux.ibm.com \
    --cc=robin.murphy@arm.com \
    --cc=schnelle@linux.ibm.com \
    --cc=shameerali.kolothum.thodi@huawei.com \
    --cc=shuah@kernel.org \
    --cc=suravee.suthikulpanit@amd.com \
    --cc=trix@redhat.com \
    --cc=will@kernel.org \
    --cc=yi.l.liu@intel.com \
    --cc=zhukeqian1@huawei.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.