linux-pci.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Jacob Pan <jacob.jun.pan@linux.intel.com>
To: Jean-Philippe Brucker <jean-philippe.brucker@arm.com>
Cc: kvm@vger.kernel.org, linux-pci@vger.kernel.org,
	xuzaibo@huawei.com, will.deacon@arm.com, okaya@codeaurora.org,
	linux-mm@kvack.org, ashok.raj@intel.com, bharatku@xilinx.com,
	linux-acpi@vger.kernel.org, rfranz@cavium.com,
	devicetree@vger.kernel.org, rgummal@xilinx.com,
	linux-arm-kernel@lists.infradead.org, dwmw2@infradead.org,
	ilias.apalodimas@linaro.org, iommu@lists.linux-foundation.org,
	christian.koenig@amd.com, jacob.jun.pan@linux.intel.com
Subject: Re: [PATCH v2 07/40] iommu: Add a page fault handler
Date: Tue, 22 May 2018 16:35:21 -0700	[thread overview]
Message-ID: <20180522163521.413e60c6@jacob-builder> (raw)
In-Reply-To: <8a640794-a6f3-fa01-82a9-06479a6f779a@arm.com>

On Mon, 21 May 2018 15:49:40 +0100
Jean-Philippe Brucker <jean-philippe.brucker@arm.com> wrote:

> On 18/05/18 19:04, Jacob Pan wrote:
> >> +struct iopf_context {
> >> +	struct device			*dev;
> >> +	struct iommu_fault_event	evt;
> >> +	struct list_head		head;
> >> +};
> >> +
> >> +struct iopf_group {
> >> +	struct iopf_context		last_fault;
> >> +	struct list_head		faults;
> >> +	struct work_struct		work;
> >> +};
> >> +  
> > All the page requests in the group should belong to the same device,
> > perhaps struct device tracking should be in iopf_group instead of
> > iopf_context?  
> 
> Right, this is a leftover from when we kept a global list of partial
> faults. Since the list is now per-device, I can move the dev pointer
> (I think I should also rename iopf_context to iopf_fault, if that's
> all right)
> 
> >> +static int iopf_complete(struct device *dev, struct
> >> iommu_fault_event *evt,
> >> +			 enum page_response_code status)
> >> +{
> >> +	struct page_response_msg resp = {
> >> +		.addr			= evt->addr,
> >> +		.pasid			= evt->pasid,
> >> +		.pasid_present		= evt->pasid_valid,
> >> +		.page_req_group_id	=
> >> evt->page_req_group_id,
> >> +		.private_data		= evt->iommu_private,
> >> +		.resp_code		= status,
> >> +	};
> >> +
> >> +	return iommu_page_response(dev, &resp);
> >> +}
> >> +
> >> +static enum page_response_code
> >> +iopf_handle_single(struct iopf_context *fault)
> >> +{
> >> +	/* TODO */
> >> +	return -ENODEV;
> >> +}
> >> +
> >> +static void iopf_handle_group(struct work_struct *work)
> >> +{
> >> +	struct iopf_group *group;
> >> +	struct iopf_context *fault, *next;
> >> +	enum page_response_code status = IOMMU_PAGE_RESP_SUCCESS;
> >> +
> >> +	group = container_of(work, struct iopf_group, work);
> >> +
> >> +	list_for_each_entry_safe(fault, next, &group->faults,
> >> head) {
> >> +		struct iommu_fault_event *evt = &fault->evt;
> >> +		/*
> >> +		 * Errors are sticky: don't handle subsequent
> >> faults in the
> >> +		 * group if there is an error.
> >> +		 */  
> > There might be performance benefit for certain device to continue in
> > spite of error in that the ATS retry may have less fault. Perhaps a
> > policy decision for later enhancement.  
> 
> Yes I think we should leave it to future work. My current reasoning is
> that subsequent requests are more likely to fail as well and reporting
> the error is more urgent, but we need real workloads to confirm this.
> 
> >> +		if (status == IOMMU_PAGE_RESP_SUCCESS)
> >> +			status = iopf_handle_single(fault);
> >> +
> >> +		if (!evt->last_req)
> >> +			kfree(fault);
> >> +	}
> >> +
> >> +	iopf_complete(group->last_fault.dev,
> >> &group->last_fault.evt, status);
> >> +	kfree(group);
> >> +}
> >> +
> >> +/**
> >> + * iommu_queue_iopf - IO Page Fault handler
> >> + * @evt: fault event
> >> + * @cookie: struct device, passed to
> >> iommu_register_device_fault_handler.
> >> + *
> >> + * Add a fault to the device workqueue, to be handled by mm.
> >> + */
> >> +int iommu_queue_iopf(struct iommu_fault_event *evt, void *cookie)
> >> +{
> >> +	struct iopf_group *group;
> >> +	struct iopf_context *fault, *next;
> >> +	struct iopf_device_param *iopf_param;
> >> +
> >> +	struct device *dev = cookie;
> >> +	struct iommu_param *param = dev->iommu_param;
> >> +
> >> +	if (WARN_ON(!mutex_is_locked(&param->lock)))
> >> +		return -EINVAL;
> >> +
> >> +	if (evt->type != IOMMU_FAULT_PAGE_REQ)
> >> +		/* Not a recoverable page fault */
> >> +		return IOMMU_PAGE_RESP_CONTINUE;
> >> +
> >> +	/*
> >> +	 * As long as we're holding param->lock, the queue can't
> >> be unlinked
> >> +	 * from the device and therefore cannot disappear.
> >> +	 */
> >> +	iopf_param = param->iopf_param;
> >> +	if (!iopf_param)
> >> +		return -ENODEV;
> >> +
> >> +	if (!evt->last_req) {
> >> +		fault = kzalloc(sizeof(*fault), GFP_KERNEL);
> >> +		if (!fault)
> >> +			return -ENOMEM;
> >> +
> >> +		fault->evt = *evt;
> >> +		fault->dev = dev;
> >> +
> >> +		/* Non-last request of a group. Postpone until the
> >> last one */
> >> +		list_add(&fault->head, &iopf_param->partial);
> >> +
> >> +		return IOMMU_PAGE_RESP_HANDLED;
> >> +	}
> >> +
> >> +	group = kzalloc(sizeof(*group), GFP_KERNEL);
> >> +	if (!group)
> >> +		return -ENOMEM;
> >> +
> >> +	group->last_fault.evt = *evt;
> >> +	group->last_fault.dev = dev;
> >> +	INIT_LIST_HEAD(&group->faults);
> >> +	list_add(&group->last_fault.head, &group->faults);
> >> +	INIT_WORK(&group->work, iopf_handle_group);
> >> +
> >> +	/* See if we have partial faults for this group */
> >> +	list_for_each_entry_safe(fault, next,
> >> &iopf_param->partial, head) {
> >> +		if (fault->evt.page_req_group_id ==
> >> evt->page_req_group_id)  
> > If all 9 bit group index are used, there can be lots of PRGs. For
> > future enhancement, maybe we can have per group partial list ready
> > to go when LPIG comes in? I will be less searching.  
> 
> Yes, allocating the PRG from the start could be more efficient. I
> think it's slightly more complicated code so I'd rather see
> performance numbers before implementing it
> 
> >> +			/* Insert *before* the last fault */
> >> +			list_move(&fault->head, &group->faults);
> >> +	}
> >> +  
> > If you already sorted the group list with last fault at the end,
> > why do you need a separate entry to track the last fault?  
> 
> Not sure I understand your question, sorry. Do you mean why the
> iopf_group.last_fault? Just to avoid one more kzalloc.
> 
kind of :) what i thought was that why can't the last_fault naturally
be the last entry in your group fault list? then there is no need for
special treatment in terms of allocation of the last fault. just my
preference.
> >> +
> >> +	queue->flush(queue->flush_arg, dev);
> >> +
> >> +	/*
> >> +	 * No need to clear the partial list. All PRGs containing
> >> the PASID that
> >> +	 * needs to be decommissioned are whole (the device driver
> >> made sure of
> >> +	 * it before this function was called). They have been
> >> submitted to the
> >> +	 * queue by the above flush().
> >> +	 */  
> > So you are saying device driver need to make sure LPIG PRQ is
> > submitted in the flush call above such that partial list is
> > cleared?  
> 
> Not exactly, it's the IOMMU driver that makes sure all LPIG in its
> queues are submitted by the above flush call. In more details the
> flow is:
> 
> * Either device driver calls unbind()/sva_device_shutdown(), or the
> process exits.
> * If the device driver called, then it already told the device to stop
> using the PASID. Otherwise we use the mm_exit() callback to tell the
> device driver to stop using the PASID.
> * In either case, when receiving a stop request from the driver, the
> device sends the LPIGs to the IOMMU queue.
> * Then, the flush call above ensures that the IOMMU reports the LPIG
> with iommu_report_device_fault.
> * While submitting all LPIGs for this PASID to the work queue,
> ipof_queue_fault also picked up all partial faults, so the partial
> list is clean.
> 
> Maybe I should improve this comment?
> 
thanks for explaining. LPIG submission is done by device asynchronously
w.r.t. driver stopping/decommission PASID. so if we were to use this
flow on vt-d, which does not stall page request queue, then we should
use the iommu model specific flush() callback to ensure LPIG is
received? There could be queue full condition and retry. I am just
trying to understand how and where we can make sure LPIG is
received and all groups are whole.

> > what if
> > there are device failures where device needs to reset (either whole
> > function level or PASID level), should there still be a need to
> > clear the partial list for all associated PASID/group?  
> 
> I guess the simplest way out, when resetting the device, is for the
> device driver to call iommu_sva_device_shutdown(). Both the IOMMU
> driver's sva_device_shutdown() and remove_device() ops should call
> iopf_queue_remove_device(), which clears the partial list.
> 
yes. I think that should work for functional reset.
> Thanks,
> Jean

[Jacob Pan]

  reply	other threads:[~2018-05-22 23:32 UTC|newest]

Thread overview: 119+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-05-11 19:06 [PATCH v2 00/40] Shared Virtual Addressing for the IOMMU Jean-Philippe Brucker
2018-05-11 19:06 ` [PATCH v2 01/40] iommu: Introduce Shared Virtual Addressing API Jean-Philippe Brucker
2018-05-16 20:41   ` Jacob Pan
2018-05-17 10:02     ` Jean-Philippe Brucker
2018-05-17 17:00       ` Jacob Pan
2018-09-05 11:29   ` Auger Eric
2018-09-06 11:09     ` Jean-Philippe Brucker
2018-09-06 11:12       ` Christian König
2018-09-06 12:45         ` Jean-Philippe Brucker
2018-09-07  8:55           ` Christian König
2018-09-07 15:45             ` Jean-Philippe Brucker
2018-09-07 18:02               ` Christian König
2018-09-07 21:25                 ` Jacob Pan
2018-09-08  7:29                   ` Christian König
2018-09-12 12:40                     ` Jean-Philippe Brucker
2018-09-12 12:56                       ` Christian König
2018-09-13  7:15                   ` Tian, Kevin
2018-09-13  7:26             ` Tian, Kevin
2018-05-11 19:06 ` [PATCH v2 02/40] iommu/sva: Bind process address spaces to devices Jean-Philippe Brucker
2018-05-17 13:10   ` Jonathan Cameron
2018-05-21 14:43     ` Jean-Philippe Brucker
2018-09-05 11:29   ` Auger Eric
2018-09-06 11:09     ` Jean-Philippe Brucker
2018-05-11 19:06 ` [PATCH v2 03/40] iommu/sva: Manage process address spaces Jean-Philippe Brucker
2018-05-16 23:31   ` Jacob Pan
2018-05-17 10:02     ` Jean-Philippe Brucker
2018-05-22 16:43       ` Jacob Pan
2018-05-24 11:44         ` Jean-Philippe Brucker
2018-05-24 11:50           ` Ilias Apalodimas
2018-05-24 15:04             ` Jean-Philippe Brucker
2018-05-25  6:33               ` Ilias Apalodimas
2018-05-25  8:39                 ` Jonathan Cameron
2018-05-26  2:24                   ` Kenneth Lee
     [not found]                   ` <20180526022445.GA6069@kllp05>
2018-06-11 16:10                     ` Kenneth Lee
2018-06-11 16:32                   ` Kenneth Lee
2018-05-17 14:25   ` Jonathan Cameron
2018-05-21 14:44     ` Jean-Philippe Brucker
2018-09-05 12:14   ` Auger Eric
2018-09-05 18:18     ` Jacob Pan
2018-09-06 17:40       ` Jean-Philippe Brucker
2018-09-06 11:10     ` Jean-Philippe Brucker
2018-05-11 19:06 ` [PATCH v2 04/40] iommu/sva: Add a mm_exit callback for device drivers Jean-Philippe Brucker
2018-09-05 13:23   ` Auger Eric
2018-09-06 11:10     ` Jean-Philippe Brucker
2018-05-11 19:06 ` [PATCH v2 05/40] iommu/sva: Track mm changes with an MMU notifier Jean-Philippe Brucker
2018-05-17 14:25   ` Jonathan Cameron
2018-05-21 14:44     ` Jean-Philippe Brucker
2018-05-11 19:06 ` [PATCH v2 06/40] iommu/sva: Search mm by PASID Jean-Philippe Brucker
2018-05-11 19:06 ` [PATCH v2 07/40] iommu: Add a page fault handler Jean-Philippe Brucker
2018-05-17 15:25   ` Jonathan Cameron
2018-05-21 14:48     ` Jean-Philippe Brucker
2018-05-18 18:04   ` Jacob Pan
2018-05-21 14:49     ` Jean-Philippe Brucker
2018-05-22 23:35       ` Jacob Pan [this message]
2018-05-24 11:44         ` Jean-Philippe Brucker
2018-05-26  0:35           ` Jacob Pan
2018-05-29 10:00             ` Jean-Philippe Brucker
2018-05-11 19:06 ` [PATCH v2 08/40] iommu/iopf: Handle mm faults Jean-Philippe Brucker
2018-05-11 19:06 ` [PATCH v2 09/40] iommu/sva: Register page fault handler Jean-Philippe Brucker
2018-05-11 19:06 ` [PATCH v2 10/40] mm: export symbol mm_access Jean-Philippe Brucker
2018-05-11 19:06 ` [PATCH v2 11/40] mm: export symbol find_get_task_by_vpid Jean-Philippe Brucker
2018-05-11 19:06 ` [PATCH v2 12/40] mm: export symbol mmput_async Jean-Philippe Brucker
2018-05-11 19:06 ` [PATCH v2 13/40] vfio: Add support for Shared Virtual Addressing Jean-Philippe Brucker
2018-05-17 15:58   ` Jonathan Cameron
2018-05-21 14:51     ` Jean-Philippe Brucker
2018-05-23  9:38   ` Xu Zaibo
2018-05-24 11:44     ` Jean-Philippe Brucker
     [not found]       ` <5B06B17C.1090809@huawei.com>
2018-05-24 15:04         ` Jean-Philippe Brucker
2018-05-25  2:39           ` Xu Zaibo
2018-05-25  9:47             ` Jean-Philippe Brucker
     [not found]               ` <5B08DA21.3070507@huawei.com>
2018-05-29 11:55                 ` Jean-Philippe Brucker
2018-05-29 12:24                   ` Xu Zaibo
2018-08-27  8:06   ` Xu Zaibo
2018-08-31 13:34     ` Jean-Philippe Brucker
2018-09-01  2:23       ` Xu Zaibo
2018-09-03 10:34         ` Jean-Philippe Brucker
2018-09-04  2:12           ` Xu Zaibo
2018-09-04 10:57             ` Jean-Philippe Brucker
2018-09-05  3:15               ` Xu Zaibo
2018-09-05 11:02                 ` Jean-Philippe Brucker
2018-09-06  7:26                   ` Xu Zaibo
2018-05-11 19:06 ` [PATCH v2 14/40] dt-bindings: document stall and PASID properties for IOMMU masters Jean-Philippe Brucker
2018-05-11 19:06 ` [PATCH v2 15/40] iommu/of: Add stall and pasid properties to iommu_fwspec Jean-Philippe Brucker
2018-05-11 19:06 ` [PATCH v2 16/40] arm64: mm: Pin down ASIDs for sharing mm with devices Jean-Philippe Brucker
2018-05-15 14:16   ` Catalin Marinas
2018-05-17 10:01     ` Jean-Philippe Brucker
2018-05-11 19:06 ` [PATCH v2 17/40] iommu/arm-smmu-v3: Link domains and devices Jean-Philippe Brucker
2018-05-17 16:07   ` Jonathan Cameron
2018-05-21 14:49     ` Jean-Philippe Brucker
2018-09-10 15:16   ` Auger Eric
2018-05-11 19:06 ` [PATCH v2 18/40] iommu/io-pgtable-arm: Factor out ARM LPAE register defines Jean-Philippe Brucker
2018-05-11 19:06 ` [PATCH v2 19/40] iommu: Add generic PASID table library Jean-Philippe Brucker
2018-05-11 19:06 ` [PATCH v2 20/40] iommu/arm-smmu-v3: Move context descriptor code Jean-Philippe Brucker
2018-05-11 19:06 ` [PATCH v2 21/40] iommu/arm-smmu-v3: Add support for Substream IDs Jean-Philippe Brucker
2018-05-31 11:01   ` Bharat Kumar Gogada
2018-06-01 10:46     ` Jean-Philippe Brucker
2018-05-11 19:06 ` [PATCH v2 22/40] iommu/arm-smmu-v3: Add second level of context descriptor table Jean-Philippe Brucker
2018-05-11 19:06 ` [PATCH v2 23/40] iommu/arm-smmu-v3: Share process page tables Jean-Philippe Brucker
2018-05-11 19:06 ` [PATCH v2 24/40] iommu/arm-smmu-v3: Seize private ASID Jean-Philippe Brucker
2018-05-11 19:06 ` [PATCH v2 25/40] iommu/arm-smmu-v3: Add support for VHE Jean-Philippe Brucker
2018-05-11 19:06 ` [PATCH v2 26/40] iommu/arm-smmu-v3: Enable broadcast TLB maintenance Jean-Philippe Brucker
2018-05-11 19:06 ` [PATCH v2 27/40] iommu/arm-smmu-v3: Add SVA feature checking Jean-Philippe Brucker
2018-05-11 19:06 ` [PATCH v2 28/40] iommu/arm-smmu-v3: Implement mm operations Jean-Philippe Brucker
2018-05-11 19:06 ` [PATCH v2 29/40] iommu/arm-smmu-v3: Add support for Hardware Translation Table Update Jean-Philippe Brucker
2018-05-11 19:06 ` [PATCH v2 30/40] iommu/arm-smmu-v3: Register I/O Page Fault queue Jean-Philippe Brucker
2018-05-11 19:06 ` [PATCH v2 31/40] iommu/arm-smmu-v3: Improve add_device error handling Jean-Philippe Brucker
2018-05-11 19:06 ` [PATCH v2 32/40] iommu/arm-smmu-v3: Maintain a SID->device structure Jean-Philippe Brucker
2018-05-11 19:06 ` [PATCH v2 33/40] iommu/arm-smmu-v3: Add stall support for platform devices Jean-Philippe Brucker
2018-05-11 19:06 ` [PATCH v2 34/40] ACPI/IORT: Check ATS capability in root complex nodes Jean-Philippe Brucker
2018-05-11 19:06 ` [PATCH v2 35/40] iommu/arm-smmu-v3: Add support for PCI ATS Jean-Philippe Brucker
2018-05-19 17:25   ` Sinan Kaya
2018-05-21 14:52     ` Jean-Philippe Brucker
2018-05-11 19:06 ` [PATCH v2 36/40] iommu/arm-smmu-v3: Hook up ATC invalidation to mm ops Jean-Philippe Brucker
2018-05-11 19:06 ` [PATCH v2 37/40] iommu/arm-smmu-v3: Disable tagged pointers Jean-Philippe Brucker
2018-05-11 19:06 ` [PATCH v2 38/40] PCI: Make "PRG Response PASID Required" handling common Jean-Philippe Brucker
2018-05-11 19:06 ` [PATCH v2 39/40] iommu/arm-smmu-v3: Add support for PRI Jean-Philippe Brucker
2018-05-25 14:08   ` Bharat Kumar Gogada
2018-05-29 10:27     ` Jean-Philippe Brucker
2018-05-11 19:06 ` [PATCH v2 40/40] iommu/arm-smmu-v3: Add support for PCI PASID Jean-Philippe Brucker

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180522163521.413e60c6@jacob-builder \
    --to=jacob.jun.pan@linux.intel.com \
    --cc=ashok.raj@intel.com \
    --cc=bharatku@xilinx.com \
    --cc=christian.koenig@amd.com \
    --cc=devicetree@vger.kernel.org \
    --cc=dwmw2@infradead.org \
    --cc=ilias.apalodimas@linaro.org \
    --cc=iommu@lists.linux-foundation.org \
    --cc=jean-philippe.brucker@arm.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-acpi@vger.kernel.org \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-pci@vger.kernel.org \
    --cc=okaya@codeaurora.org \
    --cc=rfranz@cavium.com \
    --cc=rgummal@xilinx.com \
    --cc=will.deacon@arm.com \
    --cc=xuzaibo@huawei.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).