dri-devel.lists.freedesktop.org archive mirror
 help / color / mirror / Atom feed
From: "Christian König" <ckoenig.leichtzumerken@gmail.com>
To: Andrey Grodzovsky <andrey.grodzovsky@amd.com>,
	amd-gfx@lists.freedesktop.org, dri-devel@lists.freedesktop.org,
	daniel.vetter@ffwll.ch, robh@kernel.org, l.stach@pengutronix.de,
	yuq825@gmail.com, eric@anholt.net
Cc: Alexander.Deucher@amd.com, gregkh@linuxfoundation.org
Subject: Re: [PATCH v4 07/14] drm/amdgpu: Register IOMMU topology notifier per device.
Date: Tue, 19 Jan 2021 09:48:03 +0100	[thread overview]
Message-ID: <2978ebf8-858d-6e8e-5657-1d0d615d56e0@gmail.com> (raw)
In-Reply-To: <1611003683-3534-8-git-send-email-andrey.grodzovsky@amd.com>

Am 18.01.21 um 22:01 schrieb Andrey Grodzovsky:
> Handle all DMA IOMMU group related dependencies before the
> group is removed.
>
> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  5 ++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 46 ++++++++++++++++++++++++++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c   |  2 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h   |  1 +
>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 10 +++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.h |  2 ++
>   6 files changed, 65 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 478a7d8..2953420 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -51,6 +51,7 @@
>   #include <linux/dma-fence.h>
>   #include <linux/pci.h>
>   #include <linux/aer.h>
> +#include <linux/notifier.h>
>   
>   #include <drm/ttm/ttm_bo_api.h>
>   #include <drm/ttm/ttm_bo_driver.h>
> @@ -1041,6 +1042,10 @@ struct amdgpu_device {
>   
>   	bool                            in_pci_err_recovery;
>   	struct pci_saved_state          *pci_state;
> +
> +	struct notifier_block		nb;
> +	struct blocking_notifier_head	notifier;
> +	struct list_head		device_bo_list;
>   };
>   
>   static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 45e23e3..e99f4f1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -70,6 +70,8 @@
>   #include <drm/task_barrier.h>
>   #include <linux/pm_runtime.h>
>   
> +#include <linux/iommu.h>
> +
>   MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
>   MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
>   MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
> @@ -3200,6 +3202,39 @@ static const struct attribute *amdgpu_dev_attributes[] = {
>   };
>   
>   
> +static int amdgpu_iommu_group_notifier(struct notifier_block *nb,
> +				     unsigned long action, void *data)
> +{
> +	struct amdgpu_device *adev = container_of(nb, struct amdgpu_device, nb);
> +	struct amdgpu_bo *bo = NULL;
> +
> +	/*
> +	 * Following is a set of IOMMU group dependencies taken care of before
> +	 * device's IOMMU group is removed
> +	 */
> +	if (action == IOMMU_GROUP_NOTIFY_DEL_DEVICE) {
> +
> +		spin_lock(&ttm_bo_glob.lru_lock);
> +		list_for_each_entry(bo, &adev->device_bo_list, bo) {
> +			if (bo->tbo.ttm)
> +				ttm_tt_unpopulate(bo->tbo.bdev, bo->tbo.ttm);
> +		}
> +		spin_unlock(&ttm_bo_glob.lru_lock);

That approach won't work. ttm_tt_unpopulate() might sleep on an IOMMU lock,
and sleeping is not allowed while holding a spinlock (here the lru_lock).

You need to use a mutex here or, even better, make sure you can access the
device_bo_list without a lock at this point.

Christian.

> +
> +		if (adev->irq.ih.use_bus_addr)
> +			amdgpu_ih_ring_fini(adev, &adev->irq.ih);
> +		if (adev->irq.ih1.use_bus_addr)
> +			amdgpu_ih_ring_fini(adev, &adev->irq.ih1);
> +		if (adev->irq.ih2.use_bus_addr)
> +			amdgpu_ih_ring_fini(adev, &adev->irq.ih2);
> +
> +		amdgpu_gart_dummy_page_fini(adev);
> +	}
> +
> +	return NOTIFY_OK;
> +}
> +
> +
>   /**
>    * amdgpu_device_init - initialize the driver
>    *
> @@ -3304,6 +3339,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>   
>   	INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
>   
> +	INIT_LIST_HEAD(&adev->device_bo_list);
> +
>   	adev->gfx.gfx_off_req_count = 1;
>   	adev->pm.ac_power = power_supply_is_system_supplied() > 0;
>   
> @@ -3575,6 +3612,15 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>   	if (amdgpu_device_cache_pci_state(adev->pdev))
>   		pci_restore_state(pdev);
>   
> +	BLOCKING_INIT_NOTIFIER_HEAD(&adev->notifier);
> +	adev->nb.notifier_call = amdgpu_iommu_group_notifier;
> +
> +	if (adev->dev->iommu_group) {
> +		r = iommu_group_register_notifier(adev->dev->iommu_group, &adev->nb);
> +		if (r)
> +			goto failed;
> +	}
> +
>   	return 0;
>   
>   failed:
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
> index 0db9330..486ad6d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
> @@ -92,7 +92,7 @@ static int amdgpu_gart_dummy_page_init(struct amdgpu_device *adev)
>    *
>    * Frees the dummy page used by the driver (all asics).
>    */
> -static void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev)
> +void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev)
>   {
>   	if (!adev->dummy_page_addr)
>   		return;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
> index afa2e28..5678d9c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
> @@ -61,6 +61,7 @@ int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev);
>   void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev);
>   int amdgpu_gart_init(struct amdgpu_device *adev);
>   void amdgpu_gart_fini(struct amdgpu_device *adev);
> +void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev);
>   int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
>   		       int pages);
>   int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> index 6cc9919..4a1de69 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> @@ -94,6 +94,10 @@ static void amdgpu_bo_destroy(struct ttm_buffer_object *tbo)
>   	}
>   	amdgpu_bo_unref(&bo->parent);
>   
> +	spin_lock(&ttm_bo_glob.lru_lock);
> +	list_del(&bo->bo);
> +	spin_unlock(&ttm_bo_glob.lru_lock);
> +
>   	kfree(bo->metadata);
>   	kfree(bo);
>   }
> @@ -613,6 +617,12 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
>   	if (bp->type == ttm_bo_type_device)
>   		bo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
>   
> +	INIT_LIST_HEAD(&bo->bo);
> +
> +	spin_lock(&ttm_bo_glob.lru_lock);
> +	list_add_tail(&bo->bo, &adev->device_bo_list);
> +	spin_unlock(&ttm_bo_glob.lru_lock);
> +
>   	return 0;
>   
>   fail_unreserve:
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> index 9ac3756..5ae8555 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> @@ -110,6 +110,8 @@ struct amdgpu_bo {
>   	struct list_head		shadow_list;
>   
>   	struct kgd_mem                  *kfd_bo;
> +
> +	struct list_head		bo;
>   };
>   
>   static inline struct amdgpu_bo *ttm_to_amdgpu_bo(struct ttm_buffer_object *tbo)

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

  parent reply	other threads:[~2021-01-19  8:48 UTC|newest]

Thread overview: 98+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-01-18 21:01 [PATCH v4 00/14] RFC Support hot device unplug in amdgpu Andrey Grodzovsky
2021-01-18 21:01 ` [PATCH v4 01/14] drm/ttm: Remap all page faults to per process dummy page Andrey Grodzovsky
2021-01-18 21:48   ` Alex Deucher
2021-01-19  8:41   ` Christian König
2021-01-19 13:56   ` Daniel Vetter
2021-01-25 15:28     ` Andrey Grodzovsky
2021-01-27 14:29       ` Andrey Grodzovsky
2021-02-02 14:21         ` Daniel Vetter
2021-01-18 21:01 ` [PATCH v4 02/14] drm: Unamp the entire device address space on device unplug Andrey Grodzovsky
2021-01-18 21:01 ` [PATCH v4 03/14] drm/ttm: Expose ttm_tt_unpopulate for driver use Andrey Grodzovsky
2021-01-18 21:01 ` [PATCH v4 04/14] drm/sched: Cancel and flush all oustatdning jobs before finish Andrey Grodzovsky
2021-01-18 21:49   ` Alex Deucher
2021-01-19  8:42   ` Christian König
2021-01-19  9:50     ` Christian König
2021-01-18 21:01 ` [PATCH v4 05/14] drm/amdgpu: Split amdgpu_device_fini into early and late Andrey Grodzovsky
2021-01-19  8:45   ` Christian König
2021-01-18 21:01 ` [PATCH v4 06/14] drm/amdgpu: Add early fini callback Andrey Grodzovsky
2021-01-18 21:01 ` [PATCH v4 07/14] drm/amdgpu: Register IOMMU topology notifier per device Andrey Grodzovsky
2021-01-18 21:52   ` Alex Deucher
2021-01-19  8:48   ` Christian König [this message]
2021-01-19 13:45     ` Daniel Vetter
2021-01-19 21:21       ` Andrey Grodzovsky
2021-01-19 22:01         ` Daniel Vetter
2021-01-20  4:21           ` Andrey Grodzovsky
2021-01-20  8:38             ` Daniel Vetter
     [not found]               ` <1a5f7ccb-1f91-91be-1cb1-e7cb43ac2c13@amd.com>
2021-01-21 10:48                 ` Daniel Vetter
2021-01-20  5:01     ` Andrey Grodzovsky
2021-01-20 19:38       ` Andrey Grodzovsky
2021-01-21 10:42         ` Christian König
2021-01-18 21:01 ` [PATCH v4 08/14] drm/amdgpu: Fix a bunch of sdma code crash post device unplug Andrey Grodzovsky
2021-01-19  8:51   ` Christian König
2021-01-18 21:01 ` [PATCH v4 09/14] drm/amdgpu: Remap all page faults to per process dummy page Andrey Grodzovsky
2021-01-19  8:52   ` Christian König
2021-01-18 21:01 ` [PATCH v4 10/14] dmr/amdgpu: Move some sysfs attrs creation to default_attr Andrey Grodzovsky
2021-01-19  7:34   ` Greg KH
2021-01-19 16:36     ` Andrey Grodzovsky
2021-01-19 17:47       ` Greg KH
2021-01-19 19:04         ` Alex Deucher
2021-01-19 19:16           ` Andrey Grodzovsky
2021-01-19 19:41           ` Greg KH
2021-01-19  8:53   ` Christian König
2021-01-18 21:01 ` [PATCH v4 11/14] drm/amdgpu: Guard against write accesses after device removal Andrey Grodzovsky
2021-01-19  8:55   ` Christian König
2021-01-19 15:35     ` Andrey Grodzovsky
2021-01-19 15:39       ` Christian König
2021-01-19 18:05       ` Daniel Vetter
2021-01-19 18:22         ` Andrey Grodzovsky
2021-01-19 18:59           ` Christian König
2021-01-19 19:16             ` Andrey Grodzovsky
2021-01-20 19:34               ` Andrey Grodzovsky
2021-01-28 17:23             ` Andrey Grodzovsky
2021-01-29 15:16               ` Christian König
2021-01-29 17:35                 ` Andrey Grodzovsky
2021-01-29 19:25                   ` Christian König
2021-02-05 16:22                     ` Andrey Grodzovsky
2021-02-05 22:10                       ` Daniel Vetter
2021-02-05 23:09                         ` Andrey Grodzovsky
2021-02-06 14:18                           ` Daniel Vetter
2021-02-07 21:28                         ` Andrey Grodzovsky
2021-02-07 21:50                           ` Daniel Vetter
2021-02-08  9:37                             ` Christian König
2021-02-08  9:48                               ` Daniel Vetter
2021-02-08 10:03                                 ` Christian König
2021-02-08 10:11                                   ` Daniel Vetter
2021-02-08 13:59                                     ` Christian König
2021-02-08 16:23                                       ` Daniel Vetter
2021-02-08 22:15                                         ` Andrey Grodzovsky
2021-02-09  7:58                                           ` Christian König
2021-02-09 14:30                                             ` Andrey Grodzovsky
2021-02-09 15:40                                               ` Christian König
2021-02-10 22:01                                                 ` Andrey Grodzovsky
2021-02-12 15:00                                                   ` Andrey Grodzovsky
2021-02-08 22:09                               ` Andrey Grodzovsky
2021-02-09  8:27                                 ` Christian König
2021-02-09  9:46                                   ` Daniel Vetter
2021-01-18 21:01 ` [PATCH v4 12/14] drm/scheduler: Job timeout handler returns status Andrey Grodzovsky
2021-01-19  7:53   ` Christian König
2021-01-19 17:47     ` Luben Tuikov
2021-01-19 18:53       ` Christian König
2021-01-18 21:01 ` [PATCH v4 13/14] drm/sched: Make timeout timer rearm conditional Andrey Grodzovsky
2021-01-18 21:01 ` [PATCH v4 14/14] drm/amdgpu: Prevent any job recoveries after device is unplugged Andrey Grodzovsky
2021-01-19 14:16 ` [PATCH v4 00/14] RFC Support hot device unplug in amdgpu Daniel Vetter
2021-01-19 17:31   ` Andrey Grodzovsky
2021-01-19 18:08     ` Daniel Vetter
2021-01-19 18:18       ` Andrey Grodzovsky
2021-01-20  9:05         ` Daniel Vetter
2021-01-20 14:19           ` Andrey Grodzovsky
2021-01-20 15:59             ` Daniel Vetter
2021-02-08  5:59               ` Andrey Grodzovsky
2021-02-08  7:27                 ` Daniel Vetter
2021-02-09  4:01                   ` Andrey Grodzovsky
2021-02-09  9:50                     ` Daniel Vetter
2021-02-09 15:34                       ` Andrey Grodzovsky
2021-02-18 20:03                       ` Andrey Grodzovsky
2021-02-19 10:24                         ` Daniel Vetter
2021-02-24 16:30                           ` Andrey Grodzovsky
2021-02-25 10:25                             ` Daniel Vetter
2021-02-25 16:12                               ` Andrey Grodzovsky

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=2978ebf8-858d-6e8e-5657-1d0d615d56e0@gmail.com \
    --to=ckoenig.leichtzumerken@gmail.com \
    --cc=Alexander.Deucher@amd.com \
    --cc=amd-gfx@lists.freedesktop.org \
    --cc=andrey.grodzovsky@amd.com \
    --cc=christian.koenig@amd.com \
    --cc=daniel.vetter@ffwll.ch \
    --cc=dri-devel@lists.freedesktop.org \
    --cc=eric@anholt.net \
    --cc=gregkh@linuxfoundation.org \
    --cc=l.stach@pengutronix.de \
    --cc=robh@kernel.org \
    --cc=yuq825@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).