From: "Christian König" <ckoenig.leichtzumerken@gmail.com>
To: Andrey Grodzovsky <andrey.grodzovsky@amd.com>,
amd-gfx@lists.freedesktop.org, dri-devel@lists.freedesktop.org,
daniel.vetter@ffwll.ch, robh@kernel.org, l.stach@pengutronix.de,
yuq825@gmail.com, eric@anholt.net
Cc: Alexander.Deucher@amd.com, gregkh@linuxfoundation.org
Subject: Re: [PATCH v4 07/14] drm/amdgpu: Register IOMMU topology notifier per device.
Date: Tue, 19 Jan 2021 09:48:03 +0100 [thread overview]
Message-ID: <2978ebf8-858d-6e8e-5657-1d0d615d56e0@gmail.com> (raw)
In-Reply-To: <1611003683-3534-8-git-send-email-andrey.grodzovsky@amd.com>
Am 18.01.21 um 22:01 schrieb Andrey Grodzovsky:
> Handle all DMA IOMMU group related dependencies before the
> group is removed.
>
> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 5 ++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 46 ++++++++++++++++++++++++++++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c | 2 +-
> drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h | 1 +
> drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 10 +++++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 2 ++
> 6 files changed, 65 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 478a7d8..2953420 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -51,6 +51,7 @@
> #include <linux/dma-fence.h>
> #include <linux/pci.h>
> #include <linux/aer.h>
> +#include <linux/notifier.h>
>
> #include <drm/ttm/ttm_bo_api.h>
> #include <drm/ttm/ttm_bo_driver.h>
> @@ -1041,6 +1042,10 @@ struct amdgpu_device {
>
> bool in_pci_err_recovery;
> struct pci_saved_state *pci_state;
> +
> + struct notifier_block nb;
> + struct blocking_notifier_head notifier;
> + struct list_head device_bo_list;
> };
>
> static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 45e23e3..e99f4f1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -70,6 +70,8 @@
> #include <drm/task_barrier.h>
> #include <linux/pm_runtime.h>
>
> +#include <linux/iommu.h>
> +
> MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
> MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
> MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
> @@ -3200,6 +3202,39 @@ static const struct attribute *amdgpu_dev_attributes[] = {
> };
>
>
> +static int amdgpu_iommu_group_notifier(struct notifier_block *nb,
> + unsigned long action, void *data)
> +{
> + struct amdgpu_device *adev = container_of(nb, struct amdgpu_device, nb);
> + struct amdgpu_bo *bo = NULL;
> +
> + /*
> + * Following is a set of IOMMU group dependencies taken care of before
> + * device's IOMMU group is removed
> + */
> + if (action == IOMMU_GROUP_NOTIFY_DEL_DEVICE) {
> +
> + spin_lock(&ttm_bo_glob.lru_lock);
> + list_for_each_entry(bo, &adev->device_bo_list, bo) {
> + if (bo->tbo.ttm)
> + ttm_tt_unpopulate(bo->tbo.bdev, bo->tbo.ttm);
> + }
> + spin_unlock(&ttm_bo_glob.lru_lock);
That approach won't work. ttm_tt_unpopulate() might sleep on an IOMMU lock,
and sleeping is not allowed while the lru_lock spinlock is held.
You need to use a mutex here or, even better, make sure you can access the
device_bo_list without a lock at this point.
Christian.
> +
> + if (adev->irq.ih.use_bus_addr)
> + amdgpu_ih_ring_fini(adev, &adev->irq.ih);
> + if (adev->irq.ih1.use_bus_addr)
> + amdgpu_ih_ring_fini(adev, &adev->irq.ih1);
> + if (adev->irq.ih2.use_bus_addr)
> + amdgpu_ih_ring_fini(adev, &adev->irq.ih2);
> +
> + amdgpu_gart_dummy_page_fini(adev);
> + }
> +
> + return NOTIFY_OK;
> +}
> +
> +
> /**
> * amdgpu_device_init - initialize the driver
> *
> @@ -3304,6 +3339,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>
> INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
>
> + INIT_LIST_HEAD(&adev->device_bo_list);
> +
> adev->gfx.gfx_off_req_count = 1;
> adev->pm.ac_power = power_supply_is_system_supplied() > 0;
>
> @@ -3575,6 +3612,15 @@ int amdgpu_device_init(struct amdgpu_device *adev,
> if (amdgpu_device_cache_pci_state(adev->pdev))
> pci_restore_state(pdev);
>
> + BLOCKING_INIT_NOTIFIER_HEAD(&adev->notifier);
> + adev->nb.notifier_call = amdgpu_iommu_group_notifier;
> +
> + if (adev->dev->iommu_group) {
> + r = iommu_group_register_notifier(adev->dev->iommu_group, &adev->nb);
> + if (r)
> + goto failed;
> + }
> +
> return 0;
>
> failed:
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
> index 0db9330..486ad6d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
> @@ -92,7 +92,7 @@ static int amdgpu_gart_dummy_page_init(struct amdgpu_device *adev)
> *
> * Frees the dummy page used by the driver (all asics).
> */
> -static void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev)
> +void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev)
> {
> if (!adev->dummy_page_addr)
> return;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
> index afa2e28..5678d9c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
> @@ -61,6 +61,7 @@ int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev);
> void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev);
> int amdgpu_gart_init(struct amdgpu_device *adev);
> void amdgpu_gart_fini(struct amdgpu_device *adev);
> +void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev);
> int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
> int pages);
> int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> index 6cc9919..4a1de69 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> @@ -94,6 +94,10 @@ static void amdgpu_bo_destroy(struct ttm_buffer_object *tbo)
> }
> amdgpu_bo_unref(&bo->parent);
>
> + spin_lock(&ttm_bo_glob.lru_lock);
> + list_del(&bo->bo);
> + spin_unlock(&ttm_bo_glob.lru_lock);
> +
> kfree(bo->metadata);
> kfree(bo);
> }
> @@ -613,6 +617,12 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
> if (bp->type == ttm_bo_type_device)
> bo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
>
> + INIT_LIST_HEAD(&bo->bo);
> +
> + spin_lock(&ttm_bo_glob.lru_lock);
> + list_add_tail(&bo->bo, &adev->device_bo_list);
> + spin_unlock(&ttm_bo_glob.lru_lock);
> +
> return 0;
>
> fail_unreserve:
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> index 9ac3756..5ae8555 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> @@ -110,6 +110,8 @@ struct amdgpu_bo {
> struct list_head shadow_list;
>
> struct kgd_mem *kfd_bo;
> +
> + struct list_head bo;
> };
>
> static inline struct amdgpu_bo *ttm_to_amdgpu_bo(struct ttm_buffer_object *tbo)
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel
next prev parent reply other threads:[~2021-01-19 8:48 UTC|newest]
Thread overview: 98+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-01-18 21:01 [PATCH v4 00/14] RFC Support hot device unplug in amdgpu Andrey Grodzovsky
2021-01-18 21:01 ` [PATCH v4 01/14] drm/ttm: Remap all page faults to per process dummy page Andrey Grodzovsky
2021-01-18 21:48 ` Alex Deucher
2021-01-19 8:41 ` Christian König
2021-01-19 13:56 ` Daniel Vetter
2021-01-25 15:28 ` Andrey Grodzovsky
2021-01-27 14:29 ` Andrey Grodzovsky
2021-02-02 14:21 ` Daniel Vetter
2021-01-18 21:01 ` [PATCH v4 02/14] drm: Unamp the entire device address space on device unplug Andrey Grodzovsky
2021-01-18 21:01 ` [PATCH v4 03/14] drm/ttm: Expose ttm_tt_unpopulate for driver use Andrey Grodzovsky
2021-01-18 21:01 ` [PATCH v4 04/14] drm/sched: Cancel and flush all oustatdning jobs before finish Andrey Grodzovsky
2021-01-18 21:49 ` Alex Deucher
2021-01-19 8:42 ` Christian König
2021-01-19 9:50 ` Christian König
2021-01-18 21:01 ` [PATCH v4 05/14] drm/amdgpu: Split amdgpu_device_fini into early and late Andrey Grodzovsky
2021-01-19 8:45 ` Christian König
2021-01-18 21:01 ` [PATCH v4 06/14] drm/amdgpu: Add early fini callback Andrey Grodzovsky
2021-01-18 21:01 ` [PATCH v4 07/14] drm/amdgpu: Register IOMMU topology notifier per device Andrey Grodzovsky
2021-01-18 21:52 ` Alex Deucher
2021-01-19 8:48 ` Christian König [this message]
2021-01-19 13:45 ` Daniel Vetter
2021-01-19 21:21 ` Andrey Grodzovsky
2021-01-19 22:01 ` Daniel Vetter
2021-01-20 4:21 ` Andrey Grodzovsky
2021-01-20 8:38 ` Daniel Vetter
[not found] ` <1a5f7ccb-1f91-91be-1cb1-e7cb43ac2c13@amd.com>
2021-01-21 10:48 ` Daniel Vetter
2021-01-20 5:01 ` Andrey Grodzovsky
2021-01-20 19:38 ` Andrey Grodzovsky
2021-01-21 10:42 ` Christian König
2021-01-18 21:01 ` [PATCH v4 08/14] drm/amdgpu: Fix a bunch of sdma code crash post device unplug Andrey Grodzovsky
2021-01-19 8:51 ` Christian König
2021-01-18 21:01 ` [PATCH v4 09/14] drm/amdgpu: Remap all page faults to per process dummy page Andrey Grodzovsky
2021-01-19 8:52 ` Christian König
2021-01-18 21:01 ` [PATCH v4 10/14] dmr/amdgpu: Move some sysfs attrs creation to default_attr Andrey Grodzovsky
2021-01-19 7:34 ` Greg KH
2021-01-19 16:36 ` Andrey Grodzovsky
2021-01-19 17:47 ` Greg KH
2021-01-19 19:04 ` Alex Deucher
2021-01-19 19:16 ` Andrey Grodzovsky
2021-01-19 19:41 ` Greg KH
2021-01-19 8:53 ` Christian König
2021-01-18 21:01 ` [PATCH v4 11/14] drm/amdgpu: Guard against write accesses after device removal Andrey Grodzovsky
2021-01-19 8:55 ` Christian König
2021-01-19 15:35 ` Andrey Grodzovsky
2021-01-19 15:39 ` Christian König
2021-01-19 18:05 ` Daniel Vetter
2021-01-19 18:22 ` Andrey Grodzovsky
2021-01-19 18:59 ` Christian König
2021-01-19 19:16 ` Andrey Grodzovsky
2021-01-20 19:34 ` Andrey Grodzovsky
2021-01-28 17:23 ` Andrey Grodzovsky
2021-01-29 15:16 ` Christian König
2021-01-29 17:35 ` Andrey Grodzovsky
2021-01-29 19:25 ` Christian König
2021-02-05 16:22 ` Andrey Grodzovsky
2021-02-05 22:10 ` Daniel Vetter
2021-02-05 23:09 ` Andrey Grodzovsky
2021-02-06 14:18 ` Daniel Vetter
2021-02-07 21:28 ` Andrey Grodzovsky
2021-02-07 21:50 ` Daniel Vetter
2021-02-08 9:37 ` Christian König
2021-02-08 9:48 ` Daniel Vetter
2021-02-08 10:03 ` Christian König
2021-02-08 10:11 ` Daniel Vetter
2021-02-08 13:59 ` Christian König
2021-02-08 16:23 ` Daniel Vetter
2021-02-08 22:15 ` Andrey Grodzovsky
2021-02-09 7:58 ` Christian König
2021-02-09 14:30 ` Andrey Grodzovsky
2021-02-09 15:40 ` Christian König
2021-02-10 22:01 ` Andrey Grodzovsky
2021-02-12 15:00 ` Andrey Grodzovsky
2021-02-08 22:09 ` Andrey Grodzovsky
2021-02-09 8:27 ` Christian König
2021-02-09 9:46 ` Daniel Vetter
2021-01-18 21:01 ` [PATCH v4 12/14] drm/scheduler: Job timeout handler returns status Andrey Grodzovsky
2021-01-19 7:53 ` Christian König
2021-01-19 17:47 ` Luben Tuikov
2021-01-19 18:53 ` Christian König
2021-01-18 21:01 ` [PATCH v4 13/14] drm/sched: Make timeout timer rearm conditional Andrey Grodzovsky
2021-01-18 21:01 ` [PATCH v4 14/14] drm/amdgpu: Prevent any job recoveries after device is unplugged Andrey Grodzovsky
2021-01-19 14:16 ` [PATCH v4 00/14] RFC Support hot device unplug in amdgpu Daniel Vetter
2021-01-19 17:31 ` Andrey Grodzovsky
2021-01-19 18:08 ` Daniel Vetter
2021-01-19 18:18 ` Andrey Grodzovsky
2021-01-20 9:05 ` Daniel Vetter
2021-01-20 14:19 ` Andrey Grodzovsky
2021-01-20 15:59 ` Daniel Vetter
2021-02-08 5:59 ` Andrey Grodzovsky
2021-02-08 7:27 ` Daniel Vetter
2021-02-09 4:01 ` Andrey Grodzovsky
2021-02-09 9:50 ` Daniel Vetter
2021-02-09 15:34 ` Andrey Grodzovsky
2021-02-18 20:03 ` Andrey Grodzovsky
2021-02-19 10:24 ` Daniel Vetter
2021-02-24 16:30 ` Andrey Grodzovsky
2021-02-25 10:25 ` Daniel Vetter
2021-02-25 16:12 ` Andrey Grodzovsky
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=2978ebf8-858d-6e8e-5657-1d0d615d56e0@gmail.com \
--to=ckoenig.leichtzumerken@gmail.com \
--cc=Alexander.Deucher@amd.com \
--cc=amd-gfx@lists.freedesktop.org \
--cc=andrey.grodzovsky@amd.com \
--cc=christian.koenig@amd.com \
--cc=daniel.vetter@ffwll.ch \
--cc=dri-devel@lists.freedesktop.org \
--cc=eric@anholt.net \
--cc=gregkh@linuxfoundation.org \
--cc=l.stach@pengutronix.de \
--cc=robh@kernel.org \
--cc=yuq825@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).