All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/amdkfd: Fix incorrect use of process->mm
@ 2018-10-02 22:41 Felix Kuehling
       [not found] ` <1538520072-1085-1-git-send-email-Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 4+ messages in thread
From: Felix Kuehling @ 2018-10-02 22:41 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: oded.gabbay-Re5JQEeQqe8AvxtiuMwx3w, Felix Kuehling

This mm_struct pointer should never be dereferenced. If running in
a user thread, just use current->mm. If running in a kernel worker
use get_task_mm to get a safe reference to the mm_struct.

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c  | 37 +++++++++++++++++-----
 1 file changed, 29 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index d6af31c..3bc0651d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -358,8 +358,8 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
 					struct queue *q,
 					struct qcm_process_device *qpd)
 {
-	int retval;
 	struct mqd_manager *mqd_mgr;
+	int retval;
 
 	mqd_mgr = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
 	if (!mqd_mgr)
@@ -387,8 +387,12 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
 	if (!q->properties.is_active)
 		return 0;
 
-	retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, q->queue,
-			&q->properties, q->process->mm);
+	if (WARN(q->process->mm != current->mm,
+		 "should only run in user thread"))
+		retval = -EFAULT;
+	else
+		retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, q->queue,
+					   &q->properties, current->mm);
 	if (retval)
 		goto out_uninit_mqd;
 
@@ -545,9 +549,15 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
 		retval = map_queues_cpsch(dqm);
 	else if (q->properties.is_active &&
 		 (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
-		  q->properties.type == KFD_QUEUE_TYPE_SDMA))
-		retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, q->queue,
-				       &q->properties, q->process->mm);
+		  q->properties.type == KFD_QUEUE_TYPE_SDMA)) {
+		if (WARN(q->process->mm != current->mm,
+			 "should only run in user thread"))
+			retval = -EFAULT;
+		else
+			retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd,
+						   q->pipe, q->queue,
+						   &q->properties, current->mm);
+	}
 
 out_unlock:
 	dqm_unlock(dqm);
@@ -653,6 +663,7 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
 static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
 					  struct qcm_process_device *qpd)
 {
+	struct mm_struct *mm = NULL;
 	struct queue *q;
 	struct mqd_manager *mqd_mgr;
 	struct kfd_process_device *pdd;
@@ -686,6 +697,15 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
 		kfd_flush_tlb(pdd);
 	}
 
+	/* Take a safe reference to the mm_struct, which may otherwise
+	 * disappear even while the kfd_process is still referenced.
+	 */
+	mm = get_task_mm(pdd->process->lead_thread);
+	if (!mm) {
+		retval = -EFAULT;
+		goto out;
+	}
+
 	/* activate all active queues on the qpd */
 	list_for_each_entry(q, &qpd->queues_list, list) {
 		if (!q->properties.is_evicted)
@@ -700,14 +720,15 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
 		q->properties.is_evicted = false;
 		q->properties.is_active = true;
 		retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe,
-				       q->queue, &q->properties,
-				       q->process->mm);
+				       q->queue, &q->properties, mm);
 		if (retval)
 			goto out;
 		dqm->queue_count++;
 	}
 	qpd->evicted = 0;
 out:
+	if (mm)
+		mmput(mm);
 	dqm_unlock(dqm);
 	return retval;
 }
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH] drm/amdkfd: Fix incorrect use of process->mm
       [not found] ` <1538520072-1085-1-git-send-email-Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
@ 2018-10-03 12:17   ` Kuehling, Felix
       [not found]     ` <DM5PR12MB170715EA570B77FDD0A0D5DB92E90-2J9CzHegvk9TCtO+SvGBKwdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
  2018-10-03 19:03   ` Oded Gabbay
  1 sibling, 1 reply; 4+ messages in thread
From: Kuehling, Felix @ 2018-10-03 12:17 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW, Deucher, Alexander
  Cc: oded.gabbay-Re5JQEeQqe8AvxtiuMwx3w

Hi Alex,

If it's not too late, I'd like to get this into 4.19. Sorry I missed this fix earlier.

Regards,
  Felix

________________________________________
From: Kuehling, Felix
Sent: Tuesday, October 2, 2018 6:41:12 PM
To: amd-gfx@lists.freedesktop.org
Cc: oded.gabbay@gmail.com; Kuehling, Felix
Subject: [PATCH] drm/amdkfd: Fix incorrect use of process->mm

This mm_struct pointer should never be dereferenced. If running in
a user thread, just use current->mm. If running in a kernel worker
use get_task_mm to get a safe reference to the mm_struct.

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c  | 37 +++++++++++++++++-----
 1 file changed, 29 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index d6af31c..3bc0651d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -358,8 +358,8 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
                                        struct queue *q,
                                        struct qcm_process_device *qpd)
 {
-       int retval;
        struct mqd_manager *mqd_mgr;
+       int retval;

        mqd_mgr = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
        if (!mqd_mgr)
@@ -387,8 +387,12 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
        if (!q->properties.is_active)
                return 0;

-       retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, q->queue,
-                       &q->properties, q->process->mm);
+       if (WARN(q->process->mm != current->mm,
+                "should only run in user thread"))
+               retval = -EFAULT;
+       else
+               retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, q->queue,
+                                          &q->properties, current->mm);
        if (retval)
                goto out_uninit_mqd;

@@ -545,9 +549,15 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
                retval = map_queues_cpsch(dqm);
        else if (q->properties.is_active &&
                 (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
-                 q->properties.type == KFD_QUEUE_TYPE_SDMA))
-               retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, q->queue,
-                                      &q->properties, q->process->mm);
+                 q->properties.type == KFD_QUEUE_TYPE_SDMA)) {
+               if (WARN(q->process->mm != current->mm,
+                        "should only run in user thread"))
+                       retval = -EFAULT;
+               else
+                       retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd,
+                                                  q->pipe, q->queue,
+                                                  &q->properties, current->mm);
+       }

 out_unlock:
        dqm_unlock(dqm);
@@ -653,6 +663,7 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
 static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
                                          struct qcm_process_device *qpd)
 {
+       struct mm_struct *mm = NULL;
        struct queue *q;
        struct mqd_manager *mqd_mgr;
        struct kfd_process_device *pdd;
@@ -686,6 +697,15 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
                kfd_flush_tlb(pdd);
        }

+       /* Take a safe reference to the mm_struct, which may otherwise
+        * disappear even while the kfd_process is still referenced.
+        */
+       mm = get_task_mm(pdd->process->lead_thread);
+       if (!mm) {
+               retval = -EFAULT;
+               goto out;
+       }
+
        /* activate all active queues on the qpd */
        list_for_each_entry(q, &qpd->queues_list, list) {
                if (!q->properties.is_evicted)
@@ -700,14 +720,15 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
                q->properties.is_evicted = false;
                q->properties.is_active = true;
                retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe,
-                                      q->queue, &q->properties,
-                                      q->process->mm);
+                                      q->queue, &q->properties, mm);
                if (retval)
                        goto out;
                dqm->queue_count++;
        }
        qpd->evicted = 0;
 out:
+       if (mm)
+               mmput(mm);
        dqm_unlock(dqm);
        return retval;
 }
--
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH] drm/amdkfd: Fix incorrect use of process->mm
       [not found]     ` <DM5PR12MB170715EA570B77FDD0A0D5DB92E90-2J9CzHegvk9TCtO+SvGBKwdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
@ 2018-10-03 14:18       ` Christian König
  0 siblings, 0 replies; 4+ messages in thread
From: Christian König @ 2018-10-03 14:18 UTC (permalink / raw)
  To: Kuehling, Felix, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	Deucher, Alexander
  Cc: oded.gabbay-Re5JQEeQqe8AvxtiuMwx3w

Feel free to add an Acked-by: Christian König <christian.koenig@amd.com> 
as well if that helps.

Christian.

Am 03.10.2018 um 14:17 schrieb Kuehling, Felix:
> Hi Alex,
>
> If it's not too late, I'd like to get this into 4.19. Sorry I missed this fix earlier.
>
> Regards,
>    Felix
>
> ________________________________________
> From: Kuehling, Felix
> Sent: Tuesday, October 2, 2018 6:41:12 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: oded.gabbay@gmail.com; Kuehling, Felix
> Subject: [PATCH] drm/amdkfd: Fix incorrect use of process->mm
>
> This mm_struct pointer should never be dereferenced. If running in
> a user thread, just use current->mm. If running in a kernel worker
> use get_task_mm to get a safe reference to the mm_struct.
>
> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
> ---
>   .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c  | 37 +++++++++++++++++-----
>   1 file changed, 29 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> index d6af31c..3bc0651d 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> @@ -358,8 +358,8 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
>                                          struct queue *q,
>                                          struct qcm_process_device *qpd)
>   {
> -       int retval;
>          struct mqd_manager *mqd_mgr;
> +       int retval;
>
>          mqd_mgr = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
>          if (!mqd_mgr)
> @@ -387,8 +387,12 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
>          if (!q->properties.is_active)
>                  return 0;
>
> -       retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, q->queue,
> -                       &q->properties, q->process->mm);
> +       if (WARN(q->process->mm != current->mm,
> +                "should only run in user thread"))
> +               retval = -EFAULT;
> +       else
> +               retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, q->queue,
> +                                          &q->properties, current->mm);
>          if (retval)
>                  goto out_uninit_mqd;
>
> @@ -545,9 +549,15 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
>                  retval = map_queues_cpsch(dqm);
>          else if (q->properties.is_active &&
>                   (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
> -                 q->properties.type == KFD_QUEUE_TYPE_SDMA))
> -               retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, q->queue,
> -                                      &q->properties, q->process->mm);
> +                 q->properties.type == KFD_QUEUE_TYPE_SDMA)) {
> +               if (WARN(q->process->mm != current->mm,
> +                        "should only run in user thread"))
> +                       retval = -EFAULT;
> +               else
> +                       retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd,
> +                                                  q->pipe, q->queue,
> +                                                  &q->properties, current->mm);
> +       }
>
>   out_unlock:
>          dqm_unlock(dqm);
> @@ -653,6 +663,7 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
>   static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
>                                            struct qcm_process_device *qpd)
>   {
> +       struct mm_struct *mm = NULL;
>          struct queue *q;
>          struct mqd_manager *mqd_mgr;
>          struct kfd_process_device *pdd;
> @@ -686,6 +697,15 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
>                  kfd_flush_tlb(pdd);
>          }
>
> +       /* Take a safe reference to the mm_struct, which may otherwise
> +        * disappear even while the kfd_process is still referenced.
> +        */
> +       mm = get_task_mm(pdd->process->lead_thread);
> +       if (!mm) {
> +               retval = -EFAULT;
> +               goto out;
> +       }
> +
>          /* activate all active queues on the qpd */
>          list_for_each_entry(q, &qpd->queues_list, list) {
>                  if (!q->properties.is_evicted)
> @@ -700,14 +720,15 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
>                  q->properties.is_evicted = false;
>                  q->properties.is_active = true;
>                  retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe,
> -                                      q->queue, &q->properties,
> -                                      q->process->mm);
> +                                      q->queue, &q->properties, mm);
>                  if (retval)
>                          goto out;
>                  dqm->queue_count++;
>          }
>          qpd->evicted = 0;
>   out:
> +       if (mm)
> +               mmput(mm);
>          dqm_unlock(dqm);
>          return retval;
>   }
> --
> 2.7.4
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] drm/amdkfd: Fix incorrect use of process->mm
       [not found] ` <1538520072-1085-1-git-send-email-Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
  2018-10-03 12:17   ` Kuehling, Felix
@ 2018-10-03 19:03   ` Oded Gabbay
  1 sibling, 0 replies; 4+ messages in thread
From: Oded Gabbay @ 2018-10-03 19:03 UTC (permalink / raw)
  To: Kuehling, Felix; +Cc: amd-gfx list

On Wed, Oct 3, 2018 at 1:41 AM Felix Kuehling <Felix.Kuehling@amd.com> wrote:
>
> This mm_struct pointer should never be dereferenced. If running in
> a user thread, just use current->mm. If running in a kernel worker
> use get_task_mm to get a safe reference to the mm_struct.
>
> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
> ---
>  .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c  | 37 +++++++++++++++++-----
>  1 file changed, 29 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> index d6af31c..3bc0651d 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> @@ -358,8 +358,8 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
>                                         struct queue *q,
>                                         struct qcm_process_device *qpd)
>  {
> -       int retval;
>         struct mqd_manager *mqd_mgr;
> +       int retval;
>
>         mqd_mgr = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
>         if (!mqd_mgr)
> @@ -387,8 +387,12 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
>         if (!q->properties.is_active)
>                 return 0;
>
> -       retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, q->queue,
> -                       &q->properties, q->process->mm);
> +       if (WARN(q->process->mm != current->mm,
> +                "should only run in user thread"))
> +               retval = -EFAULT;
> +       else
> +               retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, q->queue,
> +                                          &q->properties, current->mm);
>         if (retval)
>                 goto out_uninit_mqd;
>
> @@ -545,9 +549,15 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
>                 retval = map_queues_cpsch(dqm);
>         else if (q->properties.is_active &&
>                  (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
> -                 q->properties.type == KFD_QUEUE_TYPE_SDMA))
> -               retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, q->queue,
> -                                      &q->properties, q->process->mm);
> +                 q->properties.type == KFD_QUEUE_TYPE_SDMA)) {
> +               if (WARN(q->process->mm != current->mm,
> +                        "should only run in user thread"))
> +                       retval = -EFAULT;
> +               else
> +                       retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd,
> +                                                  q->pipe, q->queue,
> +                                                  &q->properties, current->mm);
> +       }
>
>  out_unlock:
>         dqm_unlock(dqm);
> @@ -653,6 +663,7 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
>  static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
>                                           struct qcm_process_device *qpd)
>  {
> +       struct mm_struct *mm = NULL;
>         struct queue *q;
>         struct mqd_manager *mqd_mgr;
>         struct kfd_process_device *pdd;
> @@ -686,6 +697,15 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
>                 kfd_flush_tlb(pdd);
>         }
>
> +       /* Take a safe reference to the mm_struct, which may otherwise
> +        * disappear even while the kfd_process is still referenced.
> +        */
> +       mm = get_task_mm(pdd->process->lead_thread);
> +       if (!mm) {
> +               retval = -EFAULT;
> +               goto out;
> +       }
> +
>         /* activate all active queues on the qpd */
>         list_for_each_entry(q, &qpd->queues_list, list) {
>                 if (!q->properties.is_evicted)
> @@ -700,14 +720,15 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
>                 q->properties.is_evicted = false;
>                 q->properties.is_active = true;
>                 retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe,
> -                                      q->queue, &q->properties,
> -                                      q->process->mm);
> +                                      q->queue, &q->properties, mm);
>                 if (retval)
>                         goto out;
>                 dqm->queue_count++;
>         }
>         qpd->evicted = 0;
>  out:
> +       if (mm)
> +               mmput(mm);
>         dqm_unlock(dqm);
>         return retval;
>  }
> --
> 2.7.4
>

This patch is:
Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2018-10-03 19:03 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-10-02 22:41 [PATCH] drm/amdkfd: Fix incorrect use of process->mm Felix Kuehling
     [not found] ` <1538520072-1085-1-git-send-email-Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
2018-10-03 12:17   ` Kuehling, Felix
     [not found]     ` <DM5PR12MB170715EA570B77FDD0A0D5DB92E90-2J9CzHegvk9TCtO+SvGBKwdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
2018-10-03 14:18       ` Christian König
2018-10-03 19:03   ` Oded Gabbay

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.