All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm: assure aux_dev is nonzero before using it
@ 2019-05-23 11:09 ` tcamuso
  0 siblings, 0 replies; 29+ messages in thread
From: tcamuso @ 2019-05-23 11:09 UTC (permalink / raw)
  To: dri-devel, linux-kernel; +Cc: airlied, daniel, tcamuso, dkwon

From Daniel Kwon <dkwon@redhat.com>

The system crashed due to an invalid memory access while trying to access
an auxiliary device.

crash> bt
PID: 9863   TASK: ffff89d1bdf11040  CPU: 1   COMMAND: "ipmitool"
 #0 [ffff89cedd7f3868] machine_kexec at ffffffffb0663674
 #1 [ffff89cedd7f38c8] __crash_kexec at ffffffffb071cf62
 #2 [ffff89cedd7f3998] crash_kexec at ffffffffb071d050
 #3 [ffff89cedd7f39b0] oops_end at ffffffffb0d6d758
 #4 [ffff89cedd7f39d8] no_context at ffffffffb0d5bcde
 #5 [ffff89cedd7f3a28] __bad_area_nosemaphore at ffffffffb0d5bd75
 #6 [ffff89cedd7f3a78] bad_area at ffffffffb0d5c085
 #7 [ffff89cedd7f3aa0] __do_page_fault at ffffffffb0d7080c
 #8 [ffff89cedd7f3b10] do_page_fault at ffffffffb0d70905
 #9 [ffff89cedd7f3b40] page_fault at ffffffffb0d6c758
    [exception RIP: drm_dp_aux_dev_get_by_minor+0x3d]
    RIP: ffffffffc0a589bd  RSP: ffff89cedd7f3bf0  RFLAGS: 00010246
    RAX: 0000000000000000  RBX: 0000000000000000  RCX: ffff89cedd7f3fd8
    RDX: 0000000000000000  RSI: 0000000000000000  RDI: ffffffffc0a613e0
    RBP: ffff89cedd7f3bf8   R8: ffff89f1bcbabbd0   R9: 0000000000000000
    R10: ffff89f1be7a1cc0  R11: 0000000000000000  R12: 0000000000000000
    R13: ffff89f1b32a2830  R14: ffff89d18fadfa00  R15: 0000000000000000
    ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0018
    RIP: 00002b45f0d80d30  RSP: 00007ffc416066a0  RFLAGS: 00010246
    RAX: 0000000000000002  RBX: 000056062e212d80  RCX: 00007ffc41606810
    RDX: 0000000000000000  RSI: 0000000000000002  RDI: 00007ffc41606ec0
    RBP: 0000000000000000   R8: 000056062dfed229   R9: 00002b45f0cdf14d
    R10: 0000000000000002  R11: 0000000000000246  R12: 00007ffc41606ec0
    R13: 00007ffc41606ed0  R14: 00007ffc41606ee0  R15: 0000000000000000
    ORIG_RAX: 0000000000000002  CS: 0033  SS: 002b

----------------------------------------------------------------------------

It was trying to open '/dev/ipmi0', but as there was no entry in aux_idr,
'idr_find()' returned NULL. drm_dp_aux_dev_get_by_minor() should have
checked for this, but failed to do so.

----------------------------------------------------------------------------
/usr/src/debug/kernel-3.10.0-957.12.1.el7/linux-3.10.0-957.12.1.el7.x86_64/include/linux/idr.h: 114
     114 	struct idr_layer *hint = rcu_dereference_raw(idr->hint);
0xffffffffc0a58998 <drm_dp_aux_dev_get_by_minor+0x18>:	mov    0x8a41(%rip),%rax        # 0xffffffffc0a613e0 <aux_idr>
/usr/src/debug/kernel-3.10.0-957.12.1.el7/linux-3.10.0-957.12.1.el7.x86_64/include/linux/idr.h: 116
     116 	if (hint && (id & ~IDR_MASK) == hint->prefix)
     117 		return rcu_dereference_raw(hint->ary[id & IDR_MASK]);
0xffffffffc0a5899f <drm_dp_aux_dev_get_by_minor+0x1f>:	test   %rax,%rax
0xffffffffc0a589a2 <drm_dp_aux_dev_get_by_minor+0x22>:	je     0xffffffffc0a589ac <drm_dp_aux_dev_get_by_minor+0x2c>
0xffffffffc0a589a4 <drm_dp_aux_dev_get_by_minor+0x24>:	mov    %ebx,%edx
0xffffffffc0a589a6 <drm_dp_aux_dev_get_by_minor+0x26>:	xor    %dl,%dl
0xffffffffc0a589a8 <drm_dp_aux_dev_get_by_minor+0x28>:	cmp    (%rax),%edx
0xffffffffc0a589aa <drm_dp_aux_dev_get_by_minor+0x2a>:	je     0xffffffffc0a589f0 <drm_dp_aux_dev_get_by_minor+0x70>
/usr/src/debug/kernel-3.10.0-957.12.1.el7/linux-3.10.0-957.12.1.el7.x86_64/include/linux/idr.h: 119
     119 	return idr_find_slowpath(idr, id);
0xffffffffc0a589ac <drm_dp_aux_dev_get_by_minor+0x2c>:	mov    %ebx,%esi
0xffffffffc0a589ae <drm_dp_aux_dev_get_by_minor+0x2e>:	mov    $0xffffffffc0a613e0,%rdi
0xffffffffc0a589b5 <drm_dp_aux_dev_get_by_minor+0x35>:	callq  0xffffffffb09771b0 <idr_find_slowpath>
0xffffffffc0a589ba <drm_dp_aux_dev_get_by_minor+0x3a>:	mov    %rax,%rbx
/usr/src/debug/kernel-3.10.0-957.12.1.el7/linux-3.10.0-957.12.1.el7.x86_64/arch/x86/include/asm/atomic.h: 25
      25 	return ACCESS_ONCE((v)->counter);
0xffffffffc0a589bd <drm_dp_aux_dev_get_by_minor+0x3d>:	mov    0x18(%rbx),%edx

crash> struct file.f_path 0xffff89d18fadfa00
  f_path = {
    mnt = 0xffff89f23feaa620,
    dentry = 0xffff89f1be7a1cc0
  }
crash> files -d 0xffff89f1be7a1cc0
     DENTRY           INODE           SUPERBLK     TYPE PATH
ffff89f1be7a1cc0 ffff89f1b32a2830 ffff89d293aa8800 CHR  /dev/ipmi0

crash> struct inode.i_rdev ffff89f1b32a2830
  i_rdev = 0xf200000
crash> eval (0xfffff & 0xf200000)
hexadecimal: 0
    decimal: 0
      octal: 0
     binary: 0000000000000000000000000000000000000000000000000000000000000000
----------------------------------------------------------------------------

As the index value was 0 and aux_idr was empty (all of its fields were 0),
idr_find() can return NULL, but the function below doesn't check for that and
just tries to use the result.

----------------------------------------------------------------------------
crash> aux_idr
aux_idr = $8 = {
  hint = 0x0,
  top = 0x0,
  id_free = 0x0,
  layers = 0x0,
  id_free_cnt = 0x0,
  cur = 0x0,
  lock = {
    {
      rlock = {
        raw_lock = {
          val = {
            counter = 0x0
          }
        }
      }
    }
  }
}

crash> edis -f drm_dp_aux_dev_get_by_minor
/usr/src/debug/kernel-3.10.0-957.12.1.el7/linux-3.10.0-957.12.1.el7.x86_64/drivers/gpu/drm/drm_dp_aux_dev.c: 57

      56 static struct drm_dp_aux_dev *drm_dp_aux_dev_get_by_minor(unsigned index)
      57 {
      58 	struct drm_dp_aux_dev *aux_dev = NULL;
      59
      60 	mutex_lock(&aux_idr_mutex);
      61 	aux_dev = idr_find(&aux_idr, index);
      62 	if (!kref_get_unless_zero(&aux_dev->refcount))
      63 		aux_dev = NULL;
      64 	mutex_unlock(&aux_idr_mutex);
      65
      66 	return aux_dev;
      67 }
----------------------------------------------------------------------------

To avoid this kind of situation, we should add a safeguard for the returned
value. Replacing line 62 with the line below would do.

      62 	if (aux_dev && !kref_get_unless_zero(&aux_dev->refcount))
                    ^^^^^^^^^^
From Tony Camuso <tcamuso@redhat.com>
I built a patched kernel for several architectures.
Booted the kernel, and ran the following for 100 iterations.
   rmmod ipmi kmods to remove /dev/ipmi0.
   Invoked ipmitool
   insmod ipmi kmods
Did not see any crashes or call traces.

Suggested-by: Daniel Kwon <dkwon@redhat.com>
Signed-off-by: Tony Camuso <tcamuso@redhat.com>
---
 drivers/gpu/drm/drm_dp_aux_dev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/drm_dp_aux_dev.c b/drivers/gpu/drm/drm_dp_aux_dev.c
index 0e4f25d63fd2d..0b11210c882ee 100644
--- a/drivers/gpu/drm/drm_dp_aux_dev.c
+++ b/drivers/gpu/drm/drm_dp_aux_dev.c
@@ -60,7 +60,7 @@ static struct drm_dp_aux_dev *drm_dp_aux_dev_get_by_minor(unsigned index)
 
 	mutex_lock(&aux_idr_mutex);
 	aux_dev = idr_find(&aux_idr, index);
-	if (!kref_get_unless_zero(&aux_dev->refcount))
+	if (aux_dev && !kref_get_unless_zero(&aux_dev->refcount))
 		aux_dev = NULL;
 	mutex_unlock(&aux_idr_mutex);
 
-- 
2.20.1


^ permalink raw reply	[flat|nested] 29+ messages in thread

* [PATCH] drm: assure aux_dev is nonzero before using it
@ 2019-05-23 11:09 ` tcamuso
  0 siblings, 0 replies; 29+ messages in thread
From: tcamuso @ 2019-05-23 11:09 UTC (permalink / raw)
  To: dri-devel, linux-kernel; +Cc: airlied, daniel, tcamuso, dkwon

>From Daniel Kwon <dkwon@redhat.com>

The system crashed due to an invalid memory access while trying to access
an auxiliary device.

crash> bt
PID: 9863   TASK: ffff89d1bdf11040  CPU: 1   COMMAND: "ipmitool"
 #0 [ffff89cedd7f3868] machine_kexec at ffffffffb0663674
 #1 [ffff89cedd7f38c8] __crash_kexec at ffffffffb071cf62
 #2 [ffff89cedd7f3998] crash_kexec at ffffffffb071d050
 #3 [ffff89cedd7f39b0] oops_end at ffffffffb0d6d758
 #4 [ffff89cedd7f39d8] no_context at ffffffffb0d5bcde
 #5 [ffff89cedd7f3a28] __bad_area_nosemaphore at ffffffffb0d5bd75
 #6 [ffff89cedd7f3a78] bad_area at ffffffffb0d5c085
 #7 [ffff89cedd7f3aa0] __do_page_fault at ffffffffb0d7080c
 #8 [ffff89cedd7f3b10] do_page_fault at ffffffffb0d70905
 #9 [ffff89cedd7f3b40] page_fault at ffffffffb0d6c758
    [exception RIP: drm_dp_aux_dev_get_by_minor+0x3d]
    RIP: ffffffffc0a589bd  RSP: ffff89cedd7f3bf0  RFLAGS: 00010246
    RAX: 0000000000000000  RBX: 0000000000000000  RCX: ffff89cedd7f3fd8
    RDX: 0000000000000000  RSI: 0000000000000000  RDI: ffffffffc0a613e0
    RBP: ffff89cedd7f3bf8   R8: ffff89f1bcbabbd0   R9: 0000000000000000
    R10: ffff89f1be7a1cc0  R11: 0000000000000000  R12: 0000000000000000
    R13: ffff89f1b32a2830  R14: ffff89d18fadfa00  R15: 0000000000000000
    ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0018
    RIP: 00002b45f0d80d30  RSP: 00007ffc416066a0  RFLAGS: 00010246
    RAX: 0000000000000002  RBX: 000056062e212d80  RCX: 00007ffc41606810
    RDX: 0000000000000000  RSI: 0000000000000002  RDI: 00007ffc41606ec0
    RBP: 0000000000000000   R8: 000056062dfed229   R9: 00002b45f0cdf14d
    R10: 0000000000000002  R11: 0000000000000246  R12: 00007ffc41606ec0
    R13: 00007ffc41606ed0  R14: 00007ffc41606ee0  R15: 0000000000000000
    ORIG_RAX: 0000000000000002  CS: 0033  SS: 002b

----------------------------------------------------------------------------

It was trying to open '/dev/ipmi0', but as there was no entry in aux_idr,
'idr_find()' returned NULL. drm_dp_aux_dev_get_by_minor() should have
checked for this, but failed to do so.

----------------------------------------------------------------------------
/usr/src/debug/kernel-3.10.0-957.12.1.el7/linux-3.10.0-957.12.1.el7.x86_64/include/linux/idr.h: 114
     114 	struct idr_layer *hint = rcu_dereference_raw(idr->hint);
0xffffffffc0a58998 <drm_dp_aux_dev_get_by_minor+0x18>:	mov    0x8a41(%rip),%rax        # 0xffffffffc0a613e0 <aux_idr>
/usr/src/debug/kernel-3.10.0-957.12.1.el7/linux-3.10.0-957.12.1.el7.x86_64/include/linux/idr.h: 116
     116 	if (hint && (id & ~IDR_MASK) == hint->prefix)
     117 		return rcu_dereference_raw(hint->ary[id & IDR_MASK]);
0xffffffffc0a5899f <drm_dp_aux_dev_get_by_minor+0x1f>:	test   %rax,%rax
0xffffffffc0a589a2 <drm_dp_aux_dev_get_by_minor+0x22>:	je     0xffffffffc0a589ac <drm_dp_aux_dev_get_by_minor+0x2c>
0xffffffffc0a589a4 <drm_dp_aux_dev_get_by_minor+0x24>:	mov    %ebx,%edx
0xffffffffc0a589a6 <drm_dp_aux_dev_get_by_minor+0x26>:	xor    %dl,%dl
0xffffffffc0a589a8 <drm_dp_aux_dev_get_by_minor+0x28>:	cmp    (%rax),%edx
0xffffffffc0a589aa <drm_dp_aux_dev_get_by_minor+0x2a>:	je     0xffffffffc0a589f0 <drm_dp_aux_dev_get_by_minor+0x70>
/usr/src/debug/kernel-3.10.0-957.12.1.el7/linux-3.10.0-957.12.1.el7.x86_64/include/linux/idr.h: 119
     119 	return idr_find_slowpath(idr, id);
0xffffffffc0a589ac <drm_dp_aux_dev_get_by_minor+0x2c>:	mov    %ebx,%esi
0xffffffffc0a589ae <drm_dp_aux_dev_get_by_minor+0x2e>:	mov    $0xffffffffc0a613e0,%rdi
0xffffffffc0a589b5 <drm_dp_aux_dev_get_by_minor+0x35>:	callq  0xffffffffb09771b0 <idr_find_slowpath>
0xffffffffc0a589ba <drm_dp_aux_dev_get_by_minor+0x3a>:	mov    %rax,%rbx
/usr/src/debug/kernel-3.10.0-957.12.1.el7/linux-3.10.0-957.12.1.el7.x86_64/arch/x86/include/asm/atomic.h: 25
      25 	return ACCESS_ONCE((v)->counter);
0xffffffffc0a589bd <drm_dp_aux_dev_get_by_minor+0x3d>:	mov    0x18(%rbx),%edx

crash> struct file.f_path 0xffff89d18fadfa00
  f_path = {
    mnt = 0xffff89f23feaa620,
    dentry = 0xffff89f1be7a1cc0
  }
crash> files -d 0xffff89f1be7a1cc0
     DENTRY           INODE           SUPERBLK     TYPE PATH
ffff89f1be7a1cc0 ffff89f1b32a2830 ffff89d293aa8800 CHR  /dev/ipmi0

crash> struct inode.i_rdev ffff89f1b32a2830
  i_rdev = 0xf200000
crash> eval (0xfffff & 0xf200000)
hexadecimal: 0
    decimal: 0
      octal: 0
     binary: 0000000000000000000000000000000000000000000000000000000000000000
----------------------------------------------------------------------------

As the index value was 0 and aux_idr was empty (all of its fields were 0),
idr_find() can return NULL, but the function below doesn't check for that and
just tries to use the result.

----------------------------------------------------------------------------
crash> aux_idr
aux_idr = $8 = {
  hint = 0x0,
  top = 0x0,
  id_free = 0x0,
  layers = 0x0,
  id_free_cnt = 0x0,
  cur = 0x0,
  lock = {
    {
      rlock = {
        raw_lock = {
          val = {
            counter = 0x0
          }
        }
      }
    }
  }
}

crash> edis -f drm_dp_aux_dev_get_by_minor
/usr/src/debug/kernel-3.10.0-957.12.1.el7/linux-3.10.0-957.12.1.el7.x86_64/drivers/gpu/drm/drm_dp_aux_dev.c: 57

      56 static struct drm_dp_aux_dev *drm_dp_aux_dev_get_by_minor(unsigned index)
      57 {
      58 	struct drm_dp_aux_dev *aux_dev = NULL;
      59
      60 	mutex_lock(&aux_idr_mutex);
      61 	aux_dev = idr_find(&aux_idr, index);
      62 	if (!kref_get_unless_zero(&aux_dev->refcount))
      63 		aux_dev = NULL;
      64 	mutex_unlock(&aux_idr_mutex);
      65
      66 	return aux_dev;
      67 }
----------------------------------------------------------------------------

To avoid this kind of situation, we should add a safeguard for the returned
value. Replacing line 62 with the line below would do.

      62 	if (aux_dev && !kref_get_unless_zero(&aux_dev->refcount))
                    ^^^^^^^^^^
>From Tony Camuso <tcamuso@redhat.com>
I built a patched kernel for several architectures.
Booted the kernel, and ran the following for 100 iterations.
   rmmod ipmi kmods to remove /dev/ipmi0.
   Invoked ipmitool
   insmod ipmi kmods
Did not see any crashes or call traces.

Suggested-by: Daniel Kwon <dkwon@redhat.com>
Signed-off-by: Tony Camuso <tcamuso@redhat.com>
---
 drivers/gpu/drm/drm_dp_aux_dev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/drm_dp_aux_dev.c b/drivers/gpu/drm/drm_dp_aux_dev.c
index 0e4f25d63fd2d..0b11210c882ee 100644
--- a/drivers/gpu/drm/drm_dp_aux_dev.c
+++ b/drivers/gpu/drm/drm_dp_aux_dev.c
@@ -60,7 +60,7 @@ static struct drm_dp_aux_dev *drm_dp_aux_dev_get_by_minor(unsigned index)
 
 	mutex_lock(&aux_idr_mutex);
 	aux_dev = idr_find(&aux_idr, index);
-	if (!kref_get_unless_zero(&aux_dev->refcount))
+	if (aux_dev && !kref_get_unless_zero(&aux_dev->refcount))
 		aux_dev = NULL;
 	mutex_unlock(&aux_idr_mutex);
 
-- 
2.20.1

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH] drm: assure aux_dev is nonzero before using it
  2019-05-23 11:09 ` tcamuso
@ 2019-05-24  8:36   ` Jani Nikula
  -1 siblings, 0 replies; 29+ messages in thread
From: Jani Nikula @ 2019-05-24  8:36 UTC (permalink / raw)
  To: tcamuso, dri-devel, linux-kernel; +Cc: airlied, dkwon, tcamuso

On Thu, 23 May 2019, tcamuso <tcamuso@redhat.com> wrote:
> From Daniel Kwon <dkwon@redhat.com>
>
> The system was crashed due to invalid memory access while trying to access
> auxiliary device.
>
> crash> bt
> PID: 9863   TASK: ffff89d1bdf11040  CPU: 1   COMMAND: "ipmitool"
>  #0 [ffff89cedd7f3868] machine_kexec at ffffffffb0663674
>  #1 [ffff89cedd7f38c8] __crash_kexec at ffffffffb071cf62
>  #2 [ffff89cedd7f3998] crash_kexec at ffffffffb071d050
>  #3 [ffff89cedd7f39b0] oops_end at ffffffffb0d6d758
>  #4 [ffff89cedd7f39d8] no_context at ffffffffb0d5bcde
>  #5 [ffff89cedd7f3a28] __bad_area_nosemaphore at ffffffffb0d5bd75
>  #6 [ffff89cedd7f3a78] bad_area at ffffffffb0d5c085
>  #7 [ffff89cedd7f3aa0] __do_page_fault at ffffffffb0d7080c
>  #8 [ffff89cedd7f3b10] do_page_fault at ffffffffb0d70905
>  #9 [ffff89cedd7f3b40] page_fault at ffffffffb0d6c758
>     [exception RIP: drm_dp_aux_dev_get_by_minor+0x3d]
>     RIP: ffffffffc0a589bd  RSP: ffff89cedd7f3bf0  RFLAGS: 00010246
>     RAX: 0000000000000000  RBX: 0000000000000000  RCX: ffff89cedd7f3fd8
>     RDX: 0000000000000000  RSI: 0000000000000000  RDI: ffffffffc0a613e0
>     RBP: ffff89cedd7f3bf8   R8: ffff89f1bcbabbd0   R9: 0000000000000000
>     R10: ffff89f1be7a1cc0  R11: 0000000000000000  R12: 0000000000000000
>     R13: ffff89f1b32a2830  R14: ffff89d18fadfa00  R15: 0000000000000000
>     ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0018
>     RIP: 00002b45f0d80d30  RSP: 00007ffc416066a0  RFLAGS: 00010246
>     RAX: 0000000000000002  RBX: 000056062e212d80  RCX: 00007ffc41606810
>     RDX: 0000000000000000  RSI: 0000000000000002  RDI: 00007ffc41606ec0
>     RBP: 0000000000000000   R8: 000056062dfed229   R9: 00002b45f0cdf14d
>     R10: 0000000000000002  R11: 0000000000000246  R12: 00007ffc41606ec0
>     R13: 00007ffc41606ed0  R14: 00007ffc41606ee0  R15: 0000000000000000
>     ORIG_RAX: 0000000000000002  CS: 0033  SS: 002b
>
> ----------------------------------------------------------------------------
>
> It was trying to open '/dev/ipmi0', but as no entry in aux_dir, it returned
> NULL from 'idr_find()'. This drm_dp_aux_dev_get_by_minor() should have done a
> check on this, but had failed to do it.

I think the better question is, *why* does the idr_find() return NULL? I
don't think it should, under any circumstances. I fear adding the check
here papers over some other problem, taking us further away from the
root cause.

Also, can you reproduce this on a recent upstream kernel? The aux device
nodes were introduced in kernel v4.6. Whatever you reproduced on v3.10
is pretty much irrelevant for upstream.


BR,
Jani.




>
> ----------------------------------------------------------------------------
> /usr/src/debug/kernel-3.10.0-957.12.1.el7/linux-3.10.0-957.12.1.el7.x86_64/include/linux/idr.h: 114
>      114 	struct idr_layer *hint = rcu_dereference_raw(idr->hint);
> 0xffffffffc0a58998 <drm_dp_aux_dev_get_by_minor+0x18>:	mov    0x8a41(%rip),%rax        # 0xffffffffc0a613e0 <aux_idr>
> /usr/src/debug/kernel-3.10.0-957.12.1.el7/linux-3.10.0-957.12.1.el7.x86_64/include/linux/idr.h: 116
>      116 	if (hint && (id & ~IDR_MASK) == hint->prefix)
>      117 		return rcu_dereference_raw(hint->ary[id & IDR_MASK]);
> 0xffffffffc0a5899f <drm_dp_aux_dev_get_by_minor+0x1f>:	test   %rax,%rax
> 0xffffffffc0a589a2 <drm_dp_aux_dev_get_by_minor+0x22>:	je     0xffffffffc0a589ac <drm_dp_aux_dev_get_by_minor+0x2c>
> 0xffffffffc0a589a4 <drm_dp_aux_dev_get_by_minor+0x24>:	mov    %ebx,%edx
> 0xffffffffc0a589a6 <drm_dp_aux_dev_get_by_minor+0x26>:	xor    %dl,%dl
> 0xffffffffc0a589a8 <drm_dp_aux_dev_get_by_minor+0x28>:	cmp    (%rax),%edx
> 0xffffffffc0a589aa <drm_dp_aux_dev_get_by_minor+0x2a>:	je     0xffffffffc0a589f0 <drm_dp_aux_dev_get_by_minor+0x70>
> /usr/src/debug/kernel-3.10.0-957.12.1.el7/linux-3.10.0-957.12.1.el7.x86_64/include/linux/idr.h: 119
>      119 	return idr_find_slowpath(idr, id);
> 0xffffffffc0a589ac <drm_dp_aux_dev_get_by_minor+0x2c>:	mov    %ebx,%esi
> 0xffffffffc0a589ae <drm_dp_aux_dev_get_by_minor+0x2e>:	mov    $0xffffffffc0a613e0,%rdi
> 0xffffffffc0a589b5 <drm_dp_aux_dev_get_by_minor+0x35>:	callq  0xffffffffb09771b0 <idr_find_slowpath>
> 0xffffffffc0a589ba <drm_dp_aux_dev_get_by_minor+0x3a>:	mov    %rax,%rbx
> /usr/src/debug/kernel-3.10.0-957.12.1.el7/linux-3.10.0-957.12.1.el7.x86_64/arch/x86/include/asm/atomic.h: 25
>       25 	return ACCESS_ONCE((v)->counter);
> 0xffffffffc0a589bd <drm_dp_aux_dev_get_by_minor+0x3d>:	mov    0x18(%rbx),%edx
>
> crash> struct file.f_path 0xffff89d18fadfa00
>   f_path = {
>     mnt = 0xffff89f23feaa620,
>     dentry = 0xffff89f1be7a1cc0
>   }
> crash> files -d 0xffff89f1be7a1cc0
>      DENTRY           INODE           SUPERBLK     TYPE PATH
> ffff89f1be7a1cc0 ffff89f1b32a2830 ffff89d293aa8800 CHR  /dev/ipmi0
>
> crash> struct inode.i_rdev ffff89f1b32a2830
>   i_rdev = 0xf200000
> crash> eval (0xfffff & 0xf200000)
> hexadecimal: 0
>     decimal: 0
>       octal: 0
>      binary: 0000000000000000000000000000000000000000000000000000000000000000
> ----------------------------------------------------------------------------
>
> As the index value was 0 and aux_idr had value 0 for all, it can have value
> NULL from idr_find() function, but the below function doesn't check and just
> tries to use it.
>
> ----------------------------------------------------------------------------
> crash> aux_idr
> aux_idr = $8 = {
>   hint = 0x0,
>   top = 0x0,
>   id_free = 0x0,
>   layers = 0x0,
>   id_free_cnt = 0x0,
>   cur = 0x0,
>   lock = {
>     {
>       rlock = {
>         raw_lock = {
>           val = {
>             counter = 0x0
>           }
>         }
>       }
>     }
>   }
> }
>
> crash> edis -f drm_dp_aux_dev_get_by_minor
> /usr/src/debug/kernel-3.10.0-957.12.1.el7/linux-3.10.0-957.12.1.el7.x86_64/drivers/gpu/drm/drm_dp_aux_dev.c: 57
>
>       56 static struct drm_dp_aux_dev *drm_dp_aux_dev_get_by_minor(unsigned index)
>       57 {
>       58 	struct drm_dp_aux_dev *aux_dev = NULL;
>       59
>       60 	mutex_lock(&aux_idr_mutex);
>       61 	aux_dev = idr_find(&aux_idr, index);
>       62 	if (!kref_get_unless_zero(&aux_dev->refcount))
>       63 		aux_dev = NULL;
>       64 	mutex_unlock(&aux_idr_mutex);
>       65
>       66 	return aux_dev;
>       67 }
> ----------------------------------------------------------------------------
>
> To avoid this kinds of situation, we should make a safeguard for the returned
> value. Changing the line 62 with the below would do.
>
>       62 	if (aux_dev && !kref_get_unless_zero(&aux_dev->refcount))
>                     ^^^^^^^^^^
> From Tony Camuso <tcamuso@redhat.com>
> I built a patched kernel for several architectures.
> Booted the kernel, and ran the following for 100 iterations.
>    rmmod ipmi kmods to remove /dev/ipmi0.
>    Invoked ipmitool
>    insmod ipmi kmods
> Did not see any crashes or call traces.
>
> Suggested-by: Daniel Kwon <dkwon@redhat.com>
> Signed-off-by: Tony Camuso <tcamuso@redhat.com>
> ---
>  drivers/gpu/drm/drm_dp_aux_dev.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/drm_dp_aux_dev.c b/drivers/gpu/drm/drm_dp_aux_dev.c
> index 0e4f25d63fd2d..0b11210c882ee 100644
> --- a/drivers/gpu/drm/drm_dp_aux_dev.c
> +++ b/drivers/gpu/drm/drm_dp_aux_dev.c
> @@ -60,7 +60,7 @@ static struct drm_dp_aux_dev *drm_dp_aux_dev_get_by_minor(unsigned index)
>  
>  	mutex_lock(&aux_idr_mutex);
>  	aux_dev = idr_find(&aux_idr, index);
> -	if (!kref_get_unless_zero(&aux_dev->refcount))
> +	if (aux_dev && !kref_get_unless_zero(&aux_dev->refcount))
>  		aux_dev = NULL;
>  	mutex_unlock(&aux_idr_mutex);

-- 
Jani Nikula, Intel Open Source Graphics Center

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH] drm: assure aux_dev is nonzero before using it
@ 2019-05-24  8:36   ` Jani Nikula
  0 siblings, 0 replies; 29+ messages in thread
From: Jani Nikula @ 2019-05-24  8:36 UTC (permalink / raw)
  To: dri-devel, linux-kernel; +Cc: airlied, dkwon, tcamuso

On Thu, 23 May 2019, tcamuso <tcamuso@redhat.com> wrote:
> From Daniel Kwon <dkwon@redhat.com>
>
> The system was crashed due to invalid memory access while trying to access
> auxiliary device.
>
> crash> bt
> PID: 9863   TASK: ffff89d1bdf11040  CPU: 1   COMMAND: "ipmitool"
>  #0 [ffff89cedd7f3868] machine_kexec at ffffffffb0663674
>  #1 [ffff89cedd7f38c8] __crash_kexec at ffffffffb071cf62
>  #2 [ffff89cedd7f3998] crash_kexec at ffffffffb071d050
>  #3 [ffff89cedd7f39b0] oops_end at ffffffffb0d6d758
>  #4 [ffff89cedd7f39d8] no_context at ffffffffb0d5bcde
>  #5 [ffff89cedd7f3a28] __bad_area_nosemaphore at ffffffffb0d5bd75
>  #6 [ffff89cedd7f3a78] bad_area at ffffffffb0d5c085
>  #7 [ffff89cedd7f3aa0] __do_page_fault at ffffffffb0d7080c
>  #8 [ffff89cedd7f3b10] do_page_fault at ffffffffb0d70905
>  #9 [ffff89cedd7f3b40] page_fault at ffffffffb0d6c758
>     [exception RIP: drm_dp_aux_dev_get_by_minor+0x3d]
>     RIP: ffffffffc0a589bd  RSP: ffff89cedd7f3bf0  RFLAGS: 00010246
>     RAX: 0000000000000000  RBX: 0000000000000000  RCX: ffff89cedd7f3fd8
>     RDX: 0000000000000000  RSI: 0000000000000000  RDI: ffffffffc0a613e0
>     RBP: ffff89cedd7f3bf8   R8: ffff89f1bcbabbd0   R9: 0000000000000000
>     R10: ffff89f1be7a1cc0  R11: 0000000000000000  R12: 0000000000000000
>     R13: ffff89f1b32a2830  R14: ffff89d18fadfa00  R15: 0000000000000000
>     ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0018
>     RIP: 00002b45f0d80d30  RSP: 00007ffc416066a0  RFLAGS: 00010246
>     RAX: 0000000000000002  RBX: 000056062e212d80  RCX: 00007ffc41606810
>     RDX: 0000000000000000  RSI: 0000000000000002  RDI: 00007ffc41606ec0
>     RBP: 0000000000000000   R8: 000056062dfed229   R9: 00002b45f0cdf14d
>     R10: 0000000000000002  R11: 0000000000000246  R12: 00007ffc41606ec0
>     R13: 00007ffc41606ed0  R14: 00007ffc41606ee0  R15: 0000000000000000
>     ORIG_RAX: 0000000000000002  CS: 0033  SS: 002b
>
> ----------------------------------------------------------------------------
>
> It was trying to open '/dev/ipmi0', but as no entry in aux_dir, it returned
> NULL from 'idr_find()'. This drm_dp_aux_dev_get_by_minor() should have done a
> check on this, but had failed to do it.

I think the better question is, *why* does the idr_find() return NULL? I
don't think it should, under any circumstances. I fear adding the check
here papers over some other problem, taking us further away from the
root cause.

Also, can you reproduce this on a recent upstream kernel? The aux device
nodes were introduced in kernel v4.6. Whatever you reproduced on v3.10
is pretty much irrelevant for upstream.


BR,
Jani.




>
> ----------------------------------------------------------------------------
> /usr/src/debug/kernel-3.10.0-957.12.1.el7/linux-3.10.0-957.12.1.el7.x86_64/include/linux/idr.h: 114
>      114 	struct idr_layer *hint = rcu_dereference_raw(idr->hint);
> 0xffffffffc0a58998 <drm_dp_aux_dev_get_by_minor+0x18>:	mov    0x8a41(%rip),%rax        # 0xffffffffc0a613e0 <aux_idr>
> /usr/src/debug/kernel-3.10.0-957.12.1.el7/linux-3.10.0-957.12.1.el7.x86_64/include/linux/idr.h: 116
>      116 	if (hint && (id & ~IDR_MASK) == hint->prefix)
>      117 		return rcu_dereference_raw(hint->ary[id & IDR_MASK]);
> 0xffffffffc0a5899f <drm_dp_aux_dev_get_by_minor+0x1f>:	test   %rax,%rax
> 0xffffffffc0a589a2 <drm_dp_aux_dev_get_by_minor+0x22>:	je     0xffffffffc0a589ac <drm_dp_aux_dev_get_by_minor+0x2c>
> 0xffffffffc0a589a4 <drm_dp_aux_dev_get_by_minor+0x24>:	mov    %ebx,%edx
> 0xffffffffc0a589a6 <drm_dp_aux_dev_get_by_minor+0x26>:	xor    %dl,%dl
> 0xffffffffc0a589a8 <drm_dp_aux_dev_get_by_minor+0x28>:	cmp    (%rax),%edx
> 0xffffffffc0a589aa <drm_dp_aux_dev_get_by_minor+0x2a>:	je     0xffffffffc0a589f0 <drm_dp_aux_dev_get_by_minor+0x70>
> /usr/src/debug/kernel-3.10.0-957.12.1.el7/linux-3.10.0-957.12.1.el7.x86_64/include/linux/idr.h: 119
>      119 	return idr_find_slowpath(idr, id);
> 0xffffffffc0a589ac <drm_dp_aux_dev_get_by_minor+0x2c>:	mov    %ebx,%esi
> 0xffffffffc0a589ae <drm_dp_aux_dev_get_by_minor+0x2e>:	mov    $0xffffffffc0a613e0,%rdi
> 0xffffffffc0a589b5 <drm_dp_aux_dev_get_by_minor+0x35>:	callq  0xffffffffb09771b0 <idr_find_slowpath>
> 0xffffffffc0a589ba <drm_dp_aux_dev_get_by_minor+0x3a>:	mov    %rax,%rbx
> /usr/src/debug/kernel-3.10.0-957.12.1.el7/linux-3.10.0-957.12.1.el7.x86_64/arch/x86/include/asm/atomic.h: 25
>       25 	return ACCESS_ONCE((v)->counter);
> 0xffffffffc0a589bd <drm_dp_aux_dev_get_by_minor+0x3d>:	mov    0x18(%rbx),%edx
>
> crash> struct file.f_path 0xffff89d18fadfa00
>   f_path = {
>     mnt = 0xffff89f23feaa620,
>     dentry = 0xffff89f1be7a1cc0
>   }
> crash> files -d 0xffff89f1be7a1cc0
>      DENTRY           INODE           SUPERBLK     TYPE PATH
> ffff89f1be7a1cc0 ffff89f1b32a2830 ffff89d293aa8800 CHR  /dev/ipmi0
>
> crash> struct inode.i_rdev ffff89f1b32a2830
>   i_rdev = 0xf200000
> crash> eval (0xfffff & 0xf200000)
> hexadecimal: 0
>     decimal: 0
>       octal: 0
>      binary: 0000000000000000000000000000000000000000000000000000000000000000
> ----------------------------------------------------------------------------
>
> As the index value was 0 and aux_idr had value 0 for all, it can have value
> NULL from idr_find() function, but the below function doesn't check and just
> tries to use it.
>
> ----------------------------------------------------------------------------
> crash> aux_idr
> aux_idr = $8 = {
>   hint = 0x0,
>   top = 0x0,
>   id_free = 0x0,
>   layers = 0x0,
>   id_free_cnt = 0x0,
>   cur = 0x0,
>   lock = {
>     {
>       rlock = {
>         raw_lock = {
>           val = {
>             counter = 0x0
>           }
>         }
>       }
>     }
>   }
> }
>
> crash> edis -f drm_dp_aux_dev_get_by_minor
> /usr/src/debug/kernel-3.10.0-957.12.1.el7/linux-3.10.0-957.12.1.el7.x86_64/drivers/gpu/drm/drm_dp_aux_dev.c: 57
>
>       56 static struct drm_dp_aux_dev *drm_dp_aux_dev_get_by_minor(unsigned index)
>       57 {
>       58 	struct drm_dp_aux_dev *aux_dev = NULL;
>       59
>       60 	mutex_lock(&aux_idr_mutex);
>       61 	aux_dev = idr_find(&aux_idr, index);
>       62 	if (!kref_get_unless_zero(&aux_dev->refcount))
>       63 		aux_dev = NULL;
>       64 	mutex_unlock(&aux_idr_mutex);
>       65
>       66 	return aux_dev;
>       67 }
> ----------------------------------------------------------------------------
>
> To avoid this kinds of situation, we should make a safeguard for the returned
> value. Changing the line 62 with the below would do.
>
>       62 	if (aux_dev && !kref_get_unless_zero(&aux_dev->refcount))
>                     ^^^^^^^^^^
> From Tony Camuso <tcamuso@redhat.com>
> I built a patched kernel for several architectures.
> Booted the kernel, and ran the following for 100 iterations.
>    rmmod ipmi kmods to remove /dev/ipmi0.
>    Invoked ipmitool
>    insmod ipmi kmods
> Did not see any crashes or call traces.
>
> Suggested-by: Daniel Kwon <dkwon@redhat.com>
> Signed-off-by: Tony Camuso <tcamuso@redhat.com>
> ---
>  drivers/gpu/drm/drm_dp_aux_dev.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/drm_dp_aux_dev.c b/drivers/gpu/drm/drm_dp_aux_dev.c
> index 0e4f25d63fd2d..0b11210c882ee 100644
> --- a/drivers/gpu/drm/drm_dp_aux_dev.c
> +++ b/drivers/gpu/drm/drm_dp_aux_dev.c
> @@ -60,7 +60,7 @@ static struct drm_dp_aux_dev *drm_dp_aux_dev_get_by_minor(unsigned index)
>  
>  	mutex_lock(&aux_idr_mutex);
>  	aux_dev = idr_find(&aux_idr, index);
> -	if (!kref_get_unless_zero(&aux_dev->refcount))
> +	if (aux_dev && !kref_get_unless_zero(&aux_dev->refcount))
>  		aux_dev = NULL;
>  	mutex_unlock(&aux_idr_mutex);

-- 
Jani Nikula, Intel Open Source Graphics Center

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH] drm: assure aux_dev is nonzero before using it
  2019-05-24  8:36   ` Jani Nikula
  (?)
@ 2019-05-24 10:48   ` tony camuso
  2019-05-24 11:58     ` Ville Syrjälä
  -1 siblings, 1 reply; 29+ messages in thread
From: tony camuso @ 2019-05-24 10:48 UTC (permalink / raw)
  To: Jani Nikula, dri-devel, linux-kernel; +Cc: airlied, dkwon

On 5/24/19 4:36 AM, Jani Nikula wrote:
> On Thu, 23 May 2019, tcamuso <tcamuso@redhat.com> wrote:
>>  From Daniel Kwon <dkwon@redhat.com>
>>
>> The system crashed due to an invalid memory access while trying to access
>> an auxiliary device.
>>
>> crash> bt
>> PID: 9863   TASK: ffff89d1bdf11040  CPU: 1   COMMAND: "ipmitool"
>>   #0 [ffff89cedd7f3868] machine_kexec at ffffffffb0663674
>>   #1 [ffff89cedd7f38c8] __crash_kexec at ffffffffb071cf62
>>   #2 [ffff89cedd7f3998] crash_kexec at ffffffffb071d050
>>   #3 [ffff89cedd7f39b0] oops_end at ffffffffb0d6d758
>>   #4 [ffff89cedd7f39d8] no_context at ffffffffb0d5bcde
>>   #5 [ffff89cedd7f3a28] __bad_area_nosemaphore at ffffffffb0d5bd75
>>   #6 [ffff89cedd7f3a78] bad_area at ffffffffb0d5c085
>>   #7 [ffff89cedd7f3aa0] __do_page_fault at ffffffffb0d7080c
>>   #8 [ffff89cedd7f3b10] do_page_fault at ffffffffb0d70905
>>   #9 [ffff89cedd7f3b40] page_fault at ffffffffb0d6c758
>>      [exception RIP: drm_dp_aux_dev_get_by_minor+0x3d]
>>      RIP: ffffffffc0a589bd  RSP: ffff89cedd7f3bf0  RFLAGS: 00010246
>>      RAX: 0000000000000000  RBX: 0000000000000000  RCX: ffff89cedd7f3fd8
>>      RDX: 0000000000000000  RSI: 0000000000000000  RDI: ffffffffc0a613e0
>>      RBP: ffff89cedd7f3bf8   R8: ffff89f1bcbabbd0   R9: 0000000000000000
>>      R10: ffff89f1be7a1cc0  R11: 0000000000000000  R12: 0000000000000000
>>      R13: ffff89f1b32a2830  R14: ffff89d18fadfa00  R15: 0000000000000000
>>      ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0018
>>      RIP: 00002b45f0d80d30  RSP: 00007ffc416066a0  RFLAGS: 00010246
>>      RAX: 0000000000000002  RBX: 000056062e212d80  RCX: 00007ffc41606810
>>      RDX: 0000000000000000  RSI: 0000000000000002  RDI: 00007ffc41606ec0
>>      RBP: 0000000000000000   R8: 000056062dfed229   R9: 00002b45f0cdf14d
>>      R10: 0000000000000002  R11: 0000000000000246  R12: 00007ffc41606ec0
>>      R13: 00007ffc41606ed0  R14: 00007ffc41606ee0  R15: 0000000000000000
>>      ORIG_RAX: 0000000000000002  CS: 0033  SS: 002b
>>
>> ----------------------------------------------------------------------------
>>
>> It was trying to open '/dev/ipmi0', but as there was no entry in aux_idr,
>> 'idr_find()' returned NULL. drm_dp_aux_dev_get_by_minor() should have
>> checked for this, but failed to do so.
> 
> I think the better question is, *why* does the idr_find() return NULL? I
> don't think it should, under any circumstances. I fear adding the check
> here papers over some other problem, taking us further away from the
> root cause.

That's a very good question.

> Also, can you reproduce this on a recent upstream kernel? The aux device
> nodes were introduced in kernel v4.6. Whatever you reproduced on v3.10
> is pretty much irrelevant for upstream.

I will look into this deeper, using the upstream kernel.

> 
> 
> BR,
> Jani.

-- snip --


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH] drm: assure aux_dev is nonzero before using it
  2019-05-24 10:48   ` tony camuso
@ 2019-05-24 11:58     ` Ville Syrjälä
  0 siblings, 0 replies; 29+ messages in thread
From: Ville Syrjälä @ 2019-05-24 11:58 UTC (permalink / raw)
  To: tony camuso; +Cc: Jani Nikula, dri-devel, linux-kernel, airlied, dkwon

On Fri, May 24, 2019 at 06:48:32AM -0400, tony camuso wrote:
> On 5/24/19 4:36 AM, Jani Nikula wrote:
> > On Thu, 23 May 2019, tcamuso <tcamuso@redhat.com> wrote:
> >>  From Daniel Kwon <dkwon@redhat.com>
> >>
> >> The system was crashed due to invalid memory access while trying to access
> >> auxiliary device.
> >>
> >> crash> bt
> >> PID: 9863   TASK: ffff89d1bdf11040  CPU: 1   COMMAND: "ipmitool"
> >>   #0 [ffff89cedd7f3868] machine_kexec at ffffffffb0663674
> >>   #1 [ffff89cedd7f38c8] __crash_kexec at ffffffffb071cf62
> >>   #2 [ffff89cedd7f3998] crash_kexec at ffffffffb071d050
> >>   #3 [ffff89cedd7f39b0] oops_end at ffffffffb0d6d758
> >>   #4 [ffff89cedd7f39d8] no_context at ffffffffb0d5bcde
> >>   #5 [ffff89cedd7f3a28] __bad_area_nosemaphore at ffffffffb0d5bd75
> >>   #6 [ffff89cedd7f3a78] bad_area at ffffffffb0d5c085
> >>   #7 [ffff89cedd7f3aa0] __do_page_fault at ffffffffb0d7080c
> >>   #8 [ffff89cedd7f3b10] do_page_fault at ffffffffb0d70905
> >>   #9 [ffff89cedd7f3b40] page_fault at ffffffffb0d6c758
> >>      [exception RIP: drm_dp_aux_dev_get_by_minor+0x3d]
> >>      RIP: ffffffffc0a589bd  RSP: ffff89cedd7f3bf0  RFLAGS: 00010246
> >>      RAX: 0000000000000000  RBX: 0000000000000000  RCX: ffff89cedd7f3fd8
> >>      RDX: 0000000000000000  RSI: 0000000000000000  RDI: ffffffffc0a613e0
> >>      RBP: ffff89cedd7f3bf8   R8: ffff89f1bcbabbd0   R9: 0000000000000000
> >>      R10: ffff89f1be7a1cc0  R11: 0000000000000000  R12: 0000000000000000
> >>      R13: ffff89f1b32a2830  R14: ffff89d18fadfa00  R15: 0000000000000000
> >>      ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0018
> >>      RIP: 00002b45f0d80d30  RSP: 00007ffc416066a0  RFLAGS: 00010246
> >>      RAX: 0000000000000002  RBX: 000056062e212d80  RCX: 00007ffc41606810
> >>      RDX: 0000000000000000  RSI: 0000000000000002  RDI: 00007ffc41606ec0
> >>      RBP: 0000000000000000   R8: 000056062dfed229   R9: 00002b45f0cdf14d
> >>      R10: 0000000000000002  R11: 0000000000000246  R12: 00007ffc41606ec0
> >>      R13: 00007ffc41606ed0  R14: 00007ffc41606ee0  R15: 0000000000000000
> >>      ORIG_RAX: 0000000000000002  CS: 0033  SS: 002b
> >>
> >> ----------------------------------------------------------------------------
> >>
> >> It was trying to open '/dev/ipmi0', but as no entry in aux_dir, it returned
> >> NULL from 'idr_find()'. This drm_dp_aux_dev_get_by_minor() should have done a
> >> check on this, but had failed to do it.
> > 
> > I think the better question is, *why* does the idr_find() return NULL? I
> > don't think it should, under any circumstances. I fear adding the check
> > here papers over some other problem, taking us further away from the
> > root cause.
> 
> That's a very good question.
> 
> > Also, can you reproduce this on a recent upstream kernel? The aux device
> > nodes were introduced in kernel v4.6. Whatever you reproduced on v3.10
> > is pretty much irrelevant for upstream.
> 
> I will look into this deeper, using the upstream kernel.

Should be trivial to reproduce with mknod. I wonder if we should stick a
test like that into igt actually. Not sure how happy people would be if
igt creates new device nodes...

-- 
Ville Syrjälä
Intel

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH] drm: assure aux_dev is nonzero before using it
  2019-05-24  8:36   ` Jani Nikula
  (?)
  (?)
@ 2019-07-10 13:47   ` Tony Camuso
  2019-07-10 13:56     ` Ville Syrjälä
  -1 siblings, 1 reply; 29+ messages in thread
From: Tony Camuso @ 2019-07-10 13:47 UTC (permalink / raw)
  To: Jani Nikula, dri-devel, linux-kernel; +Cc: airlied, dkwon

On 5/24/19 4:36 AM, Jani Nikula wrote:
> On Thu, 23 May 2019, tcamuso <tcamuso@redhat.com> wrote:
>>  From Daniel Kwon <dkwon@redhat.com>
>>
>> The system was crashed due to invalid memory access while trying to access
>> auxiliary device.
>>
>> crash> bt
>> PID: 9863   TASK: ffff89d1bdf11040  CPU: 1   COMMAND: "ipmitool"
>>   #0 [ffff89cedd7f3868] machine_kexec at ffffffffb0663674
>>   #1 [ffff89cedd7f38c8] __crash_kexec at ffffffffb071cf62
>>   #2 [ffff89cedd7f3998] crash_kexec at ffffffffb071d050
>>   #3 [ffff89cedd7f39b0] oops_end at ffffffffb0d6d758
>>   #4 [ffff89cedd7f39d8] no_context at ffffffffb0d5bcde
>>   #5 [ffff89cedd7f3a28] __bad_area_nosemaphore at ffffffffb0d5bd75
>>   #6 [ffff89cedd7f3a78] bad_area at ffffffffb0d5c085
>>   #7 [ffff89cedd7f3aa0] __do_page_fault at ffffffffb0d7080c
>>   #8 [ffff89cedd7f3b10] do_page_fault at ffffffffb0d70905
>>   #9 [ffff89cedd7f3b40] page_fault at ffffffffb0d6c758
>>      [exception RIP: drm_dp_aux_dev_get_by_minor+0x3d]
>>      RIP: ffffffffc0a589bd  RSP: ffff89cedd7f3bf0  RFLAGS: 00010246
>>      RAX: 0000000000000000  RBX: 0000000000000000  RCX: ffff89cedd7f3fd8
>>      RDX: 0000000000000000  RSI: 0000000000000000  RDI: ffffffffc0a613e0
>>      RBP: ffff89cedd7f3bf8   R8: ffff89f1bcbabbd0   R9: 0000000000000000
>>      R10: ffff89f1be7a1cc0  R11: 0000000000000000  R12: 0000000000000000
>>      R13: ffff89f1b32a2830  R14: ffff89d18fadfa00  R15: 0000000000000000
>>      ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0018
>>      RIP: 00002b45f0d80d30  RSP: 00007ffc416066a0  RFLAGS: 00010246
>>      RAX: 0000000000000002  RBX: 000056062e212d80  RCX: 00007ffc41606810
>>      RDX: 0000000000000000  RSI: 0000000000000002  RDI: 00007ffc41606ec0
>>      RBP: 0000000000000000   R8: 000056062dfed229   R9: 00002b45f0cdf14d
>>      R10: 0000000000000002  R11: 0000000000000246  R12: 00007ffc41606ec0
>>      R13: 00007ffc41606ed0  R14: 00007ffc41606ee0  R15: 0000000000000000
>>      ORIG_RAX: 0000000000000002  CS: 0033  SS: 002b
>>
>> ----------------------------------------------------------------------------
>>
>> It was trying to open '/dev/ipmi0', but as no entry in aux_dir, it returned
>> NULL from 'idr_find()'. This drm_dp_aux_dev_get_by_minor() should have done a
>> check on this, but had failed to do it.
> 
> I think the better question is, *why* does the idr_find() return NULL? I
> don't think it should, under any circumstances. I fear adding the check
> here papers over some other problem, taking us further away from the
> root cause.
> 
> Also, can you reproduce this on a recent upstream kernel? The aux device
> nodes were introduced in kernel v4.6. Whatever you reproduced on v3.10
> is pretty much irrelevant for upstream.
> 
> 
> BR,
> Jani.

I have not been able to reproduce this problem.

However, whatever the reason idr_find() returns NULL, isn't it good form to
check it before using it? What would be the software engineering reason not
to do this?

> 
> 
> 
> 
>>
>> ----------------------------------------------------------------------------
>> /usr/src/debug/kernel-3.10.0-957.12.1.el7/linux-3.10.0-957.12.1.el7.x86_64/include/linux/idr.h: 114
>>       114 	struct idr_layer *hint = rcu_dereference_raw(idr->hint);
>> 0xffffffffc0a58998 <drm_dp_aux_dev_get_by_minor+0x18>:	mov    0x8a41(%rip),%rax        # 0xffffffffc0a613e0 <aux_idr>
>> /usr/src/debug/kernel-3.10.0-957.12.1.el7/linux-3.10.0-957.12.1.el7.x86_64/include/linux/idr.h: 116
>>       116 	if (hint && (id & ~IDR_MASK) == hint->prefix)
>>       117 		return rcu_dereference_raw(hint->ary[id & IDR_MASK]);
>> 0xffffffffc0a5899f <drm_dp_aux_dev_get_by_minor+0x1f>:	test   %rax,%rax
>> 0xffffffffc0a589a2 <drm_dp_aux_dev_get_by_minor+0x22>:	je     0xffffffffc0a589ac <drm_dp_aux_dev_get_by_minor+0x2c>
>> 0xffffffffc0a589a4 <drm_dp_aux_dev_get_by_minor+0x24>:	mov    %ebx,%edx
>> 0xffffffffc0a589a6 <drm_dp_aux_dev_get_by_minor+0x26>:	xor    %dl,%dl
>> 0xffffffffc0a589a8 <drm_dp_aux_dev_get_by_minor+0x28>:	cmp    (%rax),%edx
>> 0xffffffffc0a589aa <drm_dp_aux_dev_get_by_minor+0x2a>:	je     0xffffffffc0a589f0 <drm_dp_aux_dev_get_by_minor+0x70>
>> /usr/src/debug/kernel-3.10.0-957.12.1.el7/linux-3.10.0-957.12.1.el7.x86_64/include/linux/idr.h: 119
>>       119 	return idr_find_slowpath(idr, id);
>> 0xffffffffc0a589ac <drm_dp_aux_dev_get_by_minor+0x2c>:	mov    %ebx,%esi
>> 0xffffffffc0a589ae <drm_dp_aux_dev_get_by_minor+0x2e>:	mov    $0xffffffffc0a613e0,%rdi
>> 0xffffffffc0a589b5 <drm_dp_aux_dev_get_by_minor+0x35>:	callq  0xffffffffb09771b0 <idr_find_slowpath>
>> 0xffffffffc0a589ba <drm_dp_aux_dev_get_by_minor+0x3a>:	mov    %rax,%rbx
>> /usr/src/debug/kernel-3.10.0-957.12.1.el7/linux-3.10.0-957.12.1.el7.x86_64/arch/x86/include/asm/atomic.h: 25
>>        25 	return ACCESS_ONCE((v)->counter);
>> 0xffffffffc0a589bd <drm_dp_aux_dev_get_by_minor+0x3d>:	mov    0x18(%rbx),%edx
>>
>> crash> struct file.f_path 0xffff89d18fadfa00
>>    f_path = {
>>      mnt = 0xffff89f23feaa620,
>>      dentry = 0xffff89f1be7a1cc0
>>    }
>> crash> files -d 0xffff89f1be7a1cc0
>>       DENTRY           INODE           SUPERBLK     TYPE PATH
>> ffff89f1be7a1cc0 ffff89f1b32a2830 ffff89d293aa8800 CHR  /dev/ipmi0
>>
>> crash> struct inode.i_rdev ffff89f1b32a2830
>>    i_rdev = 0xf200000
>> crash> eval (0xfffff & 0xf200000)
>> hexadecimal: 0
>>      decimal: 0
>>        octal: 0
>>       binary: 0000000000000000000000000000000000000000000000000000000000000000
>> ----------------------------------------------------------------------------
>>
>> As the index value was 0 and every field of aux_idr was 0, idr_find() can
>> return NULL, but the function below doesn't check for that and just tries
>> to use the result.
>>
>> ----------------------------------------------------------------------------
>> crash> aux_idr
>> aux_idr = $8 = {
>>    hint = 0x0,
>>    top = 0x0,
>>    id_free = 0x0,
>>    layers = 0x0,
>>    id_free_cnt = 0x0,
>>    cur = 0x0,
>>    lock = {
>>      {
>>        rlock = {
>>          raw_lock = {
>>            val = {
>>              counter = 0x0
>>            }
>>          }
>>        }
>>      }
>>    }
>> }
>>
>> crash> edis -f drm_dp_aux_dev_get_by_minor
>> /usr/src/debug/kernel-3.10.0-957.12.1.el7/linux-3.10.0-957.12.1.el7.x86_64/drivers/gpu/drm/drm_dp_aux_dev.c: 57
>>
>>        56 static struct drm_dp_aux_dev *drm_dp_aux_dev_get_by_minor(unsigned index)
>>        57 {
>>        58 	struct drm_dp_aux_dev *aux_dev = NULL;
>>        59
>>        60 	mutex_lock(&aux_idr_mutex);
>>        61 	aux_dev = idr_find(&aux_idr, index);
>>        62 	if (!kref_get_unless_zero(&aux_dev->refcount))
>>        63 		aux_dev = NULL;
>>        64 	mutex_unlock(&aux_idr_mutex);
>>        65
>>        66 	return aux_dev;
>>        67 }
>> ----------------------------------------------------------------------------
>>
>> To avoid this kinds of situation, we should make a safeguard for the returned
>> value. Changing the line 62 with the below would do.
>>
>>        62 	if (aux_dev && !kref_get_unless_zero(&aux_dev->refcount))
>>                      ^^^^^^^^^^
>>  From Tony Camuso <tcamuso@redhat.com>
>> I built a patched kernel for several architectures.
>> Booted the kernel, and ran the following for 100 iterations.
>>     rmmod ipmi kmods to remove /dev/ipmi0.
>>     Invoked ipmitool
>>     insmod ipmi kmods
>> Did not see any crashes or call traces.
>>
>> Suggested-by: Daniel Kwon <dkwon@redhat.com>
>> Signed-off-by: Tony Camuso <tcamuso@redhat.com>
>> ---
>>   drivers/gpu/drm/drm_dp_aux_dev.c | 2 +-
>>   1 file changed, 1 insertion(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/drm_dp_aux_dev.c b/drivers/gpu/drm/drm_dp_aux_dev.c
>> index 0e4f25d63fd2d..0b11210c882ee 100644
>> --- a/drivers/gpu/drm/drm_dp_aux_dev.c
>> +++ b/drivers/gpu/drm/drm_dp_aux_dev.c
>> @@ -60,7 +60,7 @@ static struct drm_dp_aux_dev *drm_dp_aux_dev_get_by_minor(unsigned index)
>>   
>>   	mutex_lock(&aux_idr_mutex);
>>   	aux_dev = idr_find(&aux_idr, index);
>> -	if (!kref_get_unless_zero(&aux_dev->refcount))
>> +	if (aux_dev && !kref_get_unless_zero(&aux_dev->refcount))
>>   		aux_dev = NULL;
>>   	mutex_unlock(&aux_idr_mutex);
> 


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH] drm: assure aux_dev is nonzero before using it
  2019-07-10 13:47   ` Tony Camuso
@ 2019-07-10 13:56     ` Ville Syrjälä
  2019-07-12 16:07       ` Tony Camuso
  0 siblings, 1 reply; 29+ messages in thread
From: Ville Syrjälä @ 2019-07-10 13:56 UTC (permalink / raw)
  To: Tony Camuso; +Cc: Jani Nikula, dri-devel, linux-kernel, airlied, dkwon

On Wed, Jul 10, 2019 at 09:47:11AM -0400, Tony Camuso wrote:
> On 5/24/19 4:36 AM, Jani Nikula wrote:
> > On Thu, 23 May 2019, tcamuso <tcamuso@redhat.com> wrote:
> >>  From Daniel Kwon <dkwon@redhat.com>
> >>
> >> The system was crashed due to invalid memory access while trying to access
> >> auxiliary device.
> >>
> >> crash> bt
> >> PID: 9863   TASK: ffff89d1bdf11040  CPU: 1   COMMAND: "ipmitool"
> >>   #0 [ffff89cedd7f3868] machine_kexec at ffffffffb0663674
> >>   #1 [ffff89cedd7f38c8] __crash_kexec at ffffffffb071cf62
> >>   #2 [ffff89cedd7f3998] crash_kexec at ffffffffb071d050
> >>   #3 [ffff89cedd7f39b0] oops_end at ffffffffb0d6d758
> >>   #4 [ffff89cedd7f39d8] no_context at ffffffffb0d5bcde
> >>   #5 [ffff89cedd7f3a28] __bad_area_nosemaphore at ffffffffb0d5bd75
> >>   #6 [ffff89cedd7f3a78] bad_area at ffffffffb0d5c085
> >>   #7 [ffff89cedd7f3aa0] __do_page_fault at ffffffffb0d7080c
> >>   #8 [ffff89cedd7f3b10] do_page_fault at ffffffffb0d70905
> >>   #9 [ffff89cedd7f3b40] page_fault at ffffffffb0d6c758
> >>      [exception RIP: drm_dp_aux_dev_get_by_minor+0x3d]
> >>      RIP: ffffffffc0a589bd  RSP: ffff89cedd7f3bf0  RFLAGS: 00010246
> >>      RAX: 0000000000000000  RBX: 0000000000000000  RCX: ffff89cedd7f3fd8
> >>      RDX: 0000000000000000  RSI: 0000000000000000  RDI: ffffffffc0a613e0
> >>      RBP: ffff89cedd7f3bf8   R8: ffff89f1bcbabbd0   R9: 0000000000000000
> >>      R10: ffff89f1be7a1cc0  R11: 0000000000000000  R12: 0000000000000000
> >>      R13: ffff89f1b32a2830  R14: ffff89d18fadfa00  R15: 0000000000000000
> >>      ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0018
> >>      RIP: 00002b45f0d80d30  RSP: 00007ffc416066a0  RFLAGS: 00010246
> >>      RAX: 0000000000000002  RBX: 000056062e212d80  RCX: 00007ffc41606810
> >>      RDX: 0000000000000000  RSI: 0000000000000002  RDI: 00007ffc41606ec0
> >>      RBP: 0000000000000000   R8: 000056062dfed229   R9: 00002b45f0cdf14d
> >>      R10: 0000000000000002  R11: 0000000000000246  R12: 00007ffc41606ec0
> >>      R13: 00007ffc41606ed0  R14: 00007ffc41606ee0  R15: 0000000000000000
> >>      ORIG_RAX: 0000000000000002  CS: 0033  SS: 002b
> >>
> >> ----------------------------------------------------------------------------
> >>
> >> It was trying to open '/dev/ipmi0', but as no entry in aux_dir, it returned
> >> NULL from 'idr_find()'. This drm_dp_aux_dev_get_by_minor() should have done a
> >> check on this, but had failed to do it.
> > 
> > I think the better question is, *why* does the idr_find() return NULL? I
> > don't think it should, under any circumstances. I fear adding the check
> > here papers over some other problem, taking us further away from the
> > root cause.
> > 
> > Also, can you reproduce this on a recent upstream kernel? The aux device
> > nodes were introduced in kernel v4.6. Whatever you reproduced on v3.10
> > is pretty much irrelevant for upstream.
> > 
> > 
> > BR,
> > Jani.
> 
> I have not been able to reproduce this problem.

mknod /dev/foo c <drm_dp_aux major> 255
cat /dev/foo

should do it.

> 
> However, whatever the reason idr_find() returns NULL, isn't it good form to
> check it before using it? What would be the software engineering reason not
> to do this?
> 
> > 
> > 
> > 
> > 
> >>
> >> ----------------------------------------------------------------------------
> >> /usr/src/debug/kernel-3.10.0-957.12.1.el7/linux-3.10.0-957.12.1.el7.x86_64/include/linux/idr.h: 114
> >>       114 	struct idr_layer *hint = rcu_dereference_raw(idr->hint);
> >> 0xffffffffc0a58998 <drm_dp_aux_dev_get_by_minor+0x18>:	mov    0x8a41(%rip),%rax        # 0xffffffffc0a613e0 <aux_idr>
> >> /usr/src/debug/kernel-3.10.0-957.12.1.el7/linux-3.10.0-957.12.1.el7.x86_64/include/linux/idr.h: 116
> >>       116 	if (hint && (id & ~IDR_MASK) == hint->prefix)
> >>       117 		return rcu_dereference_raw(hint->ary[id & IDR_MASK]);
> >> 0xffffffffc0a5899f <drm_dp_aux_dev_get_by_minor+0x1f>:	test   %rax,%rax
> >> 0xffffffffc0a589a2 <drm_dp_aux_dev_get_by_minor+0x22>:	je     0xffffffffc0a589ac <drm_dp_aux_dev_get_by_minor+0x2c>
> >> 0xffffffffc0a589a4 <drm_dp_aux_dev_get_by_minor+0x24>:	mov    %ebx,%edx
> >> 0xffffffffc0a589a6 <drm_dp_aux_dev_get_by_minor+0x26>:	xor    %dl,%dl
> >> 0xffffffffc0a589a8 <drm_dp_aux_dev_get_by_minor+0x28>:	cmp    (%rax),%edx
> >> 0xffffffffc0a589aa <drm_dp_aux_dev_get_by_minor+0x2a>:	je     0xffffffffc0a589f0 <drm_dp_aux_dev_get_by_minor+0x70>
> >> /usr/src/debug/kernel-3.10.0-957.12.1.el7/linux-3.10.0-957.12.1.el7.x86_64/include/linux/idr.h: 119
> >>       119 	return idr_find_slowpath(idr, id);
> >> 0xffffffffc0a589ac <drm_dp_aux_dev_get_by_minor+0x2c>:	mov    %ebx,%esi
> >> 0xffffffffc0a589ae <drm_dp_aux_dev_get_by_minor+0x2e>:	mov    $0xffffffffc0a613e0,%rdi
> >> 0xffffffffc0a589b5 <drm_dp_aux_dev_get_by_minor+0x35>:	callq  0xffffffffb09771b0 <idr_find_slowpath>
> >> 0xffffffffc0a589ba <drm_dp_aux_dev_get_by_minor+0x3a>:	mov    %rax,%rbx
> >> /usr/src/debug/kernel-3.10.0-957.12.1.el7/linux-3.10.0-957.12.1.el7.x86_64/arch/x86/include/asm/atomic.h: 25
> >>        25 	return ACCESS_ONCE((v)->counter);
> >> 0xffffffffc0a589bd <drm_dp_aux_dev_get_by_minor+0x3d>:	mov    0x18(%rbx),%edx
> >>
> >> crash> struct file.f_path 0xffff89d18fadfa00
> >>    f_path = {
> >>      mnt = 0xffff89f23feaa620,
> >>      dentry = 0xffff89f1be7a1cc0
> >>    }
> >> crash> files -d 0xffff89f1be7a1cc0
> >>       DENTRY           INODE           SUPERBLK     TYPE PATH
> >> ffff89f1be7a1cc0 ffff89f1b32a2830 ffff89d293aa8800 CHR  /dev/ipmi0
> >>
> >> crash> struct inode.i_rdev ffff89f1b32a2830
> >>    i_rdev = 0xf200000
> >> crash> eval (0xfffff & 0xf200000)
> >> hexadecimal: 0
> >>      decimal: 0
> >>        octal: 0
> >>       binary: 0000000000000000000000000000000000000000000000000000000000000000
> >> ----------------------------------------------------------------------------
> >>
> >> As the index value was 0 and aux_idr had value 0 for all, it can have value
> >> NULL from idr_find() function, but the below function doesn't check and just
> >> tries to use it.
> >>
> >> ----------------------------------------------------------------------------
> >> crash> aux_idr
> >> aux_idr = $8 = {
> >>    hint = 0x0,
> >>    top = 0x0,
> >>    id_free = 0x0,
> >>    layers = 0x0,
> >>    id_free_cnt = 0x0,
> >>    cur = 0x0,
> >>    lock = {
> >>      {
> >>        rlock = {
> >>          raw_lock = {
> >>            val = {
> >>              counter = 0x0
> >>            }
> >>          }
> >>        }
> >>      }
> >>    }
> >> }
> >>
> >> crash> edis -f drm_dp_aux_dev_get_by_minor
> >> /usr/src/debug/kernel-3.10.0-957.12.1.el7/linux-3.10.0-957.12.1.el7.x86_64/drivers/gpu/drm/drm_dp_aux_dev.c: 57
> >>
> >>        56 static struct drm_dp_aux_dev *drm_dp_aux_dev_get_by_minor(unsigned index)
> >>        57 {
> >>        58 	struct drm_dp_aux_dev *aux_dev = NULL;
> >>        59
> >>        60 	mutex_lock(&aux_idr_mutex);
> >>        61 	aux_dev = idr_find(&aux_idr, index);
> >>        62 	if (!kref_get_unless_zero(&aux_dev->refcount))
> >>        63 		aux_dev = NULL;
> >>        64 	mutex_unlock(&aux_idr_mutex);
> >>        65
> >>        66 	return aux_dev;
> >>        67 }
> >> ----------------------------------------------------------------------------
> >>
> >> To avoid this kinds of situation, we should make a safeguard for the returned
> >> value. Changing the line 62 with the below would do.
> >>
> >>        62 	if (aux_dev && !kref_get_unless_zero(&aux_dev->refcount))
> >>                      ^^^^^^^^^^
> >>  From Tony Camuso <tcamuso@redhat.com>
> >> I built a patched kernel for several architectures.
> >> Booted the kernel, and ran the following for 100 iterations.
> >>     rmmod ipmi kmods to remove /dev/ipmi0.
> >>     Invoked ipmitool
> >>     insmod ipmi kmods
> >> Did not see any crashes or call traces.
> >>
> >> Suggested-by: Daniel Kwon <dkwon@redhat.com>
> >> Signed-off-by: Tony Camuso <tcamuso@redhat.com>
> >> ---
> >>   drivers/gpu/drm/drm_dp_aux_dev.c | 2 +-
> >>   1 file changed, 1 insertion(+), 1 deletion(-)
> >>
> >> diff --git a/drivers/gpu/drm/drm_dp_aux_dev.c b/drivers/gpu/drm/drm_dp_aux_dev.c
> >> index 0e4f25d63fd2d..0b11210c882ee 100644
> >> --- a/drivers/gpu/drm/drm_dp_aux_dev.c
> >> +++ b/drivers/gpu/drm/drm_dp_aux_dev.c
> >> @@ -60,7 +60,7 @@ static struct drm_dp_aux_dev *drm_dp_aux_dev_get_by_minor(unsigned index)
> >>   
> >>   	mutex_lock(&aux_idr_mutex);
> >>   	aux_dev = idr_find(&aux_idr, index);
> >> -	if (!kref_get_unless_zero(&aux_dev->refcount))
> >> +	if (aux_dev && !kref_get_unless_zero(&aux_dev->refcount))
> >>   		aux_dev = NULL;
> >>   	mutex_unlock(&aux_idr_mutex);
> > 
> 
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel

-- 
Ville Syrjälä
Intel

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH] drm: assure aux_dev is nonzero before using it
  2019-07-10 13:56     ` Ville Syrjälä
@ 2019-07-12 16:07       ` Tony Camuso
  2019-07-12 17:06         ` Ville Syrjälä
  0 siblings, 1 reply; 29+ messages in thread
From: Tony Camuso @ 2019-07-12 16:07 UTC (permalink / raw)
  To: Ville Syrjälä
  Cc: Jani Nikula, dri-devel, linux-kernel, airlied, dkwon

On 7/10/19 9:56 AM, Ville Syrjälä wrote:
> On Wed, Jul 10, 2019 at 09:47:11AM -0400, Tony Camuso wrote:
>> On 5/24/19 4:36 AM, Jani Nikula wrote:
>>> On Thu, 23 May 2019, tcamuso <tcamuso@redhat.com> wrote:
>>>>   From Daniel Kwon <dkwon@redhat.com>
>>>>
>>>> The system was crashed due to invalid memory access while trying to access
>>>> auxiliary device.
>>>>
>>>> crash> bt
>>>> PID: 9863   TASK: ffff89d1bdf11040  CPU: 1   COMMAND: "ipmitool"
>>>>    #0 [ffff89cedd7f3868] machine_kexec at ffffffffb0663674
>>>>    #1 [ffff89cedd7f38c8] __crash_kexec at ffffffffb071cf62
>>>>    #2 [ffff89cedd7f3998] crash_kexec at ffffffffb071d050
>>>>    #3 [ffff89cedd7f39b0] oops_end at ffffffffb0d6d758
>>>>    #4 [ffff89cedd7f39d8] no_context at ffffffffb0d5bcde
>>>>    #5 [ffff89cedd7f3a28] __bad_area_nosemaphore at ffffffffb0d5bd75
>>>>    #6 [ffff89cedd7f3a78] bad_area at ffffffffb0d5c085
>>>>    #7 [ffff89cedd7f3aa0] __do_page_fault at ffffffffb0d7080c
>>>>    #8 [ffff89cedd7f3b10] do_page_fault at ffffffffb0d70905
>>>>    #9 [ffff89cedd7f3b40] page_fault at ffffffffb0d6c758
>>>>       [exception RIP: drm_dp_aux_dev_get_by_minor+0x3d]
>>>>       RIP: ffffffffc0a589bd  RSP: ffff89cedd7f3bf0  RFLAGS: 00010246
>>>>       RAX: 0000000000000000  RBX: 0000000000000000  RCX: ffff89cedd7f3fd8
>>>>       RDX: 0000000000000000  RSI: 0000000000000000  RDI: ffffffffc0a613e0
>>>>       RBP: ffff89cedd7f3bf8   R8: ffff89f1bcbabbd0   R9: 0000000000000000
>>>>       R10: ffff89f1be7a1cc0  R11: 0000000000000000  R12: 0000000000000000
>>>>       R13: ffff89f1b32a2830  R14: ffff89d18fadfa00  R15: 0000000000000000
>>>>       ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0018
>>>>       RIP: 00002b45f0d80d30  RSP: 00007ffc416066a0  RFLAGS: 00010246
>>>>       RAX: 0000000000000002  RBX: 000056062e212d80  RCX: 00007ffc41606810
>>>>       RDX: 0000000000000000  RSI: 0000000000000002  RDI: 00007ffc41606ec0
>>>>       RBP: 0000000000000000   R8: 000056062dfed229   R9: 00002b45f0cdf14d
>>>>       R10: 0000000000000002  R11: 0000000000000246  R12: 00007ffc41606ec0
>>>>       R13: 00007ffc41606ed0  R14: 00007ffc41606ee0  R15: 0000000000000000
>>>>       ORIG_RAX: 0000000000000002  CS: 0033  SS: 002b
>>>>
>>>> ----------------------------------------------------------------------------
>>>>
>>>> It was trying to open '/dev/ipmi0', but as no entry in aux_dir, it returned
>>>> NULL from 'idr_find()'. This drm_dp_aux_dev_get_by_minor() should have done a
>>>> check on this, but had failed to do it.
>>>
>>> I think the better question is, *why* does the idr_find() return NULL? I
>>> don't think it should, under any circumstances. I fear adding the check
>>> here papers over some other problem, taking us further away from the
>>> root cause.
>>>
>>> Also, can you reproduce this on a recent upstream kernel? The aux device
>>> nodes were introduced in kernel v4.6. Whatever you reproduced on v3.10
>>> is pretty much irrelevant for upstream.
>>>
>>>
>>> BR,
>>> Jani.
>>
>> I have not been able to reproduce this problem.
> 
> mknod /dev/foo c <drm_dp_aux major> 255
> cat /dev/foo
> 
> should do it.

How do I determine <drm_dp_aux major>?

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH] drm: assure aux_dev is nonzero before using it
  2019-07-12 16:07       ` Tony Camuso
@ 2019-07-12 17:06         ` Ville Syrjälä
  2019-07-12 17:35           ` Tony Camuso
  2019-09-23 15:03           ` Tony Camuso
  0 siblings, 2 replies; 29+ messages in thread
From: Ville Syrjälä @ 2019-07-12 17:06 UTC (permalink / raw)
  To: Tony Camuso; +Cc: Jani Nikula, dri-devel, linux-kernel, airlied, dkwon

On Fri, Jul 12, 2019 at 12:07:46PM -0400, Tony Camuso wrote:
> On 7/10/19 9:56 AM, Ville Syrjälä wrote:
> > On Wed, Jul 10, 2019 at 09:47:11AM -0400, Tony Camuso wrote:
> >> On 5/24/19 4:36 AM, Jani Nikula wrote:
> >>> On Thu, 23 May 2019, tcamuso <tcamuso@redhat.com> wrote:
> >>>>   From Daniel Kwon <dkwon@redhat.com>
> >>>>
> >>>> The system was crashed due to invalid memory access while trying to access
> >>>> auxiliary device.
> >>>>
> >>>> crash> bt
> >>>> PID: 9863   TASK: ffff89d1bdf11040  CPU: 1   COMMAND: "ipmitool"
> >>>>    #0 [ffff89cedd7f3868] machine_kexec at ffffffffb0663674
> >>>>    #1 [ffff89cedd7f38c8] __crash_kexec at ffffffffb071cf62
> >>>>    #2 [ffff89cedd7f3998] crash_kexec at ffffffffb071d050
> >>>>    #3 [ffff89cedd7f39b0] oops_end at ffffffffb0d6d758
> >>>>    #4 [ffff89cedd7f39d8] no_context at ffffffffb0d5bcde
> >>>>    #5 [ffff89cedd7f3a28] __bad_area_nosemaphore at ffffffffb0d5bd75
> >>>>    #6 [ffff89cedd7f3a78] bad_area at ffffffffb0d5c085
> >>>>    #7 [ffff89cedd7f3aa0] __do_page_fault at ffffffffb0d7080c
> >>>>    #8 [ffff89cedd7f3b10] do_page_fault at ffffffffb0d70905
> >>>>    #9 [ffff89cedd7f3b40] page_fault at ffffffffb0d6c758
> >>>>       [exception RIP: drm_dp_aux_dev_get_by_minor+0x3d]
> >>>>       RIP: ffffffffc0a589bd  RSP: ffff89cedd7f3bf0  RFLAGS: 00010246
> >>>>       RAX: 0000000000000000  RBX: 0000000000000000  RCX: ffff89cedd7f3fd8
> >>>>       RDX: 0000000000000000  RSI: 0000000000000000  RDI: ffffffffc0a613e0
> >>>>       RBP: ffff89cedd7f3bf8   R8: ffff89f1bcbabbd0   R9: 0000000000000000
> >>>>       R10: ffff89f1be7a1cc0  R11: 0000000000000000  R12: 0000000000000000
> >>>>       R13: ffff89f1b32a2830  R14: ffff89d18fadfa00  R15: 0000000000000000
> >>>>       ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0018
> >>>>       RIP: 00002b45f0d80d30  RSP: 00007ffc416066a0  RFLAGS: 00010246
> >>>>       RAX: 0000000000000002  RBX: 000056062e212d80  RCX: 00007ffc41606810
> >>>>       RDX: 0000000000000000  RSI: 0000000000000002  RDI: 00007ffc41606ec0
> >>>>       RBP: 0000000000000000   R8: 000056062dfed229   R9: 00002b45f0cdf14d
> >>>>       R10: 0000000000000002  R11: 0000000000000246  R12: 00007ffc41606ec0
> >>>>       R13: 00007ffc41606ed0  R14: 00007ffc41606ee0  R15: 0000000000000000
> >>>>       ORIG_RAX: 0000000000000002  CS: 0033  SS: 002b
> >>>>
> >>>> ----------------------------------------------------------------------------
> >>>>
> >>>> It was trying to open '/dev/ipmi0', but as no entry in aux_dir, it returned
> >>>> NULL from 'idr_find()'. This drm_dp_aux_dev_get_by_minor() should have done a
> >>>> check on this, but had failed to do it.
> >>>
> >>> I think the better question is, *why* does the idr_find() return NULL? I
> >>> don't think it should, under any circumstances. I fear adding the check
> >>> here papers over some other problem, taking us further away from the
> >>> root cause.
> >>>
> >>> Also, can you reproduce this on a recent upstream kernel? The aux device
> >>> nodes were introduced in kernel v4.6. Whatever you reproduced on v3.10
> >>> is pretty much irrelevant for upstream.
> >>>
> >>>
> >>> BR,
> >>> Jani.
> >>
> >> I have not been able to reproduce this problem.
> > 
> > mknod /dev/foo c <drm_dp_aux major> 255
> > cat /dev/foo
> > 
> > should do it.
> 
> How do I determine <drm_dp_aux major>?

ls,file,stat. Take your pick.

-- 
Ville Syrjälä
Intel

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH] drm: assure aux_dev is nonzero before using it
  2019-07-12 17:06         ` Ville Syrjälä
@ 2019-07-12 17:35           ` Tony Camuso
  2019-09-23 15:03           ` Tony Camuso
  1 sibling, 0 replies; 29+ messages in thread
From: Tony Camuso @ 2019-07-12 17:35 UTC (permalink / raw)
  To: Ville Syrjälä
  Cc: Jani Nikula, dri-devel, linux-kernel, airlied, dkwon

On 7/12/19 1:06 PM, Ville Syrjälä wrote:
> On Fri, Jul 12, 2019 at 12:07:46PM -0400, Tony Camuso wrote:
>> On 7/10/19 9:56 AM, Ville Syrjälä wrote:
>>> On Wed, Jul 10, 2019 at 09:47:11AM -0400, Tony Camuso wrote:
>>>> On 5/24/19 4:36 AM, Jani Nikula wrote:
>>>>> On Thu, 23 May 2019, tcamuso <tcamuso@redhat.com> wrote:
>>>>>>    From Daniel Kwon <dkwon@redhat.com>
>>>>>>
>>>>>> The system was crashed due to invalid memory access while trying to access
>>>>>> auxiliary device.
>>>>>>
>>>>>> crash> bt
>>>>>> PID: 9863   TASK: ffff89d1bdf11040  CPU: 1   COMMAND: "ipmitool"
>>>>>>     #0 [ffff89cedd7f3868] machine_kexec at ffffffffb0663674
>>>>>>     #1 [ffff89cedd7f38c8] __crash_kexec at ffffffffb071cf62
>>>>>>     #2 [ffff89cedd7f3998] crash_kexec at ffffffffb071d050
>>>>>>     #3 [ffff89cedd7f39b0] oops_end at ffffffffb0d6d758
>>>>>>     #4 [ffff89cedd7f39d8] no_context at ffffffffb0d5bcde
>>>>>>     #5 [ffff89cedd7f3a28] __bad_area_nosemaphore at ffffffffb0d5bd75
>>>>>>     #6 [ffff89cedd7f3a78] bad_area at ffffffffb0d5c085
>>>>>>     #7 [ffff89cedd7f3aa0] __do_page_fault at ffffffffb0d7080c
>>>>>>     #8 [ffff89cedd7f3b10] do_page_fault at ffffffffb0d70905
>>>>>>     #9 [ffff89cedd7f3b40] page_fault at ffffffffb0d6c758
>>>>>>        [exception RIP: drm_dp_aux_dev_get_by_minor+0x3d]
>>>>>>        RIP: ffffffffc0a589bd  RSP: ffff89cedd7f3bf0  RFLAGS: 00010246
>>>>>>        RAX: 0000000000000000  RBX: 0000000000000000  RCX: ffff89cedd7f3fd8
>>>>>>        RDX: 0000000000000000  RSI: 0000000000000000  RDI: ffffffffc0a613e0
>>>>>>        RBP: ffff89cedd7f3bf8   R8: ffff89f1bcbabbd0   R9: 0000000000000000
>>>>>>        R10: ffff89f1be7a1cc0  R11: 0000000000000000  R12: 0000000000000000
>>>>>>        R13: ffff89f1b32a2830  R14: ffff89d18fadfa00  R15: 0000000000000000
>>>>>>        ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0018
>>>>>>        RIP: 00002b45f0d80d30  RSP: 00007ffc416066a0  RFLAGS: 00010246
>>>>>>        RAX: 0000000000000002  RBX: 000056062e212d80  RCX: 00007ffc41606810
>>>>>>        RDX: 0000000000000000  RSI: 0000000000000002  RDI: 00007ffc41606ec0
>>>>>>        RBP: 0000000000000000   R8: 000056062dfed229   R9: 00002b45f0cdf14d
>>>>>>        R10: 0000000000000002  R11: 0000000000000246  R12: 00007ffc41606ec0
>>>>>>        R13: 00007ffc41606ed0  R14: 00007ffc41606ee0  R15: 0000000000000000
>>>>>>        ORIG_RAX: 0000000000000002  CS: 0033  SS: 002b
>>>>>>
>>>>>> ----------------------------------------------------------------------------
>>>>>>
>>>>>> It was trying to open '/dev/ipmi0', but as there was no entry in aux_idr, it returned
>>>>>> NULL from 'idr_find()'. This drm_dp_aux_dev_get_by_minor() should have done a
>>>>>> check on this, but had failed to do it.
>>>>>
>>>>> I think the better question is, *why* does the idr_find() return NULL? I
>>>>> don't think it should, under any circumstances. I fear adding the check
>>>>> here papers over some other problem, taking us further away from the
>>>>> root cause.
>>>>>
>>>>> Also, can you reproduce this on a recent upstream kernel? The aux device
>>>>> nodes were introduced in kernel v4.6. Whatever you reproduced on v3.10
>>>>> is pretty much irrelevant for upstream.
>>>>>
>>>>>
>>>>> BR,
>>>>> Jani.
>>>>
>>>> I have not been able to reproduce this problem.
>>>
>>> mknod /dev/foo c <drm_dp_aux major> 255
>>> cat /dev/foo
>>>
>>> should do it.
>>
>> How do I determine <drm_dp_aux major>?
> 
> ls,file,stat. Take your pick.
> 

Doh. Thanks!!


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH] drm: assure aux_dev is nonzero before using it
  2019-07-12 17:06         ` Ville Syrjälä
  2019-07-12 17:35           ` Tony Camuso
@ 2019-09-23 15:03           ` Tony Camuso
  2019-09-23 15:22             ` Ville Syrjälä
  1 sibling, 1 reply; 29+ messages in thread
From: Tony Camuso @ 2019-09-23 15:03 UTC (permalink / raw)
  To: Ville Syrjälä
  Cc: Jani Nikula, dri-devel, linux-kernel, airlied, dkwon,
	Joe Donahue, John Feeney

On 7/12/19 1:06 PM, Ville Syrjälä wrote:
> On Fri, Jul 12, 2019 at 12:07:46PM -0400, Tony Camuso wrote:
>> On 7/10/19 9:56 AM, Ville Syrjälä wrote:
>>> On Wed, Jul 10, 2019 at 09:47:11AM -0400, Tony Camuso wrote:
>>>> On 5/24/19 4:36 AM, Jani Nikula wrote:
>>>>> On Thu, 23 May 2019, tcamuso <tcamuso@redhat.com> wrote:
>>>>>>    From Daniel Kwon <dkwon@redhat.com>
>>>>>>
>>>>>> The system was crashed due to invalid memory access while trying to access
>>>>>> auxiliary device.
>>>>>>
>>>>>> crash> bt
>>>>>> PID: 9863   TASK: ffff89d1bdf11040  CPU: 1   COMMAND: "ipmitool"
>>>>>>     #0 [ffff89cedd7f3868] machine_kexec at ffffffffb0663674
>>>>>>     #1 [ffff89cedd7f38c8] __crash_kexec at ffffffffb071cf62
>>>>>>     #2 [ffff89cedd7f3998] crash_kexec at ffffffffb071d050
>>>>>>     #3 [ffff89cedd7f39b0] oops_end at ffffffffb0d6d758
>>>>>>     #4 [ffff89cedd7f39d8] no_context at ffffffffb0d5bcde
>>>>>>     #5 [ffff89cedd7f3a28] __bad_area_nosemaphore at ffffffffb0d5bd75
>>>>>>     #6 [ffff89cedd7f3a78] bad_area at ffffffffb0d5c085
>>>>>>     #7 [ffff89cedd7f3aa0] __do_page_fault at ffffffffb0d7080c
>>>>>>     #8 [ffff89cedd7f3b10] do_page_fault at ffffffffb0d70905
>>>>>>     #9 [ffff89cedd7f3b40] page_fault at ffffffffb0d6c758
>>>>>>        [exception RIP: drm_dp_aux_dev_get_by_minor+0x3d]
>>>>>>        RIP: ffffffffc0a589bd  RSP: ffff89cedd7f3bf0  RFLAGS: 00010246
>>>>>>        RAX: 0000000000000000  RBX: 0000000000000000  RCX: ffff89cedd7f3fd8
>>>>>>        RDX: 0000000000000000  RSI: 0000000000000000  RDI: ffffffffc0a613e0
>>>>>>        RBP: ffff89cedd7f3bf8   R8: ffff89f1bcbabbd0   R9: 0000000000000000
>>>>>>        R10: ffff89f1be7a1cc0  R11: 0000000000000000  R12: 0000000000000000
>>>>>>        R13: ffff89f1b32a2830  R14: ffff89d18fadfa00  R15: 0000000000000000
>>>>>>        ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0018
>>>>>>        RIP: 00002b45f0d80d30  RSP: 00007ffc416066a0  RFLAGS: 00010246
>>>>>>        RAX: 0000000000000002  RBX: 000056062e212d80  RCX: 00007ffc41606810
>>>>>>        RDX: 0000000000000000  RSI: 0000000000000002  RDI: 00007ffc41606ec0
>>>>>>        RBP: 0000000000000000   R8: 000056062dfed229   R9: 00002b45f0cdf14d
>>>>>>        R10: 0000000000000002  R11: 0000000000000246  R12: 00007ffc41606ec0
>>>>>>        R13: 00007ffc41606ed0  R14: 00007ffc41606ee0  R15: 0000000000000000
>>>>>>        ORIG_RAX: 0000000000000002  CS: 0033  SS: 002b
>>>>>>
>>>>>> ----------------------------------------------------------------------------
>>>>>>
>>>>>> It was trying to open '/dev/ipmi0', but as there was no entry in aux_idr, it returned
>>>>>> NULL from 'idr_find()'. This drm_dp_aux_dev_get_by_minor() should have done a
>>>>>> check on this, but had failed to do it.
>>>>>
>>>>> I think the better question is, *why* does the idr_find() return NULL? I
>>>>> don't think it should, under any circumstances. I fear adding the check
>>>>> here papers over some other problem, taking us further away from the
>>>>> root cause.
>>>>>
>>>>> Also, can you reproduce this on a recent upstream kernel? The aux device
>>>>> nodes were introduced in kernel v4.6. Whatever you reproduced on v3.10
>>>>> is pretty much irrelevant for upstream.
>>>>>
>>>>>
>>>>> BR,
>>>>> Jani.
>>>>
>>>> I have not been able to reproduce this problem.
>>>
>>> mknod /dev/foo c <drm_dp_aux major> 255
>>> cat /dev/foo
>>>
>>> should do it.
>>
>> How do I determine <drm_dp_aux major>?
> 
> ls,file,stat. Take your pick.
> 

Problem here is I can't ls,file,stat /dev/foo until after it's created,
but I need to know the drm_dp_aux major number before I can use mknod.

What am I missing here?


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH] drm: assure aux_dev is nonzero before using it
  2019-09-23 15:03           ` Tony Camuso
@ 2019-09-23 15:22             ` Ville Syrjälä
  0 siblings, 0 replies; 29+ messages in thread
From: Ville Syrjälä @ 2019-09-23 15:22 UTC (permalink / raw)
  To: Tony Camuso
  Cc: Jani Nikula, dri-devel, linux-kernel, airlied, dkwon,
	Joe Donahue, John Feeney

On Mon, Sep 23, 2019 at 11:03:35AM -0400, Tony Camuso wrote:
> On 7/12/19 1:06 PM, Ville Syrjälä wrote:
> > On Fri, Jul 12, 2019 at 12:07:46PM -0400, Tony Camuso wrote:
> >> On 7/10/19 9:56 AM, Ville Syrjälä wrote:
> >>> On Wed, Jul 10, 2019 at 09:47:11AM -0400, Tony Camuso wrote:
> >>>> On 5/24/19 4:36 AM, Jani Nikula wrote:
> >>>>> On Thu, 23 May 2019, tcamuso <tcamuso@redhat.com> wrote:
> >>>>>>    From Daniel Kwon <dkwon@redhat.com>
> >>>>>>
> >>>>>> The system was crashed due to invalid memory access while trying to access
> >>>>>> auxiliary device.
> >>>>>>
> >>>>>> crash> bt
> >>>>>> PID: 9863   TASK: ffff89d1bdf11040  CPU: 1   COMMAND: "ipmitool"
> >>>>>>     #0 [ffff89cedd7f3868] machine_kexec at ffffffffb0663674
> >>>>>>     #1 [ffff89cedd7f38c8] __crash_kexec at ffffffffb071cf62
> >>>>>>     #2 [ffff89cedd7f3998] crash_kexec at ffffffffb071d050
> >>>>>>     #3 [ffff89cedd7f39b0] oops_end at ffffffffb0d6d758
> >>>>>>     #4 [ffff89cedd7f39d8] no_context at ffffffffb0d5bcde
> >>>>>>     #5 [ffff89cedd7f3a28] __bad_area_nosemaphore at ffffffffb0d5bd75
> >>>>>>     #6 [ffff89cedd7f3a78] bad_area at ffffffffb0d5c085
> >>>>>>     #7 [ffff89cedd7f3aa0] __do_page_fault at ffffffffb0d7080c
> >>>>>>     #8 [ffff89cedd7f3b10] do_page_fault at ffffffffb0d70905
> >>>>>>     #9 [ffff89cedd7f3b40] page_fault at ffffffffb0d6c758
> >>>>>>        [exception RIP: drm_dp_aux_dev_get_by_minor+0x3d]
> >>>>>>        RIP: ffffffffc0a589bd  RSP: ffff89cedd7f3bf0  RFLAGS: 00010246
> >>>>>>        RAX: 0000000000000000  RBX: 0000000000000000  RCX: ffff89cedd7f3fd8
> >>>>>>        RDX: 0000000000000000  RSI: 0000000000000000  RDI: ffffffffc0a613e0
> >>>>>>        RBP: ffff89cedd7f3bf8   R8: ffff89f1bcbabbd0   R9: 0000000000000000
> >>>>>>        R10: ffff89f1be7a1cc0  R11: 0000000000000000  R12: 0000000000000000
> >>>>>>        R13: ffff89f1b32a2830  R14: ffff89d18fadfa00  R15: 0000000000000000
> >>>>>>        ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0018
> >>>>>>        RIP: 00002b45f0d80d30  RSP: 00007ffc416066a0  RFLAGS: 00010246
> >>>>>>        RAX: 0000000000000002  RBX: 000056062e212d80  RCX: 00007ffc41606810
> >>>>>>        RDX: 0000000000000000  RSI: 0000000000000002  RDI: 00007ffc41606ec0
> >>>>>>        RBP: 0000000000000000   R8: 000056062dfed229   R9: 00002b45f0cdf14d
> >>>>>>        R10: 0000000000000002  R11: 0000000000000246  R12: 00007ffc41606ec0
> >>>>>>        R13: 00007ffc41606ed0  R14: 00007ffc41606ee0  R15: 0000000000000000
> >>>>>>        ORIG_RAX: 0000000000000002  CS: 0033  SS: 002b
> >>>>>>
> >>>>>> ----------------------------------------------------------------------------
> >>>>>>
> >>>>>> It was trying to open '/dev/ipmi0', but as there was no entry in aux_idr, it returned
> >>>>>> NULL from 'idr_find()'. This drm_dp_aux_dev_get_by_minor() should have done a
> >>>>>> check on this, but had failed to do it.
> >>>>>
> >>>>> I think the better question is, *why* does the idr_find() return NULL? I
> >>>>> don't think it should, under any circumstances. I fear adding the check
> >>>>> here papers over some other problem, taking us further away from the
> >>>>> root cause.
> >>>>>
> >>>>> Also, can you reproduce this on a recent upstream kernel? The aux device
> >>>>> nodes were introduced in kernel v4.6. Whatever you reproduced on v3.10
> >>>>> is pretty much irrelevant for upstream.
> >>>>>
> >>>>>
> >>>>> BR,
> >>>>> Jani.
> >>>>
> >>>> I have not been able to reproduce this problem.
> >>>
> >>> mknod /dev/foo c <drm_dp_aux major> 255
> >>> cat /dev/foo
> >>>
> >>> should do it.
> >>
> >> How do I determine <drm_dp_aux major>?
> > 
> > ls,file,stat. Take your pick.
> > 
> 
> Problem here is I can't ls,file,stat /dev/foo until after it's created,
> but I need to know the drm_dp_aux major number before I can use mknod.
> 
> What am I missing here?

udev/whatever should create a bunch of these for you so you can check
from them. If not, then dig around in /sys/class/drm_dp_aux_dev.

-- 
Ville Syrjälä
Intel

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH] drm: assure aux_dev is nonzero before using it
  2020-08-18 17:58           ` Zwane Mwaikambo
@ 2020-09-08 18:41             ` Lyude Paul
  -1 siblings, 0 replies; 29+ messages in thread
From: Lyude Paul @ 2020-09-08 18:41 UTC (permalink / raw)
  To: Zwane Mwaikambo; +Cc: Daniel Vetter, tcamuso, dkwon, Linux Kernel, dri-devel

On Tue, 2020-08-18 at 10:58 -0700, Zwane Mwaikambo wrote:
> On Wed, 12 Aug 2020, Lyude Paul wrote:
> 
> > On Wed, 2020-08-12 at 16:10 +0200, Daniel Vetter wrote:
> > > On Wed, Aug 12, 2020 at 12:16 AM Zwane Mwaikambo <zwanem@gmail.com>
> > > wrote:
> > > > On Tue, 11 Aug 2020, Daniel Vetter wrote:
> > > > 
> > > > > On Mon, Aug 10, 2020 at 10:11:50AM -0700, Zwane Mwaikambo wrote:
> > > > > > Hi Folks,
> > > > > >     I know this thread eventually dropped off due to not
> > > > > > identifying
> > > > > > > the underlying issue. It's still occurring on 5.8 and in my case it
> > > > > > happened because the udev device nodes for the DP aux devices were
> > > > > > not
> > > > > > cleaned up whereas the kernel had no association with them. I can
> > > > > > reproduce the bug just by creating a device node for a non-
> > > > > > existent
> > > > > > minor
> > > > > > device and calling open().
> > > > > 
> > > > > Hm I don't have that thread anymore, but generally these bugs are
> > > > > solved
> > > > > by not registering the device before it's ready for use. We do have
> > > > > drm_connector->late_register for that stuff. Just a guess since I'm
> > > > > not
> > > > > seeing full details here.
> > > > 
> > > > In this particular case, the physical device disappeared before the
> > > > nodes
> > > > were cleaned up. It involves putting a computer to sleep with a
> > > > monitor
> > > > plugged in and then waking it up with the monitor unplugged.
> > > 
> > > We also have early_unregister for the reverse, but yes this sounds
> > > more tricky ... Adding Lyude who's been working on way too much
> > > lifetime fun around dp recently.
> > > -Daniel
> > > 
> > Hi-I think just checking whether the auxdev is NULL or not is a reasonable
> > fix, although I am curious as to how exactly the aux dev's parent is
> > getting
> > > destroyed before its child, which I would have thought would be the only
> > way
> > you could hit this?
> 
> Hi, If this is acceptable, would you consider an updated patch against 
> 5.8?

Sure-although the process to getting this into stable is to get the patch into
drm-next first, then it can get cherry-picked into the stable kernel branches.
See https://www.kernel.org/doc/html/latest/process/stable-kernel-rules.html

> 
> Thanks,
> 	Zwane
> 
-- 
Cheers,
	Lyude Paul (she/her)
	Software Engineer at Red Hat


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH] drm: assure aux_dev is nonzero before using it
@ 2020-09-08 18:41             ` Lyude Paul
  0 siblings, 0 replies; 29+ messages in thread
From: Lyude Paul @ 2020-09-08 18:41 UTC (permalink / raw)
  To: Zwane Mwaikambo; +Cc: dri-devel, dkwon, Linux Kernel, tcamuso

On Tue, 2020-08-18 at 10:58 -0700, Zwane Mwaikambo wrote:
> On Wed, 12 Aug 2020, Lyude Paul wrote:
> 
> > On Wed, 2020-08-12 at 16:10 +0200, Daniel Vetter wrote:
> > > On Wed, Aug 12, 2020 at 12:16 AM Zwane Mwaikambo <zwanem@gmail.com>
> > > wrote:
> > > > On Tue, 11 Aug 2020, Daniel Vetter wrote:
> > > > 
> > > > > On Mon, Aug 10, 2020 at 10:11:50AM -0700, Zwane Mwaikambo wrote:
> > > > > > Hi Folks,
> > > > > >     I know this thread eventually dropped off due to not
> > > > > > identifying
> > > > > > > the underlying issue. It's still occurring on 5.8 and in my case it
> > > > > > happened because the udev device nodes for the DP aux devices were
> > > > > > not
> > > > > > cleaned up whereas the kernel had no association with them. I can
> > > > > > reproduce the bug just by creating a device node for a non-
> > > > > > existent
> > > > > > minor
> > > > > > device and calling open().
> > > > > 
> > > > > Hm I don't have that thread anymore, but generally these bugs are
> > > > > solved
> > > > > by not registering the device before it's ready for use. We do have
> > > > > drm_connector->late_register for that stuff. Just a guess since I'm
> > > > > not
> > > > > seeing full details here.
> > > > 
> > > > In this particular case, the physical device disappeared before the
> > > > nodes
> > > > were cleaned up. It involves putting a computer to sleep with a
> > > > monitor
> > > > plugged in and then waking it up with the monitor unplugged.
> > > 
> > > We also have early_unregister for the reverse, but yes this sounds
> > > more tricky ... Adding Lyude who's been working on way too much
> > > lifetime fun around dp recently.
> > > -Daniel
> > > 
> > Hi-I think just checking whether the auxdev is NULL or not is a reasonable
> > fix, although I am curious as to how exactly the aux dev's parent is
> > getting
> > > destroyed before its child, which I would have thought would be the only
> > way
> > you could hit this?
> 
> Hi, If this is acceptable, would you consider an updated patch against 
> 5.8?

Sure-although the process to getting this into stable is to get the patch into
drm-next first, then it can get cherry-picked into the stable kernel branches.
See https://www.kernel.org/doc/html/latest/process/stable-kernel-rules.html

> 
> Thanks,
> 	Zwane
> 
-- 
Cheers,
	Lyude Paul (she/her)
	Software Engineer at Red Hat

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH] drm: assure aux_dev is nonzero before using it
  2020-08-12 15:44         ` Lyude Paul
@ 2020-08-18 17:58           ` Zwane Mwaikambo
  -1 siblings, 0 replies; 29+ messages in thread
From: Zwane Mwaikambo @ 2020-08-18 17:58 UTC (permalink / raw)
  To: Lyude Paul; +Cc: Daniel Vetter, tcamuso, dkwon, Linux Kernel, dri-devel

On Wed, 12 Aug 2020, Lyude Paul wrote:

> On Wed, 2020-08-12 at 16:10 +0200, Daniel Vetter wrote:
> > On Wed, Aug 12, 2020 at 12:16 AM Zwane Mwaikambo <zwanem@gmail.com> wrote:
> > > On Tue, 11 Aug 2020, Daniel Vetter wrote:
> > > 
> > > > On Mon, Aug 10, 2020 at 10:11:50AM -0700, Zwane Mwaikambo wrote:
> > > > > Hi Folks,
> > > > >     I know this thread eventually dropped off due to not identifying
> > > > > > the underlying issue. It's still occurring on 5.8 and in my case it
> > > > > happened because the udev device nodes for the DP aux devices were not
> > > > > cleaned up whereas the kernel had no association with them. I can
> > > > > reproduce the bug just by creating a device node for a non-existent
> > > > > minor
> > > > > device and calling open().
> > > > 
> > > > Hm I don't have that thread anymore, but generally these bugs are solved
> > > > by not registering the device before it's ready for use. We do have
> > > > drm_connector->late_register for that stuff. Just a guess since I'm not
> > > > seeing full details here.
> > > 
> > > In this particular case, the physical device disappeared before the nodes
> > > were cleaned up. It involves putting a computer to sleep with a monitor
> > > plugged in and then waking it up with the monitor unplugged.
> > 
> > We also have early_unregister for the reverse, but yes this sounds
> > more tricky ... Adding Lyude who's been working on way too much
> > lifetime fun around dp recently.
> > -Daniel
> > 
> Hi-I think just checking whether the auxdev is NULL or not is a reasonable
> fix, although I am curious as to how exactly the aux dev's parent is getting
> destroyed before its child, which I would have thought would be the only way
> you could hit this?

Hi, If this is acceptable, would you consider an updated patch against 
5.8?

Thanks,
	Zwane

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH] drm: assure aux_dev is nonzero before using it
@ 2020-08-18 17:58           ` Zwane Mwaikambo
  0 siblings, 0 replies; 29+ messages in thread
From: Zwane Mwaikambo @ 2020-08-18 17:58 UTC (permalink / raw)
  To: Lyude Paul; +Cc: dri-devel, dkwon, Linux Kernel, tcamuso

On Wed, 12 Aug 2020, Lyude Paul wrote:

> On Wed, 2020-08-12 at 16:10 +0200, Daniel Vetter wrote:
> > On Wed, Aug 12, 2020 at 12:16 AM Zwane Mwaikambo <zwanem@gmail.com> wrote:
> > > On Tue, 11 Aug 2020, Daniel Vetter wrote:
> > > 
> > > > On Mon, Aug 10, 2020 at 10:11:50AM -0700, Zwane Mwaikambo wrote:
> > > > > Hi Folks,
> > > > >     I know this thread eventually dropped off due to not identifying
> > > > > > the underlying issue. It's still occurring on 5.8 and in my case it
> > > > > happened because the udev device nodes for the DP aux devices were not
> > > > > cleaned up whereas the kernel had no association with them. I can
> > > > > reproduce the bug just by creating a device node for a non-existent
> > > > > minor
> > > > > device and calling open().
> > > > 
> > > > Hm I don't have that thread anymore, but generally these bugs are solved
> > > > by not registering the device before it's ready for use. We do have
> > > > drm_connector->late_register for that stuff. Just a guess since I'm not
> > > > seeing full details here.
> > > 
> > > In this particular case, the physical device disappeared before the nodes
> > > were cleaned up. It involves putting a computer to sleep with a monitor
> > > plugged in and then waking it up with the monitor unplugged.
> > 
> > We also have early_unregister for the reverse, but yes this sounds
> > more tricky ... Adding Lyude who's been working on way too much
> > lifetime fun around dp recently.
> > -Daniel
> > 
> Hi-I think just checking whether the auxdev is NULL or not is a reasonable
> fix, although I am curious as to how exactly the aux dev's parent is getting
> destroyed before its child, which I would have thought would be the only way
> you could hit this?

Hi, If this is acceptable, would you consider an updated patch against 
5.8?

Thanks,
	Zwane
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH] drm: assure aux_dev is nonzero before using it
  2020-08-12 15:44         ` Lyude Paul
@ 2020-08-12 20:21           ` Zwane Mwaikambo
  -1 siblings, 0 replies; 29+ messages in thread
From: Zwane Mwaikambo @ 2020-08-12 20:21 UTC (permalink / raw)
  To: Lyude Paul; +Cc: Daniel Vetter, tcamuso, dkwon, Linux Kernel, dri-devel

On Wed, 12 Aug 2020, Lyude Paul wrote:

> On Wed, 2020-08-12 at 16:10 +0200, Daniel Vetter wrote:
> > On Wed, Aug 12, 2020 at 12:16 AM Zwane Mwaikambo <zwanem@gmail.com> wrote:
> > > On Tue, 11 Aug 2020, Daniel Vetter wrote:
> > > 
> > > > On Mon, Aug 10, 2020 at 10:11:50AM -0700, Zwane Mwaikambo wrote:
> > > > > Hi Folks,
> > > > >     I know this thread eventually dropped off due to not identifying
> > > > > > the underlying issue. It's still occurring on 5.8 and in my case it
> > > > > happened because the udev device nodes for the DP aux devices were not
> > > > > cleaned up whereas the kernel had no association with them. I can
> > > > > reproduce the bug just by creating a device node for a non-existent
> > > > > minor
> > > > > device and calling open().
> > > > 
> > > > Hm I don't have that thread anymore, but generally these bugs are solved
> > > > by not registering the device before it's ready for use. We do have
> > > > drm_connector->late_register for that stuff. Just a guess since I'm not
> > > > seeing full details here.
> > > 
> > > In this particular case, the physical device disappeared before the nodes
> > > were cleaned up. It involves putting a computer to sleep with a monitor
> > > plugged in and then waking it up with the monitor unplugged.
> > 
> > We also have early_unregister for the reverse, but yes this sounds
> > more tricky ... Adding Lyude who's been working on way too much
> > lifetime fun around dp recently.
> > -Daniel
> > 
> Hi-I think just checking whether the auxdev is NULL or not is a reasonable
> fix, although I am curious as to how exactly the aux dev's parent is getting
> destroyed before its child, which I would have thought would be the only way
> you could hit this?

Here is what it looks like without (1) and with (2) monitor connected. In 
the case where the monitor disappears during suspend, the device nodes 
aux3,4 are still around

1) No monitor connected
ls -l /dev/drm*
crw------- 1 root root 238, 0 Aug  6 22:32 /dev/drm_dp_aux0
crw------- 1 root root 238, 1 Aug  6 22:32 /dev/drm_dp_aux1


2) Monitor connected
crw------- 1 root root 238, 0 Aug  6 22:32 /dev/drm_dp_aux0
crw------- 1 root root 238, 1 Aug  6 22:32 /dev/drm_dp_aux1
crw------- 1 root root 238, 2 Aug 11 14:51 /dev/drm_dp_aux2
crw------- 1 root root 238, 3 Aug 11 14:51 /dev/drm_dp_aux3
crw------- 1 root root 238, 4 Aug 11 14:51 /dev/drm_dp_aux4



> 
> > > 
> > > > > To me it still makes sense to just check aux_dev because the chardev
> > > > > has
> > > > > no way to check before calling.
> > > > > 
> > > > > (gdb) list *drm_dp_aux_dev_get_by_minor+0x29
> > > > > 0x17b39 is in drm_dp_aux_dev_get_by_minor
> > > > > (drivers/gpu/drm/drm_dp_aux_dev.c:65).
> > > > > 60      static struct drm_dp_aux_dev
> > > > > *drm_dp_aux_dev_get_by_minor(unsigned index)
> > > > > 61      {
> > > > > 62              struct drm_dp_aux_dev *aux_dev = NULL;
> > > > > 63
> > > > > 64              mutex_lock(&aux_idr_mutex);
> > > > > 65              aux_dev = idr_find(&aux_idr, index);
> > > > > 66              if (!kref_get_unless_zero(&aux_dev->refcount))
> > > > > 67                      aux_dev = NULL;
> > > > > 68              mutex_unlock(&aux_idr_mutex);
> > > > > 69
> > > > > (gdb) p/x &((struct drm_dp_aux_dev *)(0x0))->refcount
> > > > > $8 = 0x18
> > > > > 
> > > > > static int auxdev_open(struct inode *inode, struct file *file)
> > > > > {
> > > > >     unsigned int minor = iminor(inode);
> > > > >     struct drm_dp_aux_dev *aux_dev;
> > > > > 
> > > > >     aux_dev = drm_dp_aux_dev_get_by_minor(minor);
> > > > >     if (!aux_dev)
> > > > >         return -ENODEV;
> > > > > 
> > > > >     file->private_data = aux_dev;
> > > > >     return 0;
> > > > > }
> > > > > 
> > > > > 
> > > > > _______________________________________________
> > > > > dri-devel mailing list
> > > > > dri-devel@lists.freedesktop.org
> > > > > https://lists.freedesktop.org/mailman/listinfo/dri-devel
> > 
> > 
> 

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH] drm: assure aux_dev is nonzero before using it
@ 2020-08-12 20:21           ` Zwane Mwaikambo
  0 siblings, 0 replies; 29+ messages in thread
From: Zwane Mwaikambo @ 2020-08-12 20:21 UTC (permalink / raw)
  To: Lyude Paul; +Cc: dri-devel, dkwon, Linux Kernel, tcamuso

On Wed, 12 Aug 2020, Lyude Paul wrote:

> On Wed, 2020-08-12 at 16:10 +0200, Daniel Vetter wrote:
> > On Wed, Aug 12, 2020 at 12:16 AM Zwane Mwaikambo <zwanem@gmail.com> wrote:
> > > On Tue, 11 Aug 2020, Daniel Vetter wrote:
> > > 
> > > > On Mon, Aug 10, 2020 at 10:11:50AM -0700, Zwane Mwaikambo wrote:
> > > > > Hi Folks,
> > > > >     I know this thread eventually dropped off due to not identifying
> > > > > > the underlying issue. It's still occurring on 5.8 and in my case it
> > > > > happened because the udev device nodes for the DP aux devices were not
> > > > > cleaned up whereas the kernel had no association with them. I can
> > > > > reproduce the bug just by creating a device node for a non-existent
> > > > > minor
> > > > > device and calling open().
> > > > 
> > > > Hm I don't have that thread anymore, but generally these bugs are solved
> > > > by not registering the device before it's ready for use. We do have
> > > > drm_connector->late_register for that stuff. Just a guess since I'm not
> > > > seeing full details here.
> > > 
> > > In this particular case, the physical device disappeared before the nodes
> > > were cleaned up. It involves putting a computer to sleep with a monitor
> > > plugged in and then waking it up with the monitor unplugged.
> > 
> > We also have early_unregister for the reverse, but yes this sounds
> > more tricky ... Adding Lyude who's been working on way too much
> > lifetime fun around dp recently.
> > -Daniel
> > 
> Hi-I think just checking whether the auxdev is NULL or not is a reasonable
> fix, although I am curious as to how exactly the aux dev's parent is getting
> destroyed before its child, which I would have thought would be the only way
> you could hit this?

Here is what it looks like without (1) and with (2) monitor connected. In 
the case where the monitor disappears during suspend, the device nodes 
aux3,4 are still around

1) No monitor connected
ls -l /dev/drm*
crw------- 1 root root 238, 0 Aug  6 22:32 /dev/drm_dp_aux0
crw------- 1 root root 238, 1 Aug  6 22:32 /dev/drm_dp_aux1


2) Monitor connected
crw------- 1 root root 238, 0 Aug  6 22:32 /dev/drm_dp_aux0
crw------- 1 root root 238, 1 Aug  6 22:32 /dev/drm_dp_aux1
crw------- 1 root root 238, 2 Aug 11 14:51 /dev/drm_dp_aux2
crw------- 1 root root 238, 3 Aug 11 14:51 /dev/drm_dp_aux3
crw------- 1 root root 238, 4 Aug 11 14:51 /dev/drm_dp_aux4



> 
> > > 
> > > > > To me it still makes sense to just check aux_dev because the chardev
> > > > > has
> > > > > no way to check before calling.
> > > > > 
> > > > > (gdb) list *drm_dp_aux_dev_get_by_minor+0x29
> > > > > 0x17b39 is in drm_dp_aux_dev_get_by_minor
> > > > > (drivers/gpu/drm/drm_dp_aux_dev.c:65).
> > > > > 60      static struct drm_dp_aux_dev
> > > > > *drm_dp_aux_dev_get_by_minor(unsigned index)
> > > > > 61      {
> > > > > 62              struct drm_dp_aux_dev *aux_dev = NULL;
> > > > > 63
> > > > > 64              mutex_lock(&aux_idr_mutex);
> > > > > 65              aux_dev = idr_find(&aux_idr, index);
> > > > > 66              if (!kref_get_unless_zero(&aux_dev->refcount))
> > > > > 67                      aux_dev = NULL;
> > > > > 68              mutex_unlock(&aux_idr_mutex);
> > > > > 69
> > > > > (gdb) p/x &((struct drm_dp_aux_dev *)(0x0))->refcount
> > > > > $8 = 0x18
> > > > > 
> > > > > static int auxdev_open(struct inode *inode, struct file *file)
> > > > > {
> > > > >     unsigned int minor = iminor(inode);
> > > > >     struct drm_dp_aux_dev *aux_dev;
> > > > > 
> > > > >     aux_dev = drm_dp_aux_dev_get_by_minor(minor);
> > > > >     if (!aux_dev)
> > > > >         return -ENODEV;
> > > > > 
> > > > >     file->private_data = aux_dev;
> > > > >     return 0;
> > > > > }
> > > > > 
> > > > > 
> > > > > _______________________________________________
> > > > > dri-devel mailing list
> > > > > dri-devel@lists.freedesktop.org
> > > > > https://lists.freedesktop.org/mailman/listinfo/dri-devel
> > 
> > 
> 
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH] drm: assure aux_dev is nonzero before using it
  2020-08-12 14:10       ` Daniel Vetter
@ 2020-08-12 15:44         ` Lyude Paul
  -1 siblings, 0 replies; 29+ messages in thread
From: Lyude Paul @ 2020-08-12 15:44 UTC (permalink / raw)
  To: Daniel Vetter, Zwane Mwaikambo; +Cc: tcamuso, dkwon, Linux Kernel, dri-devel

On Wed, 2020-08-12 at 16:10 +0200, Daniel Vetter wrote:
> On Wed, Aug 12, 2020 at 12:16 AM Zwane Mwaikambo <zwanem@gmail.com> wrote:
> > On Tue, 11 Aug 2020, Daniel Vetter wrote:
> > 
> > > On Mon, Aug 10, 2020 at 10:11:50AM -0700, Zwane Mwaikambo wrote:
> > > > Hi Folks,
> > > >     I know this thread eventually dropped off due to not identifying
> > > > the underlying issue. It's still occuring on 5.8 and in my case it
> > > > happened because the udev device nodes for the DP aux devices were not
> > > > cleaned up whereas the kernel had no association with them. I can
> > > > reproduce the bug just by creating a device node for a non-existent
> > > > minor
> > > > device and calling open().
> > > 
> > > Hm I don't have that thread anymore, but generally these bugs are solved
> > > by not registering the device before it's ready for use. We do have
> > > drm_connector->late_register for that stuff. Just a guess since I'm not
> > > seeing full details here.
> > 
> > In this particular case, the physical device disappeared before the nodes
> > were cleaned up. It involves putting a computer to sleep with a monitor
> > plugged in and then waking it up with the monitor unplugged.
> 
> We also have early_unregister for the reverse, but yes this sounds
> more tricky ... Adding Lyude who's been working on way too much
> lifetime fun around dp recently.
> -Daniel
> 
Hi-I think just checking whether the auxdev is NULL or not is a reasonable
fix, although I am curious as to how exactly the aux dev's parent is getting
destroyed before its child, which I would have thought would be the only way
you could hit this?

> > 
> > > > To me it still makes sense to just check aux_dev because the chardev
> > > > has
> > > > no way to check before calling.
> > > > 
> > > > (gdb) list *drm_dp_aux_dev_get_by_minor+0x29
> > > > 0x17b39 is in drm_dp_aux_dev_get_by_minor
> > > > (drivers/gpu/drm/drm_dp_aux_dev.c:65).
> > > > 60      static struct drm_dp_aux_dev
> > > > *drm_dp_aux_dev_get_by_minor(unsigned index)
> > > > 61      {
> > > > 62              struct drm_dp_aux_dev *aux_dev = NULL;
> > > > 63
> > > > 64              mutex_lock(&aux_idr_mutex);
> > > > 65              aux_dev = idr_find(&aux_idr, index);
> > > > 66              if (!kref_get_unless_zero(&aux_dev->refcount))
> > > > 67                      aux_dev = NULL;
> > > > 68              mutex_unlock(&aux_idr_mutex);
> > > > 69
> > > > (gdb) p/x &((struct drm_dp_aux_dev *)(0x0))->refcount
> > > > $8 = 0x18
> > > > 
> > > > static int auxdev_open(struct inode *inode, struct file *file)
> > > > {
> > > >     unsigned int minor = iminor(inode);
> > > >     struct drm_dp_aux_dev *aux_dev;
> > > > 
> > > >     aux_dev = drm_dp_aux_dev_get_by_minor(minor);
> > > >     if (!aux_dev)
> > > >         return -ENODEV;
> > > > 
> > > >     file->private_data = aux_dev;
> > > >     return 0;
> > > > }
> > > > 
> > > > 
> > > > _______________________________________________
> > > > dri-devel mailing list
> > > > dri-devel@lists.freedesktop.org
> > > > https://lists.freedesktop.org/mailman/listinfo/dri-devel
> 
> 
-- 
Cheers,
	Lyude Paul (she/her)
	Software Engineer at Red Hat


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH] drm: assure aux_dev is nonzero before using it
@ 2020-08-12 15:44         ` Lyude Paul
  0 siblings, 0 replies; 29+ messages in thread
From: Lyude Paul @ 2020-08-12 15:44 UTC (permalink / raw)
  To: Daniel Vetter, Zwane Mwaikambo; +Cc: dkwon, Linux Kernel, dri-devel, tcamuso

On Wed, 2020-08-12 at 16:10 +0200, Daniel Vetter wrote:
> On Wed, Aug 12, 2020 at 12:16 AM Zwane Mwaikambo <zwanem@gmail.com> wrote:
> > On Tue, 11 Aug 2020, Daniel Vetter wrote:
> > 
> > > On Mon, Aug 10, 2020 at 10:11:50AM -0700, Zwane Mwaikambo wrote:
> > > > Hi Folks,
> > > >     I know this thread eventually dropped off due to not identifying
> > > > the underlying issue. It's still occuring on 5.8 and in my case it
> > > > happened because the udev device nodes for the DP aux devices were not
> > > > cleaned up whereas the kernel had no association with them. I can
> > > > reproduce the bug just by creating a device node for a non-existent
> > > > minor
> > > > device and calling open().
> > > 
> > > Hm I don't have that thread anymore, but generally these bugs are solved
> > > by not registering the device before it's ready for use. We do have
> > > drm_connector->late_register for that stuff. Just a guess since I'm not
> > > seeing full details here.
> > 
> > In this particular case, the physical device disappeared before the nodes
> > were cleaned up. It involves putting a computer to sleep with a monitor
> > plugged in and then waking it up with the monitor unplugged.
> 
> We also have early_unregister for the reverse, but yes this sounds
> more tricky ... Adding Lyude who's been working on way too much
> lifetime fun around dp recently.
> -Daniel
> 
Hi-I think just checking whether the auxdev is NULL or not is a reasonable
fix, although I am curious as to how exactly the aux dev's parent is getting
destroyed before its child, which I would have thought would be the only way
you could hit this?

> > 
> > > > To me it still makes sense to just check aux_dev because the chardev
> > > > has
> > > > no way to check before calling.
> > > > 
> > > > (gdb) list *drm_dp_aux_dev_get_by_minor+0x29
> > > > 0x17b39 is in drm_dp_aux_dev_get_by_minor
> > > > (drivers/gpu/drm/drm_dp_aux_dev.c:65).
> > > > 60      static struct drm_dp_aux_dev
> > > > *drm_dp_aux_dev_get_by_minor(unsigned index)
> > > > 61      {
> > > > 62              struct drm_dp_aux_dev *aux_dev = NULL;
> > > > 63
> > > > 64              mutex_lock(&aux_idr_mutex);
> > > > 65              aux_dev = idr_find(&aux_idr, index);
> > > > 66              if (!kref_get_unless_zero(&aux_dev->refcount))
> > > > 67                      aux_dev = NULL;
> > > > 68              mutex_unlock(&aux_idr_mutex);
> > > > 69
> > > > (gdb) p/x &((struct drm_dp_aux_dev *)(0x0))->refcount
> > > > $8 = 0x18
> > > > 
> > > > static int auxdev_open(struct inode *inode, struct file *file)
> > > > {
> > > >     unsigned int minor = iminor(inode);
> > > >     struct drm_dp_aux_dev *aux_dev;
> > > > 
> > > >     aux_dev = drm_dp_aux_dev_get_by_minor(minor);
> > > >     if (!aux_dev)
> > > >         return -ENODEV;
> > > > 
> > > >     file->private_data = aux_dev;
> > > >     return 0;
> > > > }
> > > > 
> > > > 
> > > > _______________________________________________
> > > > dri-devel mailing list
> > > > dri-devel@lists.freedesktop.org
> > > > https://lists.freedesktop.org/mailman/listinfo/dri-devel
> 
> 
-- 
Cheers,
	Lyude Paul (she/her)
	Software Engineer at Red Hat

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH] drm: assure aux_dev is nonzero before using it
  2020-08-11 22:16     ` Zwane Mwaikambo
@ 2020-08-12 14:10       ` Daniel Vetter
  -1 siblings, 0 replies; 29+ messages in thread
From: Daniel Vetter @ 2020-08-12 14:10 UTC (permalink / raw)
  To: Zwane Mwaikambo, Lyude; +Cc: tcamuso, dkwon, Linux Kernel, dri-devel

On Wed, Aug 12, 2020 at 12:16 AM Zwane Mwaikambo <zwanem@gmail.com> wrote:
>
> On Tue, 11 Aug 2020, Daniel Vetter wrote:
>
> > On Mon, Aug 10, 2020 at 10:11:50AM -0700, Zwane Mwaikambo wrote:
> > > Hi Folks,
> > >     I know this thread eventually dropped off due to not identifying
> > > the underlying issue. It's still occuring on 5.8 and in my case it
> > > happened because the udev device nodes for the DP aux devices were not
> > > cleaned up whereas the kernel had no association with them. I can
> > > reproduce the bug just by creating a device node for a non-existent minor
> > > device and calling open().
> >
> > Hm I don't have that thread anymore, but generally these bugs are solved
> > by not registering the device before it's ready for use. We do have
> > drm_connector->late_register for that stuff. Just a guess since I'm not
> > seeing full details here.
>
> In this particular case, the physical device disappeared before the nodes
> were cleaned up. It involves putting a computer to sleep with a monitor
> plugged in and then waking it up with the monitor unplugged.

We also have early_unregister for the reverse, but yes this sounds
more tricky ... Adding Lyude who's been working on way too much
lifetime fun around dp recently.
-Daniel

>
>
> > >
> > > To me it still makes sense to just check aux_dev because the chardev has
> > > no way to check before calling.
> > >
> > > (gdb) list *drm_dp_aux_dev_get_by_minor+0x29
> > > 0x17b39 is in drm_dp_aux_dev_get_by_minor (drivers/gpu/drm/drm_dp_aux_dev.c:65).
> > > 60      static struct drm_dp_aux_dev *drm_dp_aux_dev_get_by_minor(unsigned index)
> > > 61      {
> > > 62              struct drm_dp_aux_dev *aux_dev = NULL;
> > > 63
> > > 64              mutex_lock(&aux_idr_mutex);
> > > 65              aux_dev = idr_find(&aux_idr, index);
> > > 66              if (!kref_get_unless_zero(&aux_dev->refcount))
> > > 67                      aux_dev = NULL;
> > > 68              mutex_unlock(&aux_idr_mutex);
> > > 69
> > > (gdb) p/x &((struct drm_dp_aux_dev *)(0x0))->refcount
> > > $8 = 0x18
> > >
> > > static int auxdev_open(struct inode *inode, struct file *file)
> > > {
> > >     unsigned int minor = iminor(inode);
> > >     struct drm_dp_aux_dev *aux_dev;
> > >
> > >     aux_dev = drm_dp_aux_dev_get_by_minor(minor);
> > >     if (!aux_dev)
> > >         return -ENODEV;
> > >
> > >     file->private_data = aux_dev;
> > >     return 0;
> > > }
> > >
> > >
> > > _______________________________________________
> > > dri-devel mailing list
> > > dri-devel@lists.freedesktop.org
> > > https://lists.freedesktop.org/mailman/listinfo/dri-devel
> >
> >



-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH] drm: assure aux_dev is nonzero before using it
@ 2020-08-12 14:10       ` Daniel Vetter
  0 siblings, 0 replies; 29+ messages in thread
From: Daniel Vetter @ 2020-08-12 14:10 UTC (permalink / raw)
  To: Zwane Mwaikambo, Lyude; +Cc: dkwon, Linux Kernel, dri-devel, tcamuso

On Wed, Aug 12, 2020 at 12:16 AM Zwane Mwaikambo <zwanem@gmail.com> wrote:
>
> On Tue, 11 Aug 2020, Daniel Vetter wrote:
>
> > On Mon, Aug 10, 2020 at 10:11:50AM -0700, Zwane Mwaikambo wrote:
> > > Hi Folks,
> > >     I know this thread eventually dropped off due to not identifying
> > > the underlying issue. It's still occuring on 5.8 and in my case it
> > > happened because the udev device nodes for the DP aux devices were not
> > > cleaned up whereas the kernel had no association with them. I can
> > > reproduce the bug just by creating a device node for a non-existent minor
> > > device and calling open().
> >
> > Hm I don't have that thread anymore, but generally these bugs are solved
> > by not registering the device before it's ready for use. We do have
> > drm_connector->late_register for that stuff. Just a guess since I'm not
> > seeing full details here.
>
> In this particular case, the physical device disappeared before the nodes
> were cleaned up. It involves putting a computer to sleep with a monitor
> plugged in and then waking it up with the monitor unplugged.

We also have early_unregister for the reverse, but yes this sounds
more tricky ... Adding Lyude who's been working on way too much
lifetime fun around dp recently.
-Daniel

>
>
> > >
> > > To me it still makes sense to just check aux_dev because the chardev has
> > > no way to check before calling.
> > >
> > > (gdb) list *drm_dp_aux_dev_get_by_minor+0x29
> > > 0x17b39 is in drm_dp_aux_dev_get_by_minor (drivers/gpu/drm/drm_dp_aux_dev.c:65).
> > > 60      static struct drm_dp_aux_dev *drm_dp_aux_dev_get_by_minor(unsigned index)
> > > 61      {
> > > 62              struct drm_dp_aux_dev *aux_dev = NULL;
> > > 63
> > > 64              mutex_lock(&aux_idr_mutex);
> > > 65              aux_dev = idr_find(&aux_idr, index);
> > > 66              if (!kref_get_unless_zero(&aux_dev->refcount))
> > > 67                      aux_dev = NULL;
> > > 68              mutex_unlock(&aux_idr_mutex);
> > > 69
> > > (gdb) p/x &((struct drm_dp_aux_dev *)(0x0))->refcount
> > > $8 = 0x18
> > >
> > > static int auxdev_open(struct inode *inode, struct file *file)
> > > {
> > >     unsigned int minor = iminor(inode);
> > >     struct drm_dp_aux_dev *aux_dev;
> > >
> > >     aux_dev = drm_dp_aux_dev_get_by_minor(minor);
> > >     if (!aux_dev)
> > >         return -ENODEV;
> > >
> > >     file->private_data = aux_dev;
> > >     return 0;
> > > }
> > >
> > >
> > > _______________________________________________
> > > dri-devel mailing list
> > > dri-devel@lists.freedesktop.org
> > > https://lists.freedesktop.org/mailman/listinfo/dri-devel
> >
> >



-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH] drm: assure aux_dev is nonzero before using it
  2020-08-11  8:58   ` Daniel Vetter
@ 2020-08-11 22:16     ` Zwane Mwaikambo
  -1 siblings, 0 replies; 29+ messages in thread
From: Zwane Mwaikambo @ 2020-08-11 22:16 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: tcamuso, dkwon, Linux Kernel, dri-devel

On Tue, 11 Aug 2020, Daniel Vetter wrote:

> On Mon, Aug 10, 2020 at 10:11:50AM -0700, Zwane Mwaikambo wrote:
> > Hi Folks,
> > 	I know this thread eventually dropped off due to not identifying 
> > the underlying issue. It's still occuring on 5.8 and in my case it 
> > happened because the udev device nodes for the DP aux devices were not 
> > cleaned up whereas the kernel had no association with them. I can 
> > reproduce the bug just by creating a device node for a non-existent minor 
> > device and calling open().
> 
> Hm I don't have that thread anymore, but generally these bugs are solved
> by not registering the device before it's ready for use. We do have
> drm_connector->late_register for that stuff. Just a guess since I'm not
> seeing full details here.

In this particular case, the physical device disappeared before the nodes 
were cleaned up. It involves putting a computer to sleep with a monitor 
plugged in and then waking it up with the monitor unplugged.


> > 
> > To me it still makes sense to just check aux_dev because the chardev has 
> > no way to check before calling.
> > 
> > (gdb) list *drm_dp_aux_dev_get_by_minor+0x29
> > 0x17b39 is in drm_dp_aux_dev_get_by_minor (drivers/gpu/drm/drm_dp_aux_dev.c:65).
> > 60      static struct drm_dp_aux_dev *drm_dp_aux_dev_get_by_minor(unsigned index)
> > 61      {
> > 62              struct drm_dp_aux_dev *aux_dev = NULL;
> > 63
> > 64              mutex_lock(&aux_idr_mutex);
> > 65              aux_dev = idr_find(&aux_idr, index);
> > 66              if (!kref_get_unless_zero(&aux_dev->refcount))
> > 67                      aux_dev = NULL;
> > 68              mutex_unlock(&aux_idr_mutex);
> > 69
> > (gdb) p/x &((struct drm_dp_aux_dev *)(0x0))->refcount
> > $8 = 0x18
> > 
> > static int auxdev_open(struct inode *inode, struct file *file)
> > {
> >     unsigned int minor = iminor(inode);
> >     struct drm_dp_aux_dev *aux_dev;
> > 
> >     aux_dev = drm_dp_aux_dev_get_by_minor(minor);
> >     if (!aux_dev)
> >         return -ENODEV;
> > 
> >     file->private_data = aux_dev;
> >     return 0;
> > }
> > 
> > 
> > _______________________________________________
> > dri-devel mailing list
> > dri-devel@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/dri-devel
> 
> 

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH] drm: assure aux_dev is nonzero before using it
@ 2020-08-11 22:16     ` Zwane Mwaikambo
  0 siblings, 0 replies; 29+ messages in thread
From: Zwane Mwaikambo @ 2020-08-11 22:16 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: dkwon, Linux Kernel, dri-devel, tcamuso

On Tue, 11 Aug 2020, Daniel Vetter wrote:

> On Mon, Aug 10, 2020 at 10:11:50AM -0700, Zwane Mwaikambo wrote:
> > Hi Folks,
> > 	I know this thread eventually dropped off due to not identifying 
> > the underlying issue. It's still occuring on 5.8 and in my case it 
> > happened because the udev device nodes for the DP aux devices were not 
> > cleaned up whereas the kernel had no association with them. I can 
> > reproduce the bug just by creating a device node for a non-existent minor 
> > device and calling open().
> 
> Hm I don't have that thread anymore, but generally these bugs are solved
> by not registering the device before it's ready for use. We do have
> drm_connector->late_register for that stuff. Just a guess since I'm not
> seeing full details here.

In this particular case, the physical device disappeared before the nodes 
were cleaned up. It involves putting a computer to sleep with a monitor 
plugged in and then waking it up with the monitor unplugged.


> > 
> > To me it still makes sense to just check aux_dev because the chardev has 
> > no way to check before calling.
> > 
> > (gdb) list *drm_dp_aux_dev_get_by_minor+0x29
> > 0x17b39 is in drm_dp_aux_dev_get_by_minor (drivers/gpu/drm/drm_dp_aux_dev.c:65).
> > 60      static struct drm_dp_aux_dev *drm_dp_aux_dev_get_by_minor(unsigned index)
> > 61      {
> > 62              struct drm_dp_aux_dev *aux_dev = NULL;
> > 63
> > 64              mutex_lock(&aux_idr_mutex);
> > 65              aux_dev = idr_find(&aux_idr, index);
> > 66              if (!kref_get_unless_zero(&aux_dev->refcount))
> > 67                      aux_dev = NULL;
> > 68              mutex_unlock(&aux_idr_mutex);
> > 69
> > (gdb) p/x &((struct drm_dp_aux_dev *)(0x0))->refcount
> > $8 = 0x18
> > 
> > static int auxdev_open(struct inode *inode, struct file *file)
> > {
> >     unsigned int minor = iminor(inode);
> >     struct drm_dp_aux_dev *aux_dev;
> > 
> >     aux_dev = drm_dp_aux_dev_get_by_minor(minor);
> >     if (!aux_dev)
> >         return -ENODEV;
> > 
> >     file->private_data = aux_dev;
> >     return 0;
> > }
> > 
> > 
> > _______________________________________________
> > dri-devel mailing list
> > dri-devel@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/dri-devel
> 
> 
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH] drm: assure aux_dev is nonzero before using it
  2020-08-10 17:11 ` Zwane Mwaikambo
@ 2020-08-11  8:58   ` Daniel Vetter
  -1 siblings, 0 replies; 29+ messages in thread
From: Daniel Vetter @ 2020-08-11  8:58 UTC (permalink / raw)
  To: Zwane Mwaikambo; +Cc: tcamuso, dkwon, Linux Kernel, dri-devel

On Mon, Aug 10, 2020 at 10:11:50AM -0700, Zwane Mwaikambo wrote:
> Hi Folks,
> 	I know this thread eventually dropped off due to not identifying 
> the underlying issue. It's still occuring on 5.8 and in my case it 
> happened because the udev device nodes for the DP aux devices were not 
> cleaned up whereas the kernel had no association with them. I can 
> reproduce the bug just by creating a device node for a non-existent minor 
> device and calling open().

Hm I don't have that thread anymore, but generally these bugs are solved
by not registering the device before it's ready for use. We do have
drm_connector->late_register for that stuff. Just a guess since I'm not
seeing full details here.
-Daniel

> 
> To me it still makes sense to just check aux_dev because the chardev has 
> no way to check before calling.
> 
> (gdb) list *drm_dp_aux_dev_get_by_minor+0x29
> 0x17b39 is in drm_dp_aux_dev_get_by_minor (drivers/gpu/drm/drm_dp_aux_dev.c:65).
> 60      static struct drm_dp_aux_dev *drm_dp_aux_dev_get_by_minor(unsigned index)
> 61      {
> 62              struct drm_dp_aux_dev *aux_dev = NULL;
> 63
> 64              mutex_lock(&aux_idr_mutex);
> 65              aux_dev = idr_find(&aux_idr, index);
> 66              if (!kref_get_unless_zero(&aux_dev->refcount))
> 67                      aux_dev = NULL;
> 68              mutex_unlock(&aux_idr_mutex);
> 69
> (gdb) p/x &((struct drm_dp_aux_dev *)(0x0))->refcount
> $8 = 0x18
> 
> static int auxdev_open(struct inode *inode, struct file *file)
> {
>     unsigned int minor = iminor(inode);
>     struct drm_dp_aux_dev *aux_dev;
> 
>     aux_dev = drm_dp_aux_dev_get_by_minor(minor);
>     if (!aux_dev)
>         return -ENODEV;
> 
>     file->private_data = aux_dev;
>     return 0;
> }
> 
> 
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH] drm: assure aux_dev is nonzero before using it
@ 2020-08-11  8:58   ` Daniel Vetter
  0 siblings, 0 replies; 29+ messages in thread
From: Daniel Vetter @ 2020-08-11  8:58 UTC (permalink / raw)
  To: Zwane Mwaikambo; +Cc: dkwon, Linux Kernel, dri-devel, tcamuso

On Mon, Aug 10, 2020 at 10:11:50AM -0700, Zwane Mwaikambo wrote:
> Hi Folks,
> 	I know this thread eventually dropped off due to not identifying 
> the underlying issue. It's still occuring on 5.8 and in my case it 
> happened because the udev device nodes for the DP aux devices were not 
> cleaned up whereas the kernel had no association with them. I can 
> reproduce the bug just by creating a device node for a non-existent minor 
> device and calling open().

Hm I don't have that thread anymore, but generally these bugs are solved
by not registering the device before it's ready for use. We do have
drm_connector->late_register for that stuff. Just a guess since I'm not
seeing full details here.
-Daniel

> 
> To me it still makes sense to just check aux_dev because the chardev has 
> no way to check before calling.
> 
> (gdb) list *drm_dp_aux_dev_get_by_minor+0x29
> 0x17b39 is in drm_dp_aux_dev_get_by_minor (drivers/gpu/drm/drm_dp_aux_dev.c:65).
> 60      static struct drm_dp_aux_dev *drm_dp_aux_dev_get_by_minor(unsigned index)
> 61      {
> 62              struct drm_dp_aux_dev *aux_dev = NULL;
> 63
> 64              mutex_lock(&aux_idr_mutex);
> 65              aux_dev = idr_find(&aux_idr, index);
> 66              if (!kref_get_unless_zero(&aux_dev->refcount))
> 67                      aux_dev = NULL;
> 68              mutex_unlock(&aux_idr_mutex);
> 69
> (gdb) p/x &((struct drm_dp_aux_dev *)(0x0))->refcount
> $8 = 0x18
> 
> static int auxdev_open(struct inode *inode, struct file *file)
> {
>     unsigned int minor = iminor(inode);
>     struct drm_dp_aux_dev *aux_dev;
> 
>     aux_dev = drm_dp_aux_dev_get_by_minor(minor);
>     if (!aux_dev)
>         return -ENODEV;
> 
>     file->private_data = aux_dev;
>     return 0;
> }
> 
> 
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH] drm: assure aux_dev is nonzero before using it
@ 2020-08-10 17:11 ` Zwane Mwaikambo
  0 siblings, 0 replies; 29+ messages in thread
From: Zwane Mwaikambo @ 2020-08-10 17:11 UTC (permalink / raw)
  To: tcamuso; +Cc: Linux Kernel, dri-devel, dkwon

Hi Folks,
	I know this thread eventually dropped off due to not identifying 
the underlying issue. It's still occurring on 5.8 and in my case it 
happened because the udev device nodes for the DP aux devices were not 
cleaned up whereas the kernel had no association with them. I can 
reproduce the bug just by creating a device node for a non-existent minor 
device and calling open().

To me it still makes sense to just check aux_dev because the chardev has 
no way to check before calling.

(gdb) list *drm_dp_aux_dev_get_by_minor+0x29
0x17b39 is in drm_dp_aux_dev_get_by_minor (drivers/gpu/drm/drm_dp_aux_dev.c:65).
60      static struct drm_dp_aux_dev *drm_dp_aux_dev_get_by_minor(unsigned index)
61      {
62              struct drm_dp_aux_dev *aux_dev = NULL;
63
64              mutex_lock(&aux_idr_mutex);
65              aux_dev = idr_find(&aux_idr, index);
66              if (!kref_get_unless_zero(&aux_dev->refcount))
67                      aux_dev = NULL;
68              mutex_unlock(&aux_idr_mutex);
69
(gdb) p/x &((struct drm_dp_aux_dev *)(0x0))->refcount
$8 = 0x18

static int auxdev_open(struct inode *inode, struct file *file)
{
    unsigned int minor = iminor(inode);
    struct drm_dp_aux_dev *aux_dev;

    aux_dev = drm_dp_aux_dev_get_by_minor(minor);
    if (!aux_dev)
        return -ENODEV;

    file->private_data = aux_dev;
    return 0;
}



^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH] drm: assure aux_dev is nonzero before using it
@ 2020-08-10 17:11 ` Zwane Mwaikambo
  0 siblings, 0 replies; 29+ messages in thread
From: Zwane Mwaikambo @ 2020-08-10 17:11 UTC (permalink / raw)
  To: tcamuso; +Cc: dkwon, Linux Kernel, dri-devel

Hi Folks,
	I know this thread eventually dropped off due to not identifying 
the underlying issue. It's still occurring on 5.8 and in my case it 
happened because the udev device nodes for the DP aux devices were not 
cleaned up whereas the kernel had no association with them. I can 
reproduce the bug just by creating a device node for a non-existent minor 
device and calling open().

To me it still makes sense to just check aux_dev because the chardev has 
no way to check before calling.

(gdb) list *drm_dp_aux_dev_get_by_minor+0x29
0x17b39 is in drm_dp_aux_dev_get_by_minor (drivers/gpu/drm/drm_dp_aux_dev.c:65).
60      static struct drm_dp_aux_dev *drm_dp_aux_dev_get_by_minor(unsigned index)
61      {
62              struct drm_dp_aux_dev *aux_dev = NULL;
63
64              mutex_lock(&aux_idr_mutex);
65              aux_dev = idr_find(&aux_idr, index);
66              if (!kref_get_unless_zero(&aux_dev->refcount))
67                      aux_dev = NULL;
68              mutex_unlock(&aux_idr_mutex);
69
(gdb) p/x &((struct drm_dp_aux_dev *)(0x0))->refcount
$8 = 0x18

static int auxdev_open(struct inode *inode, struct file *file)
{
    unsigned int minor = iminor(inode);
    struct drm_dp_aux_dev *aux_dev;

    aux_dev = drm_dp_aux_dev_get_by_minor(minor);
    if (!aux_dev)
        return -ENODEV;

    file->private_data = aux_dev;
    return 0;
}


_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 29+ messages in thread

end of thread, other threads:[~2020-09-08 18:45 UTC | newest]

Thread overview: 29+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-05-23 11:09 [PATCH] drm: assure aux_dev is nonzero before using it tcamuso
2019-05-23 11:09 ` tcamuso
2019-05-24  8:36 ` Jani Nikula
2019-05-24  8:36   ` Jani Nikula
2019-05-24 10:48   ` tony camuso
2019-05-24 11:58     ` Ville Syrjälä
2019-07-10 13:47   ` Tony Camuso
2019-07-10 13:56     ` Ville Syrjälä
2019-07-12 16:07       ` Tony Camuso
2019-07-12 17:06         ` Ville Syrjälä
2019-07-12 17:35           ` Tony Camuso
2019-09-23 15:03           ` Tony Camuso
2019-09-23 15:22             ` Ville Syrjälä
2020-08-10 17:11 Zwane Mwaikambo
2020-08-10 17:11 ` Zwane Mwaikambo
2020-08-11  8:58 ` Daniel Vetter
2020-08-11  8:58   ` Daniel Vetter
2020-08-11 22:16   ` Zwane Mwaikambo
2020-08-11 22:16     ` Zwane Mwaikambo
2020-08-12 14:10     ` Daniel Vetter
2020-08-12 14:10       ` Daniel Vetter
2020-08-12 15:44       ` Lyude Paul
2020-08-12 15:44         ` Lyude Paul
2020-08-12 20:21         ` Zwane Mwaikambo
2020-08-12 20:21           ` Zwane Mwaikambo
2020-08-18 17:58         ` Zwane Mwaikambo
2020-08-18 17:58           ` Zwane Mwaikambo
2020-09-08 18:41           ` Lyude Paul
2020-09-08 18:41             ` Lyude Paul

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.