linux-kernel.vger.kernel.org archive mirror
* [PATCH] devcoredump : Serialize devcd_del work
@ 2022-04-19 10:27 Mukesh Ojha
  2022-04-22 10:03 ` Mukesh Ojha
  2022-04-22 13:41 ` Johannes Berg
From: Mukesh Ojha @ 2022-04-19 10:27 UTC
  To: linux-kernel; +Cc: tglx, sboyd, rafael, johannes, Mukesh Ojha

In the scenario shown in the diagram below, one thread X running
dev_coredumpm() adds a devcd device to the framework, which sends a
uevent notification to userspace, while another thread Y reads this
uevent and calls devcd_data_write(), which eventually tries to delete
a delayed-work timer that has not been initialized or queued yet.

As a result, debugobjects reports a warning. In the meantime, the
timer is initialized and queued from the X path; on the Y path it then
gets reinitialized, leaving timer->entry.pprev = NULL, and
try_to_grab_pending() gets stuck.

To fix this, introduce a mutex to serialize the two paths.

 	cpu0(X)			                      cpu1(Y)

    dev_coredump() uevent sent to userspace
    device_add()  =========================> userspace process Y reads the uevents
                                             writes to devcd fd which
                                             results into writes to

                                            devcd_data_write()
                                              mod_delayed_work()
                                                try_to_grab_pending()
                                                  del_timer()
                                                    debug_assert_init()
   INIT_DELAYED_WORK
   schedule_delayed_work
                                                     debug_object_fixup()
                                                      timer_fixup_assert_init()
                                                       timer_setup()
                                                         do_init_timer()   ==> reinitializes the
                                                                                 timer so that
                                                                                 timer->entry.pprev = NULL

                                                  timer_pending()
                                                   !hlist_unhashed_lockless(&timer->entry)
                                                     !h->pprev  ==> del_timer() checks
                                                                  and finds it to be NULL, so
                                                                  try_to_grab_pending() gets stuck.

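To make the required serialization concrete, below is a minimal
userspace analogue of the race (an illustration using pthreads, not
the kernel code itself): the X path must initialize and arm the timer
as one unit under the same lock that the Y path takes before touching
it, so Y can never observe a half-initialized timer.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct entry {
	pthread_mutex_t lock;
	bool armed;		/* stands in for the initialized/queued timer */
};

static struct entry e = { .lock = PTHREAD_MUTEX_INITIALIZER };

/* dev_coredumpm() analogue: initialize and arm as one unit */
static void *x_path(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&e.lock);
	e.armed = true;		/* INIT_DELAYED_WORK + schedule_delayed_work */
	pthread_mutex_unlock(&e.lock);
	return NULL;
}

/* devcd_data_write() analogue: only touch the timer once it exists */
static void *y_path(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&e.lock);
	if (e.armed)
		printf("Y: re-armed an initialized timer\n");
	else
		printf("Y: timer not armed yet, nothing to modify\n");
	pthread_mutex_unlock(&e.lock);
	return NULL;
}

int main(void)
{
	pthread_t x, y;

	pthread_create(&x, NULL, x_path, NULL);
	pthread_create(&y, NULL, y_path, NULL);
	pthread_join(x, NULL);
	pthread_join(y, NULL);
	return 0;
}
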
Link: https://lore.kernel.org/lkml/2e1f81e2-428c-f11f-ce92-eb11048cb271@quicinc.com/
Signed-off-by: Mukesh Ojha <quic_mojha@quicinc.com>
---
 drivers/base/devcoredump.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/base/devcoredump.c b/drivers/base/devcoredump.c
index f4d794d..316f566 100644
--- a/drivers/base/devcoredump.c
+++ b/drivers/base/devcoredump.c
@@ -25,6 +25,7 @@ struct devcd_entry {
 	struct device devcd_dev;
 	void *data;
 	size_t datalen;
+	struct mutex mutex;
 	struct module *owner;
 	ssize_t (*read)(char *buffer, loff_t offset, size_t count,
 			void *data, size_t datalen);
@@ -84,7 +85,9 @@ static ssize_t devcd_data_write(struct file *filp, struct kobject *kobj,
 	struct device *dev = kobj_to_dev(kobj);
 	struct devcd_entry *devcd = dev_to_devcd(dev);
 
+	mutex_lock(&devcd->mutex);
 	mod_delayed_work(system_wq, &devcd->del_wk, 0);
+	mutex_unlock(&devcd->mutex);
 
 	return count;
 }
@@ -112,7 +115,9 @@ static int devcd_free(struct device *dev, void *data)
 {
 	struct devcd_entry *devcd = dev_to_devcd(dev);
 
+	mutex_lock(&devcd->mutex);
 	flush_delayed_work(&devcd->del_wk);
+	mutex_unlock(&devcd->mutex);
 	return 0;
 }
 
@@ -278,13 +283,14 @@ void dev_coredumpm(struct device *dev, struct module *owner,
 	devcd->read = read;
 	devcd->free = free;
 	devcd->failing_dev = get_device(dev);
-
+	mutex_init(&devcd->mutex);
 	device_initialize(&devcd->devcd_dev);
 
 	dev_set_name(&devcd->devcd_dev, "devcd%d",
 		     atomic_inc_return(&devcd_count));
 	devcd->devcd_dev.class = &devcd_class;
 
+	mutex_lock(&devcd->mutex);
 	if (device_add(&devcd->devcd_dev))
 		goto put_device;
 
@@ -301,10 +307,11 @@ void dev_coredumpm(struct device *dev, struct module *owner,
 
 	INIT_DELAYED_WORK(&devcd->del_wk, devcd_del);
 	schedule_delayed_work(&devcd->del_wk, DEVCD_TIMEOUT);
-
+	mutex_unlock(&devcd->mutex);
 	return;
  put_device:
 	put_device(&devcd->devcd_dev);
+	mutex_unlock(&devcd->mutex);
  put_module:
 	module_put(owner);
  free:
-- 
2.7.4



* Re: [PATCH] devcoredump : Serialize devcd_del work
  2022-04-19 10:27 [PATCH] devcoredump : Serialize devcd_del work Mukesh Ojha
@ 2022-04-22 10:03 ` Mukesh Ojha
  2022-04-22 12:13   ` Greg KH
  2022-04-22 13:41 ` Johannes Berg
From: Mukesh Ojha @ 2022-04-22 10:03 UTC
  To: linux-kernel, Greg KH; +Cc: tglx, sboyd, rafael, johannes


Hi All,

Requesting review comments on the fix for the described problem.

-Mukesh


On 4/19/2022 3:57 PM, Mukesh Ojha wrote:
> [...]


* Re: [PATCH] devcoredump : Serialize devcd_del work
  2022-04-22 10:03 ` Mukesh Ojha
@ 2022-04-22 12:13   ` Greg KH
  2022-04-22 13:55     ` Greg KH
From: Greg KH @ 2022-04-22 12:13 UTC
  To: Mukesh Ojha; +Cc: linux-kernel, tglx, sboyd, rafael, johannes


A: http://en.wikipedia.org/wiki/Top_post
Q: Were do I find info about this thing called top-posting?
A: Because it messes up the order in which people normally read text.
Q: Why is top-posting such a bad thing?
A: Top-posting.
Q: What is the most annoying thing in e-mail?

A: No.
Q: Should I include quotations after my reply?

http://daringfireball.net/2007/07/on_top

On Fri, Apr 22, 2022 at 03:33:08PM +0530, Mukesh Ojha wrote:
> 
> Hi All,
> 
> Requesting review comments on the fix for the described problem.
> 
> -Mukesh
> 
> 
> On 4/19/2022 3:57 PM, Mukesh Ojha wrote:

You sent this 3 days ago, please be realistic.

If you are concerned about the delay in patch reviews, please help us
out and review patches sent to the list.  Otherwise, don't start to
worry until at least after 2 weeks.

thanks,

greg k-h


* Re: [PATCH] devcoredump : Serialize devcd_del work
  2022-04-19 10:27 [PATCH] devcoredump : Serialize devcd_del work Mukesh Ojha
  2022-04-22 10:03 ` Mukesh Ojha
@ 2022-04-22 13:41 ` Johannes Berg
  2022-04-22 13:53   ` Johannes Berg
From: Johannes Berg @ 2022-04-22 13:41 UTC
  To: Mukesh Ojha, linux-kernel; +Cc: tglx, sboyd, rafael

On Tue, 2022-04-19 at 15:57 +0530, Mukesh Ojha wrote:
> In the scenario shown in the diagram below, one thread X running
> dev_coredumpm() adds a devcd device to the framework, which sends a
> uevent notification to userspace, while another thread Y reads this
> uevent and calls devcd_data_write(), which eventually tries to delete
> a delayed-work timer that has not been initialized or queued yet.
> 
> As a result, debugobjects reports a warning. In the meantime, the
> timer is initialized and queued from the X path; on the Y path it then
> gets reinitialized, leaving timer->entry.pprev = NULL, and
> try_to_grab_pending() gets stuck.
> 
> To fix this, introduce a mutex to serialize the two paths.
> 
>  	cpu0(X)			                      cpu1(Y)
> 
>     dev_coredump() uevent sent to userspace
>     device_add()  =========================> userspace process Y reads the uevents
>                                              writes to devcd fd which
>                                              results into writes to
> 
>                                             devcd_data_write()
>                                               mod_delayed_work()
>                                                 try_to_grab_pending()
>                                                   del_timer()
>                                                     debug_assert_init()
>    INIT_DELAYED_WORK
>    schedule_delayed_work
> 

Wouldn't it be easier to simply schedule this before adding it to sysfs
and sending the uevent?
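
In code, that reordering would look roughly like this (a sketch only,
assuming the delayed work can safely be armed before the device
becomes visible to userspace):

	/* sketch: arm the deletion work before userspace can see the device */
	INIT_DELAYED_WORK(&devcd->del_wk, devcd_del);
	schedule_delayed_work(&devcd->del_wk, DEVCD_TIMEOUT);

	if (device_add(&devcd->devcd_dev))
		goto put_device;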

johannes


* Re: [PATCH] devcoredump : Serialize devcd_del work
  2022-04-22 13:41 ` Johannes Berg
@ 2022-04-22 13:53   ` Johannes Berg
  2022-04-22 15:21     ` Thomas Gleixner
  2022-04-25 13:17     ` Mukesh Ojha
From: Johannes Berg @ 2022-04-22 13:53 UTC
  To: Mukesh Ojha, linux-kernel; +Cc: tglx, sboyd, rafael

On Fri, 2022-04-22 at 15:41 +0200, Johannes Berg wrote:
> On Tue, 2022-04-19 at 15:57 +0530, Mukesh Ojha wrote:
> > In the scenario shown in the diagram below, one thread X running
> > dev_coredumpm() adds a devcd device to the framework, which sends a
> > uevent notification to userspace, while another thread Y reads this
> > uevent and calls devcd_data_write(), which eventually tries to delete
> > a delayed-work timer that has not been initialized or queued yet.
> > 
> > As a result, debugobjects reports a warning. In the meantime, the
> > timer is initialized and queued from the X path; on the Y path it then
> > gets reinitialized, leaving timer->entry.pprev = NULL, and
> > try_to_grab_pending() gets stuck.
> > 
> > To fix this, introduce a mutex to serialize the two paths.
> > 
> >  	cpu0(X)			                      cpu1(Y)
> > 
> >     dev_coredump() uevent sent to userspace
> >     device_add()  =========================> userspace process Y reads the uevents
> >                                              writes to devcd fd which
> >                                              results into writes to
> > 
> >                                             devcd_data_write()
> >                                               mod_delayed_work()
> >                                                 try_to_grab_pending()
> >                                                   del_timer()
> >                                                     debug_assert_init()
> >    INIT_DELAYED_WORK
> >    schedule_delayed_work
> > 
> 
> Wouldn't it be easier to simply schedule this before adding it to sysfs
> and sending the uevent?
> 

Hm. I think that would solve this problem, but not all of the problems
here ...

Even with your change, I believe it's still racy wrt. disabled_store(),
since that flushes the work but devcd_data_write() remains reachable
(and might in fact be waiting for the mutex after your change), so I
think we need an additional flag somewhere (in addition to the mutex) to
serialize all of these things against each other.
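
Something along these lines, perhaps (a sketch; the flag name here is
the delete_work that the v2 posted later in this thread ends up using):

	mutex_lock(&devcd->mutex);
	if (!devcd->delete_work) {
		devcd->delete_work = true;
		mod_delayed_work(system_wq, &devcd->del_wk, 0);
	}
	mutex_unlock(&devcd->mutex);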

johannes


* Re: [PATCH] devcoredump : Serialize devcd_del work
  2022-04-22 12:13   ` Greg KH
@ 2022-04-22 13:55     ` Greg KH
From: Greg KH @ 2022-04-22 13:55 UTC
  To: Mukesh Ojha; +Cc: linux-kernel, tglx, sboyd, rafael, johannes

On Fri, Apr 22, 2022 at 02:13:47PM +0200, Greg KH wrote:
> 
> A: http://en.wikipedia.org/wiki/Top_post
> Q: Were do I find info about this thing called top-posting?
> A: Because it messes up the order in which people normally read text.
> Q: Why is top-posting such a bad thing?
> A: Top-posting.
> Q: What is the most annoying thing in e-mail?
> 
> A: No.
> Q: Should I include quotations after my reply?
> 
> http://daringfireball.net/2007/07/on_top
> 
> On Fri, Apr 22, 2022 at 03:33:08PM +0530, Mukesh Ojha wrote:
> > 
> > Hi All,
> > 
> > Requesting review comments on the fix for the described problem.
> > 
> > -Mukesh
> > 
> > 
> > On 4/19/2022 3:57 PM, Mukesh Ojha wrote:
> 
> You sent this 3 days ago, please be realistic.
> 
> If you are concerned about the delay in patch reviews, please help us
> out and review patches sent to the list.  Otherwise, don't start to
> worry until at least after 2 weeks.

You also didn't even cc: me, so how could I see this at all?

So, nothing to review in my inbox, nice!

{sigh}


* Re: [PATCH] devcoredump : Serialize devcd_del work
  2022-04-22 13:53   ` Johannes Berg
@ 2022-04-22 15:21     ` Thomas Gleixner
  2022-04-25 13:17     ` Mukesh Ojha
From: Thomas Gleixner @ 2022-04-22 15:21 UTC
  To: Johannes Berg, Mukesh Ojha, linux-kernel; +Cc: sboyd, rafael

On Fri, Apr 22 2022 at 15:53, Johannes Berg wrote:
> On Fri, 2022-04-22 at 15:41 +0200, Johannes Berg wrote:
>> On Tue, 2022-04-19 at 15:57 +0530, Mukesh Ojha wrote:
>> >    INIT_DELAYED_WORK
>> >    schedule_delayed_work
>> > 
>> 
>> Wouldn't it be easier to simply schedule this before adding it to sysfs
>> and sending the uevent?

Only if it's guaranteed that the timer will not expire before
device_add() succeeds. It's likely, but there is virt....

> Hm. I think that would solve this problem, but not all of the problems
> here ...
>
> Even with your change, I believe it's still racy wrt. disabled_store(),
> since that flushes the work but devcd_data_write() remains reachable
> (and might in fact be waiting for the mutex after your change), so I
> think we need an additional flag somewhere (in addition to the mutex) to
> serialize all of these things against each other.

Plus there is disabled_store() which iterates over the devices and
reschedules the work. How is that supposed to be protected against a
concurrent devcd_del()?
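
For reference, a rough reconstruction of the disabled_store() under
discussion (a sketch based on its signature and the
class_for_each_device() iteration shown later in this thread; the
details are illustrative):

static ssize_t disabled_store(struct class *class, struct class_attribute *attr,
			      const char *buf, size_t count)
{
	long tmp = simple_strtol(buf, NULL, 10);

	if (tmp) {
		/* flush and delete every pending dump, then stay disabled */
		class_for_each_device(&devcd_class, NULL, NULL, devcd_free);
		devcd_disabled = true;
	}

	return count;
}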

Why does disabled_store() need to do that at all?

Thanks,

        tglx


* Re: [PATCH] devcoredump : Serialize devcd_del work
  2022-04-22 13:53   ` Johannes Berg
  2022-04-22 15:21     ` Thomas Gleixner
@ 2022-04-25 13:17     ` Mukesh Ojha
From: Mukesh Ojha @ 2022-04-25 13:17 UTC
  To: Johannes Berg, linux-kernel; +Cc: tglx, sboyd, rafael, johannes, gregkh

On Fri, Apr 22, 2022 at 03:53:35PM +0200, Johannes Berg wrote:
> On Fri, 2022-04-22 at 15:41 +0200, Johannes Berg wrote:
> > On Tue, 2022-04-19 at 15:57 +0530, Mukesh Ojha wrote:
> > > In the scenario shown in the diagram below, one thread X running
> > > dev_coredumpm() adds a devcd device to the framework, which sends a
> > > uevent notification to userspace, while another thread Y reads this
> > > uevent and calls devcd_data_write(), which eventually tries to delete
> > > a delayed-work timer that has not been initialized or queued yet.
> > > 
> > > As a result, debugobjects reports a warning. In the meantime, the
> > > timer is initialized and queued from the X path; on the Y path it then
> > > gets reinitialized, leaving timer->entry.pprev = NULL, and
> > > try_to_grab_pending() gets stuck.
> > > 
> > > To fix this, introduce a mutex to serialize the two paths.
> > > 
> > >  	cpu0(X)			                      cpu1(Y)
> > > 
> > >     dev_coredump() uevent sent to userspace
> > >     device_add()  =========================> userspace process Y reads the uevents
> > >                                              writes to devcd fd which
> > >                                              results into writes to
> > > 
> > >                                             devcd_data_write()
> > >                                               mod_delayed_work()
> > >                                                 try_to_grab_pending()
> > >                                                   del_timer()
> > >                                                     debug_assert_init()
> > >    INIT_DELAYED_WORK
> > >    schedule_delayed_work
> > > 
> > 
> > Wouldn't it be easier to simply schedule this before adding it to sysfs
> > and sending the uevent?
> > 
> 
> Hm. I think that would solve this problem, but not all of the problems
> here ...
> 
> Even with your change, I believe it's still racy wrt. disabled_store(),
> since that flushes the work but devcd_data_write() remains reachable
> (and might in fact be waiting for the mutex after your change), so I
> think we need an additional flag somewhere (in addition to the mutex) to
> serialize all of these things against each other.

Can we do something like this in v2?

https://lore.kernel.org/lkml/1650892193-12888-1-git-send-email-quic_mojha@quicinc.com/

Thanks,
-Mukesh

> 
> johannes


* [PATCH] devcoredump : Serialize devcd_del work
@ 2022-05-02 15:34 Mukesh Ojha
From: Mukesh Ojha @ 2022-05-02 15:34 UTC
  To: linux-kernel; +Cc: gregkh, tglx, sboyd, rafael, johannes, keescook, Mukesh Ojha

In the scenario shown in the diagram below, one thread X running
dev_coredumpm() adds a devcd device to the framework, which sends a
uevent notification to userspace, while another thread Y reads this
uevent and calls devcd_data_write(), which eventually tries to delete
a delayed-work timer that has not been initialized or queued yet.

As a result, debugobjects reports a warning. In the meantime, the
timer is initialized and queued from the X path; on the Y path it then
gets reinitialized, leaving timer->entry.pprev = NULL, and
try_to_grab_pending() gets stuck.

To fix this, introduce a mutex and a boolean flag to serialize the two
paths.

 	cpu0(X)			                cpu1(Y)

    dev_coredump() uevent sent to user space
    device_add()  ======================> user space process Y reads the
                                          uevents writes to devcd fd
                                          which results into writes to

                                         devcd_data_write()
                                           mod_delayed_work()
                                             try_to_grab_pending()
                                               del_timer()
                                                 debug_assert_init()
   INIT_DELAYED_WORK()
   schedule_delayed_work()
                                                   debug_object_fixup()
                                                     timer_fixup_assert_init()
                                                       timer_setup()
                                                         do_init_timer()
                                                       /*
                                                        Above call reinitializes
                                                        the timer to
                                                        timer->entry.pprev=NULL
                                                        and this will be checked
                                                        later in timer_pending() call.
                                                       */
                                                 timer_pending()
                                                  !hlist_unhashed_lockless(&timer->entry)
                                                    !h->pprev
                                                /*
                                                  del_timer() checks h->pprev and finds
                                                  it to be NULL, due to which
                                                  try_to_grab_pending() gets stuck.
                                                */

Link: https://lore.kernel.org/lkml/2e1f81e2-428c-f11f-ce92-eb11048cb271@quicinc.com/
Signed-off-by: Mukesh Ojha <quic_mojha@quicinc.com>
---
v3->v4:
 - flg variable renamed to delete_work.

v2->v3:
 Addressed comments from gregkh
 - Wrapped the commit text and corrected the alignment.
 - Described the reason to introduce new variables.
 - Restored the blank line.
 - Renamed del_wk_queued to flg.
 Addressed comments from tglx
 - Added a comment explaining the race between disabled_store() and the
   devcd_del() work, which looks obvious but cannot actually occur.


v1->v2:
 - Added del_wk_queued flag to serialize the race between devcd_data_write()
   and disabled_store() => devcd_free().

 drivers/base/devcoredump.c | 83 ++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 81 insertions(+), 2 deletions(-)

diff --git a/drivers/base/devcoredump.c b/drivers/base/devcoredump.c
index f4d794d..1c06781 100644
--- a/drivers/base/devcoredump.c
+++ b/drivers/base/devcoredump.c
@@ -25,6 +25,47 @@ struct devcd_entry {
 	struct device devcd_dev;
 	void *data;
 	size_t datalen;
+	/*
+	 * A mutex is required to serialize access to the del_wk work between
+	 * the kernel and user space: devcd is added with device_add(), which
+	 * sends a uevent to user space; user space reads the uevent and calls
+	 * devcd_data_write(), which may try to modify the work before it has
+	 * even been initialized/queued by devcoredump.
+	 *
+	 *
+	 *
+	 *        cpu0(X)                                 cpu1(Y)
+	 *
+	 *        dev_coredump() uevent sent to user space
+	 *        device_add()  ======================> user space process Y reads the
+	 *                                              uevents writes to devcd fd
+	 *                                              which results into writes to
+	 *
+	 *                                             devcd_data_write()
+	 *                                               mod_delayed_work()
+	 *                                                 try_to_grab_pending()
+	 *                                                   del_timer()
+	 *                                                     debug_assert_init()
+	 *       INIT_DELAYED_WORK()
+	 *       schedule_delayed_work()
+	 *
+	 *
+	 * Also, the mutex alone is not enough to prevent the del_wk work
+	 * from being rescheduled after it has been flushed by a call to
+	 * devcd_free(), as shown below.
+	 *
+	 *	disabled_store()
+	 *        devcd_free()
+	 *          mutex_lock()             devcd_data_write()
+	 *          flush_delayed_work()
+	 *          mutex_unlock()
+	 *                                   mutex_lock()
+	 *                                   mod_delayed_work()
+	 *                                   mutex_unlock()
+	 * So, the delete_work flag is required.
+	 */
+	struct mutex mutex;
+	bool delete_work;
 	struct module *owner;
 	ssize_t (*read)(char *buffer, loff_t offset, size_t count,
 			void *data, size_t datalen);
@@ -84,7 +125,12 @@ static ssize_t devcd_data_write(struct file *filp, struct kobject *kobj,
 	struct device *dev = kobj_to_dev(kobj);
 	struct devcd_entry *devcd = dev_to_devcd(dev);
 
-	mod_delayed_work(system_wq, &devcd->del_wk, 0);
+	mutex_lock(&devcd->mutex);
+	if (!devcd->delete_work) {
+		devcd->delete_work = true;
+		mod_delayed_work(system_wq, &devcd->del_wk, 0);
+	}
+	mutex_unlock(&devcd->mutex);
 
 	return count;
 }
@@ -112,7 +158,12 @@ static int devcd_free(struct device *dev, void *data)
 {
 	struct devcd_entry *devcd = dev_to_devcd(dev);
 
+	mutex_lock(&devcd->mutex);
+	if (!devcd->delete_work)
+		devcd->delete_work = true;
+
 	flush_delayed_work(&devcd->del_wk);
+	mutex_unlock(&devcd->mutex);
 	return 0;
 }
 
@@ -122,6 +173,30 @@ static ssize_t disabled_show(struct class *class, struct class_attribute *attr,
 	return sysfs_emit(buf, "%d\n", devcd_disabled);
 }
 
+/*
+ *
+ *	disabled_store()                                	worker()
+ *	 class_for_each_device(&devcd_class,
+ *		NULL, NULL, devcd_free)
+ *         ...
+ *         ...
+ *	   while ((dev = class_dev_iter_next(&iter))
+ *                                                             devcd_del()
+ *                                                               device_del()
+ *                                                                 put_device() <- last reference
+ *             error = fn(dev, data)                           devcd_dev_release()
+ *             devcd_free(dev, data)                           kfree(devcd)
+ *             mutex_lock(&devcd->mutex);
+ *
+ *
+ * In the diagram above, it looks as if disabled_store() could race with a
+ * concurrently running devcd_del() and fault while acquiring devcd->mutex,
+ * because the mutex would be taken after the devcd memory had already been
+ * kfree()d when put_device() dropped the last reference. However, this does
+ * not happen: fn(dev, data) runs with its own reference to the device via the
+ * klist_node, so that reference is never the last one and the race cannot occur.
+ */
+
 static ssize_t disabled_store(struct class *class, struct class_attribute *attr,
 			      const char *buf, size_t count)
 {
@@ -278,13 +353,16 @@ void dev_coredumpm(struct device *dev, struct module *owner,
 	devcd->read = read;
 	devcd->free = free;
 	devcd->failing_dev = get_device(dev);
+	devcd->delete_work = false;
 
+	mutex_init(&devcd->mutex);
 	device_initialize(&devcd->devcd_dev);
 
 	dev_set_name(&devcd->devcd_dev, "devcd%d",
 		     atomic_inc_return(&devcd_count));
 	devcd->devcd_dev.class = &devcd_class;
 
+	mutex_lock(&devcd->mutex);
 	if (device_add(&devcd->devcd_dev))
 		goto put_device;
 
@@ -301,10 +379,11 @@ void dev_coredumpm(struct device *dev, struct module *owner,
 
 	INIT_DELAYED_WORK(&devcd->del_wk, devcd_del);
 	schedule_delayed_work(&devcd->del_wk, DEVCD_TIMEOUT);
-
+	mutex_unlock(&devcd->mutex);
 	return;
  put_device:
 	put_device(&devcd->devcd_dev);
+	mutex_unlock(&devcd->mutex);
  put_module:
 	module_put(owner);
  free:
-- 
2.7.4
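
For completeness, the devcd_data_write() path above is reached from
userspace by writing to the dump's data attribute, for example (the
devcd1 instance name is illustrative; instances are numbered as they
are created):

/* Illustrative: acknowledge and free a device coredump from userspace. */
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/sys/class/devcoredump/devcd1/data", O_WRONLY);

	if (fd < 0)
		return 1;
	/* any write triggers devcd_data_write(), scheduling deletion */
	if (write(fd, "1", 1) < 0) {
		close(fd);
		return 1;
	}
	close(fd);
	return 0;
}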


