[PATCH v2 ] devcoredump : Serialize devcd_del work

* [PATCH v2 ] devcoredump : Serialize devcd_del work
@ 2022-04-25 13:09 Mukesh Ojha
  2022-04-25 14:06 ` Greg KH
  2022-04-25 17:00 ` Thomas Gleixner
  0 siblings, 2 replies; 8+ messages in thread
From: Mukesh Ojha @ 2022-04-25 13:09 UTC (permalink / raw)
  To: linux-kernel; +Cc: tglx, sboyd, rafael, johannes, gregkh, Mukesh Ojha

In following scenario(diagram), when one thread X running dev_coredumpm() adds devcd
device to the framework which sends uevent notification to userspace
and another thread Y reads this uevent and call to devcd_data_write()
which eventually try to delete the queued timer that is not initialized/queued yet.

So, debug object reports some warning and in the meantime, timer is initialized
and queued from X path. and from Y path, it gets reinitialized again and
timer->entry.pprev=NULL and try_to_grab_pending() stucks.

To fix this, introduce mutex to serialize the behaviour.

 	cpu0(X)			                      cpu1(Y)

    dev_coredump() uevent sent to userspace
    device_add()  =========================> userspace process Y reads the uevents
                                             writes to devcd fd which
                                             results into writes to

                                            devcd_data_write()
                                              mod_delayed_work()
                                                try_to_grab_pending()
                                                  del_timer()
                                                    debug_assert_init()
   INIT_DELAYED_WORK
   schedule_delayed_work
                                                     debug_object_fixup()
                                                      timer_fixup_assert_init()
                                                       timer_setup()
                                                         do_init_timer()   ==> reinitialized the
                                                                                 timer to
                                                                                 timer->entry.pprev=NULL

                                                  timer_pending()
                                                   !hlist_unhashed_lockless(&timer->entry)
                                                     !h->pprev  ==> del_timer checks
                                                                  and finds it to be NULL
 								  try_to_grab_pending() stucks.

Link: https://lore.kernel.org/lkml/2e1f81e2-428c-f11f-ce92-eb11048cb271@quicinc.com/
Signed-off-by: Mukesh Ojha <quic_mojha@quicinc.com>
---
v1->v2:
 - Added del_wk_queued to serialize the race between devcd_data_write()
   and disabled_store().

 drivers/base/devcoredump.c | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/drivers/base/devcoredump.c b/drivers/base/devcoredump.c
index f4d794d..3e6fd6b 100644
--- a/drivers/base/devcoredump.c
+++ b/drivers/base/devcoredump.c
@@ -25,6 +25,8 @@ struct devcd_entry {
 	struct device devcd_dev;
 	void *data;
 	size_t datalen;
+	struct mutex mutex;
+	bool del_wk_queued;
 	struct module *owner;
 	ssize_t (*read)(char *buffer, loff_t offset, size_t count,
 			void *data, size_t datalen);
@@ -84,7 +86,12 @@ static ssize_t devcd_data_write(struct file *filp, struct kobject *kobj,
 	struct device *dev = kobj_to_dev(kobj);
 	struct devcd_entry *devcd = dev_to_devcd(dev);
 
-	mod_delayed_work(system_wq, &devcd->del_wk, 0);
+	mutex_lock(&devcd->mutex);
+	if (!devcd->del_wk_queued) {
+		devcd->del_wk_queued = true;
+		mod_delayed_work(system_wq, &devcd->del_wk, 0);
+	}
+	mutex_unlock(&devcd->mutex);
 
 	return count;
 }
@@ -112,7 +119,12 @@ static int devcd_free(struct device *dev, void *data)
 {
 	struct devcd_entry *devcd = dev_to_devcd(dev);
 
+	mutex_lock(&devcd->mutex);
+	if (!devcd->del_wk_queued)
+		devcd->del_wk_queued = true;
+
 	flush_delayed_work(&devcd->del_wk);
+	mutex_unlock(&devcd->mutex);
 	return 0;
 }
 
@@ -278,13 +290,15 @@ void dev_coredumpm(struct device *dev, struct module *owner,
 	devcd->read = read;
 	devcd->free = free;
 	devcd->failing_dev = get_device(dev);
-
+	mutex_init(&devcd->mutex);
 	device_initialize(&devcd->devcd_dev);
 
 	dev_set_name(&devcd->devcd_dev, "devcd%d",
 		     atomic_inc_return(&devcd_count));
 	devcd->devcd_dev.class = &devcd_class;
 
+	mutex_lock(&devcd->mutex);
+	devcd->del_wk_queued = false;
 	if (device_add(&devcd->devcd_dev))
 		goto put_device;
 
@@ -301,10 +315,11 @@ void dev_coredumpm(struct device *dev, struct module *owner,
 
 	INIT_DELAYED_WORK(&devcd->del_wk, devcd_del);
 	schedule_delayed_work(&devcd->del_wk, DEVCD_TIMEOUT);
-
+	mutex_unlock(&devcd->mutex);
 	return;
  put_device:
 	put_device(&devcd->devcd_dev);
+	mutex_unlock(&devcd->mutex);
  put_module:
 	module_put(owner);
  free:
-- 
2.7.4


^ permalink raw reply related	[flat|nested] 8+ messages in thread