All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] Revert "scsi: scsi_debug: Address races following module load"
@ 2022-04-09  4:37 Bart Van Assche
  2022-04-09 19:10 ` Douglas Gilbert
                   ` (2 more replies)
  0 siblings, 3 replies; 8+ messages in thread
From: Bart Van Assche @ 2022-04-09  4:37 UTC (permalink / raw)
  To: Martin K . Petersen
  Cc: linux-scsi, Bart Van Assche, Yi Zhang, Douglas Gilbert,
	Bob Pearson, James E.J. Bottomley

Revert the patch mentioned in the subject since it blocks I/O after
module unload has started while this is a legitimate use case. For e.g.
blktests test case srp/001 that patch causes a command timeout to be
triggered for the following call stack:

__schedule+0x4c3/0xd20
schedule+0x82/0x110
schedule_timeout+0x122/0x200
io_schedule_timeout+0x7b/0xc0
__wait_for_common+0x2bc/0x380
wait_for_completion_io_timeout+0x1d/0x20
blk_execute_rq+0x1db/0x200
__scsi_execute+0x1fb/0x310
sd_sync_cache+0x155/0x2c0 [sd_mod]
sd_shutdown+0xbb/0x190 [sd_mod]
sd_remove+0x5b/0x80 [sd_mod]
device_remove+0x9a/0xb0
device_release_driver_internal+0x2c5/0x360
device_release_driver+0x12/0x20
bus_remove_device+0x1aa/0x270
device_del+0x2d4/0x640
__scsi_remove_device+0x168/0x1a0
scsi_forget_host+0xa8/0xb0
scsi_remove_host+0x9b/0x150
sdebug_driver_remove+0x3d/0x140 [scsi_debug]
device_remove+0x6f/0xb0
device_release_driver_internal+0x2c5/0x360
device_release_driver+0x12/0x20
bus_remove_device+0x1aa/0x270
device_del+0x2d4/0x640
device_unregister+0x18/0x70
sdebug_do_remove_host+0x138/0x180 [scsi_debug]
scsi_debug_exit+0x45/0xd5 [scsi_debug]
__do_sys_delete_module.constprop.0+0x210/0x320
__x64_sys_delete_module+0x1f/0x30
do_syscall_64+0x35/0x80
entry_SYSCALL_64_after_hwframe+0x44/0xae

Reported-by: Yi Zhang <yi.zhang@redhat.com>
Cc: Douglas Gilbert <dgilbert@interlog.com>
Cc: Yi Zhang <yi.zhang@redhat.com>
Cc: Bob Pearson <rpearsonhpe@gmail.com>
Fixes: 2aad3cd85370 ("scsi: scsi_debug: Address races following module load"; )
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
 drivers/scsi/scsi_debug.c | 197 ++++++++++----------------------------
 1 file changed, 51 insertions(+), 146 deletions(-)

diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
index c607755cce00..db6f4b96606c 100644
--- a/drivers/scsi/scsi_debug.c
+++ b/drivers/scsi/scsi_debug.c
@@ -32,7 +32,6 @@
 #include <linux/blkdev.h>
 #include <linux/crc-t10dif.h>
 #include <linux/spinlock.h>
-#include <linux/mutex.h>
 #include <linux/interrupt.h>
 #include <linux/atomic.h>
 #include <linux/hrtimer.h>
@@ -732,9 +731,7 @@ static const struct opcode_info_t opcode_info_arr[SDEB_I_LAST_ELEM_P1 + 1] = {
 	    {0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} },
 };
 
-static atomic_t sdebug_num_hosts;
-static DEFINE_MUTEX(add_host_mutex);
-
+static int sdebug_num_hosts;
 static int sdebug_add_host = DEF_NUM_HOST;  /* in sysfs this is relative */
 static int sdebug_ato = DEF_ATO;
 static int sdebug_cdb_len = DEF_CDB_LEN;
@@ -781,7 +778,6 @@ static int sdebug_uuid_ctl = DEF_UUID_CTL;
 static bool sdebug_random = DEF_RANDOM;
 static bool sdebug_per_host_store = DEF_PER_HOST_STORE;
 static bool sdebug_removable = DEF_REMOVABLE;
-static bool sdebug_deflect_incoming;
 static bool sdebug_clustering;
 static bool sdebug_host_lock = DEF_HOST_LOCK;
 static bool sdebug_strict = DEF_STRICT;
@@ -5122,10 +5118,6 @@ static int scsi_debug_slave_configure(struct scsi_device *sdp)
 		       sdp->host->host_no, sdp->channel, sdp->id, sdp->lun);
 	if (sdp->host->max_cmd_len != SDEBUG_MAX_CMD_LEN)
 		sdp->host->max_cmd_len = SDEBUG_MAX_CMD_LEN;
-	if (smp_load_acquire(&sdebug_deflect_incoming)) {
-		pr_info("Exit early due to deflect_incoming\n");
-		return 1;
-	}
 	if (devip == NULL) {
 		devip = find_build_dev_info(sdp);
 		if (devip == NULL)
@@ -5211,7 +5203,7 @@ static bool stop_queued_cmnd(struct scsi_cmnd *cmnd)
 }
 
 /* Deletes (stops) timers or work queues of all queued commands */
-static void stop_all_queued(bool done_with_no_conn)
+static void stop_all_queued(void)
 {
 	unsigned long iflags;
 	int j, k;
@@ -5220,15 +5212,13 @@ static void stop_all_queued(bool done_with_no_conn)
 	struct sdebug_queued_cmd *sqcp;
 	struct sdebug_dev_info *devip;
 	struct sdebug_defer *sd_dp;
-	struct scsi_cmnd *scp;
 
 	for (j = 0, sqp = sdebug_q_arr; j < submit_queues; ++j, ++sqp) {
 		spin_lock_irqsave(&sqp->qc_lock, iflags);
 		for (k = 0; k < SDEBUG_CANQUEUE; ++k) {
 			if (test_bit(k, sqp->in_use_bm)) {
 				sqcp = &sqp->qc_arr[k];
-				scp = sqcp->a_cmnd;
-				if (!scp)
+				if (sqcp->a_cmnd == NULL)
 					continue;
 				devip = (struct sdebug_dev_info *)
 					sqcp->a_cmnd->device->hostdata;
@@ -5243,10 +5233,6 @@ static void stop_all_queued(bool done_with_no_conn)
 					l_defer_t = SDEB_DEFER_NONE;
 				spin_unlock_irqrestore(&sqp->qc_lock, iflags);
 				stop_qc_helper(sd_dp, l_defer_t);
-				if (done_with_no_conn && l_defer_t != SDEB_DEFER_NONE) {
-					scp->result = DID_NO_CONNECT << 16;
-					scsi_done(scp);
-				}
 				clear_bit(k, sqp->in_use_bm);
 				spin_lock_irqsave(&sqp->qc_lock, iflags);
 			}
@@ -5389,7 +5375,7 @@ static int scsi_debug_host_reset(struct scsi_cmnd *SCpnt)
 		}
 	}
 	spin_unlock(&sdebug_host_list_lock);
-	stop_all_queued(false);
+	stop_all_queued();
 	if (SDEBUG_OPT_RESET_NOISE & sdebug_opts)
 		sdev_printk(KERN_INFO, SCpnt->device,
 			    "%s: %d device(s) found\n", __func__, k);
@@ -5449,50 +5435,13 @@ static void sdebug_build_parts(unsigned char *ramp, unsigned long store_size)
 	}
 }
 
-static void sdeb_block_all_queues(void)
-{
-	int j;
-	struct sdebug_queue *sqp;
-
-	for (j = 0, sqp = sdebug_q_arr; j < submit_queues; ++j, ++sqp)
-		atomic_set(&sqp->blocked, (int)true);
-}
-
-static void sdeb_unblock_all_queues(void)
+static void block_unblock_all_queues(bool block)
 {
 	int j;
 	struct sdebug_queue *sqp;
 
 	for (j = 0, sqp = sdebug_q_arr; j < submit_queues; ++j, ++sqp)
-		atomic_set(&sqp->blocked, (int)false);
-}
-
-static void
-sdeb_add_n_hosts(int num_hosts)
-{
-	if (num_hosts < 1)
-		return;
-	do {
-		bool found;
-		unsigned long idx;
-		struct sdeb_store_info *sip;
-		bool want_phs = (sdebug_fake_rw == 0) && sdebug_per_host_store;
-
-		found = false;
-		if (want_phs) {
-			xa_for_each_marked(per_store_ap, idx, sip, SDEB_XA_NOT_IN_USE) {
-				sdeb_most_recent_idx = (int)idx;
-				found = true;
-				break;
-			}
-			if (found)	/* re-use case */
-				sdebug_add_host_helper((int)idx);
-			else
-				sdebug_do_add_host(true	/* make new store */);
-		} else {
-			sdebug_do_add_host(false);
-		}
-	} while (--num_hosts);
+		atomic_set(&sqp->blocked, (int)block);
 }
 
 /* Adjust (by rounding down) the sdebug_cmnd_count so abs(every_nth)-1
@@ -5505,10 +5454,10 @@ static void tweak_cmnd_count(void)
 	modulo = abs(sdebug_every_nth);
 	if (modulo < 2)
 		return;
-	sdeb_block_all_queues();
+	block_unblock_all_queues(true);
 	count = atomic_read(&sdebug_cmnd_count);
 	atomic_set(&sdebug_cmnd_count, (count / modulo) * modulo);
-	sdeb_unblock_all_queues();
+	block_unblock_all_queues(false);
 }
 
 static void clear_queue_stats(void)
@@ -5526,15 +5475,6 @@ static bool inject_on_this_cmd(void)
 	return (atomic_read(&sdebug_cmnd_count) % abs(sdebug_every_nth)) == 0;
 }
 
-static int process_deflect_incoming(struct scsi_cmnd *scp)
-{
-	u8 opcode = scp->cmnd[0];
-
-	if (opcode == SYNCHRONIZE_CACHE || opcode == SYNCHRONIZE_CACHE_16)
-		return 0;
-	return DID_NO_CONNECT << 16;
-}
-
 #define INCLUSIVE_TIMING_MAX_NS 1000000		/* 1 millisecond */
 
 /* Complete the processing of the thread that queued a SCSI command to this
@@ -5544,7 +5484,8 @@ static int process_deflect_incoming(struct scsi_cmnd *scp)
  */
 static int schedule_resp(struct scsi_cmnd *cmnd, struct sdebug_dev_info *devip,
 			 int scsi_result,
-			 int (*pfp)(struct scsi_cmnd *, struct sdebug_dev_info *),
+			 int (*pfp)(struct scsi_cmnd *,
+				    struct sdebug_dev_info *),
 			 int delta_jiff, int ndelay)
 {
 	bool new_sd_dp;
@@ -5565,27 +5506,13 @@ static int schedule_resp(struct scsi_cmnd *cmnd, struct sdebug_dev_info *devip,
 	}
 	sdp = cmnd->device;
 
-	if (delta_jiff == 0) {
-		sqp = get_queue(cmnd);
-		if (atomic_read(&sqp->blocked)) {
-			if (smp_load_acquire(&sdebug_deflect_incoming))
-				return process_deflect_incoming(cmnd);
-			else
-				return SCSI_MLQUEUE_HOST_BUSY;
-		}
+	if (delta_jiff == 0)
 		goto respond_in_thread;
-	}
 
 	sqp = get_queue(cmnd);
 	spin_lock_irqsave(&sqp->qc_lock, iflags);
 	if (unlikely(atomic_read(&sqp->blocked))) {
 		spin_unlock_irqrestore(&sqp->qc_lock, iflags);
-		if (smp_load_acquire(&sdebug_deflect_incoming)) {
-			scsi_result = process_deflect_incoming(cmnd);
-			goto respond_in_thread;
-		}
-		if (sdebug_verbose)
-			pr_info("blocked --> SCSI_MLQUEUE_HOST_BUSY\n");
 		return SCSI_MLQUEUE_HOST_BUSY;
 	}
 	num_in_q = atomic_read(&devip->num_in_q);
@@ -5774,12 +5701,8 @@ static int schedule_resp(struct scsi_cmnd *cmnd, struct sdebug_dev_info *devip,
 respond_in_thread:	/* call back to mid-layer using invocation thread */
 	cmnd->result = pfp != NULL ? pfp(cmnd, devip) : 0;
 	cmnd->result &= ~SDEG_RES_IMMED_MASK;
-	if (cmnd->result == 0 && scsi_result != 0) {
+	if (cmnd->result == 0 && scsi_result != 0)
 		cmnd->result = scsi_result;
-		if (sdebug_verbose)
-			pr_info("respond_in_thread: tag=0x%x, scp->result=0x%x\n",
-				blk_mq_unique_tag(scsi_cmd_to_rq(cmnd)), scsi_result);
-	}
 	scsi_done(cmnd);
 	return 0;
 }
@@ -6064,7 +5987,7 @@ static ssize_t delay_store(struct device_driver *ddp, const char *buf,
 			int j, k;
 			struct sdebug_queue *sqp;
 
-			sdeb_block_all_queues();
+			block_unblock_all_queues(true);
 			for (j = 0, sqp = sdebug_q_arr; j < submit_queues;
 			     ++j, ++sqp) {
 				k = find_first_bit(sqp->in_use_bm,
@@ -6078,7 +6001,7 @@ static ssize_t delay_store(struct device_driver *ddp, const char *buf,
 				sdebug_jdelay = jdelay;
 				sdebug_ndelay = 0;
 			}
-			sdeb_unblock_all_queues();
+			block_unblock_all_queues(false);
 		}
 		return res;
 	}
@@ -6104,7 +6027,7 @@ static ssize_t ndelay_store(struct device_driver *ddp, const char *buf,
 			int j, k;
 			struct sdebug_queue *sqp;
 
-			sdeb_block_all_queues();
+			block_unblock_all_queues(true);
 			for (j = 0, sqp = sdebug_q_arr; j < submit_queues;
 			     ++j, ++sqp) {
 				k = find_first_bit(sqp->in_use_bm,
@@ -6119,7 +6042,7 @@ static ssize_t ndelay_store(struct device_driver *ddp, const char *buf,
 				sdebug_jdelay = ndelay  ? JDELAY_OVERRIDDEN
 							: DEF_JDELAY;
 			}
-			sdeb_unblock_all_queues();
+			block_unblock_all_queues(false);
 		}
 		return res;
 	}
@@ -6433,7 +6356,7 @@ static ssize_t max_queue_store(struct device_driver *ddp, const char *buf,
 	if ((count > 0) && (1 == sscanf(buf, "%d", &n)) && (n > 0) &&
 	    (n <= SDEBUG_CANQUEUE) &&
 	    (sdebug_host_max_queue == 0)) {
-		sdeb_block_all_queues();
+		block_unblock_all_queues(true);
 		k = 0;
 		for (j = 0, sqp = sdebug_q_arr; j < submit_queues;
 		     ++j, ++sqp) {
@@ -6448,7 +6371,7 @@ static ssize_t max_queue_store(struct device_driver *ddp, const char *buf,
 			atomic_set(&retired_max_queue, k + 1);
 		else
 			atomic_set(&retired_max_queue, 0);
-		sdeb_unblock_all_queues();
+		block_unblock_all_queues(false);
 		return count;
 	}
 	return -EINVAL;
@@ -6537,48 +6460,43 @@ static DRIVER_ATTR_RW(virtual_gb);
 static ssize_t add_host_show(struct device_driver *ddp, char *buf)
 {
 	/* absolute number of hosts currently active is what is shown */
-	return scnprintf(buf, PAGE_SIZE, "%d\n", atomic_read(&sdebug_num_hosts));
+	return scnprintf(buf, PAGE_SIZE, "%d\n", sdebug_num_hosts);
 }
 
-/*
- * Accept positive and negative values. Hex values (only positive) may be prefixed by '0x'.
- * To remove all hosts use a large negative number (e.g. -9999). The value 0 does nothing.
- * Returns -EBUSY if another add_host sysfs invocation is active.
- */
 static ssize_t add_host_store(struct device_driver *ddp, const char *buf,
 			      size_t count)
 {
+	bool found;
+	unsigned long idx;
+	struct sdeb_store_info *sip;
+	bool want_phs = (sdebug_fake_rw == 0) && sdebug_per_host_store;
 	int delta_hosts;
 
-	if (count == 0 || kstrtoint(buf, 0, &delta_hosts))
+	if (sscanf(buf, "%d", &delta_hosts) != 1)
 		return -EINVAL;
-	if (sdebug_verbose)
-		pr_info("prior num_hosts=%d, num_to_add=%d\n",
-			atomic_read(&sdebug_num_hosts), delta_hosts);
-	if (delta_hosts == 0)
-		return count;
-	if (mutex_trylock(&add_host_mutex) == 0)
-		return -EBUSY;
 	if (delta_hosts > 0) {
-		sdeb_add_n_hosts(delta_hosts);
-	} else if (delta_hosts < 0) {
-		smp_store_release(&sdebug_deflect_incoming, true);
-		sdeb_block_all_queues();
-		if (delta_hosts >= atomic_read(&sdebug_num_hosts))
-			stop_all_queued(true);
 		do {
-			if (atomic_read(&sdebug_num_hosts) < 1) {
-				free_all_queued();
-				break;
+			found = false;
+			if (want_phs) {
+				xa_for_each_marked(per_store_ap, idx, sip,
+						   SDEB_XA_NOT_IN_USE) {
+					sdeb_most_recent_idx = (int)idx;
+					found = true;
+					break;
+				}
+				if (found)	/* re-use case */
+					sdebug_add_host_helper((int)idx);
+				else
+					sdebug_do_add_host(true);
+			} else {
+				sdebug_do_add_host(false);
 			}
+		} while (--delta_hosts);
+	} else if (delta_hosts < 0) {
+		do {
 			sdebug_do_remove_host(false);
 		} while (++delta_hosts);
-		sdeb_unblock_all_queues();
-		smp_store_release(&sdebug_deflect_incoming, false);
 	}
-	mutex_unlock(&add_host_mutex);
-	if (sdebug_verbose)
-		pr_info("post num_hosts=%d\n", atomic_read(&sdebug_num_hosts));
 	return count;
 }
 static DRIVER_ATTR_RW(add_host);
@@ -7089,10 +7007,6 @@ static int __init scsi_debug_init(void)
 	sdebug_add_host = 0;
 
 	for (k = 0; k < hosts_to_add; k++) {
-		if (smp_load_acquire(&sdebug_deflect_incoming)) {
-			pr_info("exit early as sdebug_deflect_incoming is set\n");
-			return 0;
-		}
 		if (want_store && k == 0) {
 			ret = sdebug_add_host_helper(idx);
 			if (ret < 0) {
@@ -7110,12 +7024,8 @@ static int __init scsi_debug_init(void)
 		}
 	}
 	if (sdebug_verbose)
-		pr_info("built %d host(s)\n", atomic_read(&sdebug_num_hosts));
+		pr_info("built %d host(s)\n", sdebug_num_hosts);
 
-	/*
-	 * Even though all the hosts have been established, due to async device (LU) scanning
-	 * by the scsi mid-level, there may still be devices (LUs) being set up.
-	 */
 	return 0;
 
 bus_unreg:
@@ -7131,17 +7041,12 @@ static int __init scsi_debug_init(void)
 
 static void __exit scsi_debug_exit(void)
 {
-	int k;
+	int k = sdebug_num_hosts;
 
-	/* Possible race with LUs still being set up; stop them asap */
-	sdeb_block_all_queues();
-	smp_store_release(&sdebug_deflect_incoming, true);
-	stop_all_queued(false);
-	for (k = 0; atomic_read(&sdebug_num_hosts) > 0; k++)
+	stop_all_queued();
+	for (; k; k--)
 		sdebug_do_remove_host(true);
 	free_all_queued();
-	if (sdebug_verbose)
-		pr_info("removed %d hosts\n", k);
 	driver_unregister(&sdebug_driverfs_driver);
 	bus_unregister(&pseudo_lld_bus);
 	root_device_unregister(pseudo_primary);
@@ -7311,13 +7216,13 @@ static int sdebug_add_host_helper(int per_host_idx)
 	sdbg_host->dev.bus = &pseudo_lld_bus;
 	sdbg_host->dev.parent = pseudo_primary;
 	sdbg_host->dev.release = &sdebug_release_adapter;
-	dev_set_name(&sdbg_host->dev, "adapter%d", atomic_read(&sdebug_num_hosts));
+	dev_set_name(&sdbg_host->dev, "adapter%d", sdebug_num_hosts);
 
 	error = device_register(&sdbg_host->dev);
 	if (error)
 		goto clean;
 
-	atomic_inc(&sdebug_num_hosts);
+	++sdebug_num_hosts;
 	return 0;
 
 clean:
@@ -7381,7 +7286,7 @@ static void sdebug_do_remove_host(bool the_end)
 		return;
 
 	device_unregister(&sdbg_host->dev);
-	atomic_dec(&sdebug_num_hosts);
+	--sdebug_num_hosts;
 }
 
 static int sdebug_change_qdepth(struct scsi_device *sdev, int qdepth)
@@ -7389,10 +7294,10 @@ static int sdebug_change_qdepth(struct scsi_device *sdev, int qdepth)
 	int num_in_q = 0;
 	struct sdebug_dev_info *devip;
 
-	sdeb_block_all_queues();
+	block_unblock_all_queues(true);
 	devip = (struct sdebug_dev_info *)sdev->hostdata;
 	if (NULL == devip) {
-		sdeb_unblock_all_queues();
+		block_unblock_all_queues(false);
 		return	-ENODEV;
 	}
 	num_in_q = atomic_read(&devip->num_in_q);
@@ -7411,7 +7316,7 @@ static int sdebug_change_qdepth(struct scsi_device *sdev, int qdepth)
 		sdev_printk(KERN_INFO, sdev, "%s: qdepth=%d, num_in_q=%d\n",
 			    __func__, qdepth, num_in_q);
 	}
-	sdeb_unblock_all_queues();
+	block_unblock_all_queues(false);
 	return sdev->queue_depth;
 }
 

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH] Revert "scsi: scsi_debug: Address races following module load"
  2022-04-09  4:37 [PATCH] Revert "scsi: scsi_debug: Address races following module load" Bart Van Assche
@ 2022-04-09 19:10 ` Douglas Gilbert
  2022-04-12 17:52   ` Luis Chamberlain
  2022-04-10 13:15 ` Yi Zhang
  2022-04-12  2:36 ` Martin K. Petersen
  2 siblings, 1 reply; 8+ messages in thread
From: Douglas Gilbert @ 2022-04-09 19:10 UTC (permalink / raw)
  To: Bart Van Assche, Martin K . Petersen
  Cc: linux-scsi, Yi Zhang, Bob Pearson, James E.J. Bottomley,
	Luis Chamberlain

On 2022-04-09 00:37, Bart Van Assche wrote:
> Revert the patch mentioned in the subject since it blocks I/O after
> module unload has started while this is a legitimate use case. For e.g.
> blktests test case srp/001 that patch causes a command timeout to be
> triggered for the following call stack:
> 
> __schedule+0x4c3/0xd20
> schedule+0x82/0x110
> schedule_timeout+0x122/0x200
> io_schedule_timeout+0x7b/0xc0
> __wait_for_common+0x2bc/0x380
> wait_for_completion_io_timeout+0x1d/0x20
> blk_execute_rq+0x1db/0x200
> __scsi_execute+0x1fb/0x310
> sd_sync_cache+0x155/0x2c0 [sd_mod]
> sd_shutdown+0xbb/0x190 [sd_mod]
> sd_remove+0x5b/0x80 [sd_mod]
> device_remove+0x9a/0xb0
> device_release_driver_internal+0x2c5/0x360
> device_release_driver+0x12/0x20
> bus_remove_device+0x1aa/0x270
> device_del+0x2d4/0x640
> __scsi_remove_device+0x168/0x1a0
> scsi_forget_host+0xa8/0xb0
> scsi_remove_host+0x9b/0x150
> sdebug_driver_remove+0x3d/0x140 [scsi_debug]
> device_remove+0x6f/0xb0
> device_release_driver_internal+0x2c5/0x360
> device_release_driver+0x12/0x20
> bus_remove_device+0x1aa/0x270
> device_del+0x2d4/0x640
> device_unregister+0x18/0x70
> sdebug_do_remove_host+0x138/0x180 [scsi_debug]
> scsi_debug_exit+0x45/0xd5 [scsi_debug]
> __do_sys_delete_module.constprop.0+0x210/0x320
> __x64_sys_delete_module+0x1f/0x30
> do_syscall_64+0x35/0x80
> entry_SYSCALL_64_after_hwframe+0x44/0xae
> 
> Reported-by: Yi Zhang <yi.zhang@redhat.com>
> Cc: Douglas Gilbert <dgilbert@interlog.com>
> Cc: Yi Zhang <yi.zhang@redhat.com>
> Cc: Bob Pearson <rpearsonhpe@gmail.com>
> Fixes: 2aad3cd85370 ("scsi: scsi_debug: Address races following module load"; )
> Signed-off-by: Bart Van Assche <bvanassche@acm.org>

Acked-by: Douglas Gilbert <dgilbert@interlog.com>

This was a relatively old patch developed in conjunction with Luis Chamberlain
<mcgrof@kernel.org>. So it may have been overtaken by other developments.
I forwarded the "[bug report][bisected] modprob -r scsi-debug take more than
3mins during blktests srp/ tests" email to Luis but haven't heard back. So I'm
happy to remove it.

> ---
>   drivers/scsi/scsi_debug.c | 197 ++++++++++----------------------------
>   1 file changed, 51 insertions(+), 146 deletions(-)
> 
> diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
> index c607755cce00..db6f4b96606c 100644
> --- a/drivers/scsi/scsi_debug.c
> +++ b/drivers/scsi/scsi_debug.c
> @@ -32,7 +32,6 @@
>   #include <linux/blkdev.h>
>   #include <linux/crc-t10dif.h>
>   #include <linux/spinlock.h>
> -#include <linux/mutex.h>
>   #include <linux/interrupt.h>
>   #include <linux/atomic.h>
>   #include <linux/hrtimer.h>
> @@ -732,9 +731,7 @@ static const struct opcode_info_t opcode_info_arr[SDEB_I_LAST_ELEM_P1 + 1] = {
>   	    {0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} },
>   };
>   
> -static atomic_t sdebug_num_hosts;
> -static DEFINE_MUTEX(add_host_mutex);
> -
> +static int sdebug_num_hosts;
>   static int sdebug_add_host = DEF_NUM_HOST;  /* in sysfs this is relative */
>   static int sdebug_ato = DEF_ATO;
>   static int sdebug_cdb_len = DEF_CDB_LEN;
> @@ -781,7 +778,6 @@ static int sdebug_uuid_ctl = DEF_UUID_CTL;
>   static bool sdebug_random = DEF_RANDOM;
>   static bool sdebug_per_host_store = DEF_PER_HOST_STORE;
>   static bool sdebug_removable = DEF_REMOVABLE;
> -static bool sdebug_deflect_incoming;
>   static bool sdebug_clustering;
>   static bool sdebug_host_lock = DEF_HOST_LOCK;
>   static bool sdebug_strict = DEF_STRICT;
> @@ -5122,10 +5118,6 @@ static int scsi_debug_slave_configure(struct scsi_device *sdp)
>   		       sdp->host->host_no, sdp->channel, sdp->id, sdp->lun);
>   	if (sdp->host->max_cmd_len != SDEBUG_MAX_CMD_LEN)
>   		sdp->host->max_cmd_len = SDEBUG_MAX_CMD_LEN;
> -	if (smp_load_acquire(&sdebug_deflect_incoming)) {
> -		pr_info("Exit early due to deflect_incoming\n");
> -		return 1;
> -	}
>   	if (devip == NULL) {
>   		devip = find_build_dev_info(sdp);
>   		if (devip == NULL)
> @@ -5211,7 +5203,7 @@ static bool stop_queued_cmnd(struct scsi_cmnd *cmnd)
>   }
>   
>   /* Deletes (stops) timers or work queues of all queued commands */
> -static void stop_all_queued(bool done_with_no_conn)
> +static void stop_all_queued(void)
>   {
>   	unsigned long iflags;
>   	int j, k;
> @@ -5220,15 +5212,13 @@ static void stop_all_queued(bool done_with_no_conn)
>   	struct sdebug_queued_cmd *sqcp;
>   	struct sdebug_dev_info *devip;
>   	struct sdebug_defer *sd_dp;
> -	struct scsi_cmnd *scp;
>   
>   	for (j = 0, sqp = sdebug_q_arr; j < submit_queues; ++j, ++sqp) {
>   		spin_lock_irqsave(&sqp->qc_lock, iflags);
>   		for (k = 0; k < SDEBUG_CANQUEUE; ++k) {
>   			if (test_bit(k, sqp->in_use_bm)) {
>   				sqcp = &sqp->qc_arr[k];
> -				scp = sqcp->a_cmnd;
> -				if (!scp)
> +				if (sqcp->a_cmnd == NULL)
>   					continue;
>   				devip = (struct sdebug_dev_info *)
>   					sqcp->a_cmnd->device->hostdata;
> @@ -5243,10 +5233,6 @@ static void stop_all_queued(bool done_with_no_conn)
>   					l_defer_t = SDEB_DEFER_NONE;
>   				spin_unlock_irqrestore(&sqp->qc_lock, iflags);
>   				stop_qc_helper(sd_dp, l_defer_t);
> -				if (done_with_no_conn && l_defer_t != SDEB_DEFER_NONE) {
> -					scp->result = DID_NO_CONNECT << 16;
> -					scsi_done(scp);
> -				}
>   				clear_bit(k, sqp->in_use_bm);
>   				spin_lock_irqsave(&sqp->qc_lock, iflags);
>   			}
> @@ -5389,7 +5375,7 @@ static int scsi_debug_host_reset(struct scsi_cmnd *SCpnt)
>   		}
>   	}
>   	spin_unlock(&sdebug_host_list_lock);
> -	stop_all_queued(false);
> +	stop_all_queued();
>   	if (SDEBUG_OPT_RESET_NOISE & sdebug_opts)
>   		sdev_printk(KERN_INFO, SCpnt->device,
>   			    "%s: %d device(s) found\n", __func__, k);
> @@ -5449,50 +5435,13 @@ static void sdebug_build_parts(unsigned char *ramp, unsigned long store_size)
>   	}
>   }
>   
> -static void sdeb_block_all_queues(void)
> -{
> -	int j;
> -	struct sdebug_queue *sqp;
> -
> -	for (j = 0, sqp = sdebug_q_arr; j < submit_queues; ++j, ++sqp)
> -		atomic_set(&sqp->blocked, (int)true);
> -}
> -
> -static void sdeb_unblock_all_queues(void)
> +static void block_unblock_all_queues(bool block)
>   {
>   	int j;
>   	struct sdebug_queue *sqp;
>   
>   	for (j = 0, sqp = sdebug_q_arr; j < submit_queues; ++j, ++sqp)
> -		atomic_set(&sqp->blocked, (int)false);
> -}
> -
> -static void
> -sdeb_add_n_hosts(int num_hosts)
> -{
> -	if (num_hosts < 1)
> -		return;
> -	do {
> -		bool found;
> -		unsigned long idx;
> -		struct sdeb_store_info *sip;
> -		bool want_phs = (sdebug_fake_rw == 0) && sdebug_per_host_store;
> -
> -		found = false;
> -		if (want_phs) {
> -			xa_for_each_marked(per_store_ap, idx, sip, SDEB_XA_NOT_IN_USE) {
> -				sdeb_most_recent_idx = (int)idx;
> -				found = true;
> -				break;
> -			}
> -			if (found)	/* re-use case */
> -				sdebug_add_host_helper((int)idx);
> -			else
> -				sdebug_do_add_host(true	/* make new store */);
> -		} else {
> -			sdebug_do_add_host(false);
> -		}
> -	} while (--num_hosts);
> +		atomic_set(&sqp->blocked, (int)block);
>   }
>   
>   /* Adjust (by rounding down) the sdebug_cmnd_count so abs(every_nth)-1
> @@ -5505,10 +5454,10 @@ static void tweak_cmnd_count(void)
>   	modulo = abs(sdebug_every_nth);
>   	if (modulo < 2)
>   		return;
> -	sdeb_block_all_queues();
> +	block_unblock_all_queues(true);
>   	count = atomic_read(&sdebug_cmnd_count);
>   	atomic_set(&sdebug_cmnd_count, (count / modulo) * modulo);
> -	sdeb_unblock_all_queues();
> +	block_unblock_all_queues(false);
>   }
>   
>   static void clear_queue_stats(void)
> @@ -5526,15 +5475,6 @@ static bool inject_on_this_cmd(void)
>   	return (atomic_read(&sdebug_cmnd_count) % abs(sdebug_every_nth)) == 0;
>   }
>   
> -static int process_deflect_incoming(struct scsi_cmnd *scp)
> -{
> -	u8 opcode = scp->cmnd[0];
> -
> -	if (opcode == SYNCHRONIZE_CACHE || opcode == SYNCHRONIZE_CACHE_16)
> -		return 0;
> -	return DID_NO_CONNECT << 16;
> -}
> -
>   #define INCLUSIVE_TIMING_MAX_NS 1000000		/* 1 millisecond */
>   
>   /* Complete the processing of the thread that queued a SCSI command to this
> @@ -5544,7 +5484,8 @@ static int process_deflect_incoming(struct scsi_cmnd *scp)
>    */
>   static int schedule_resp(struct scsi_cmnd *cmnd, struct sdebug_dev_info *devip,
>   			 int scsi_result,
> -			 int (*pfp)(struct scsi_cmnd *, struct sdebug_dev_info *),
> +			 int (*pfp)(struct scsi_cmnd *,
> +				    struct sdebug_dev_info *),
>   			 int delta_jiff, int ndelay)
>   {
>   	bool new_sd_dp;
> @@ -5565,27 +5506,13 @@ static int schedule_resp(struct scsi_cmnd *cmnd, struct sdebug_dev_info *devip,
>   	}
>   	sdp = cmnd->device;
>   
> -	if (delta_jiff == 0) {
> -		sqp = get_queue(cmnd);
> -		if (atomic_read(&sqp->blocked)) {
> -			if (smp_load_acquire(&sdebug_deflect_incoming))
> -				return process_deflect_incoming(cmnd);
> -			else
> -				return SCSI_MLQUEUE_HOST_BUSY;
> -		}
> +	if (delta_jiff == 0)
>   		goto respond_in_thread;
> -	}
>   
>   	sqp = get_queue(cmnd);
>   	spin_lock_irqsave(&sqp->qc_lock, iflags);
>   	if (unlikely(atomic_read(&sqp->blocked))) {
>   		spin_unlock_irqrestore(&sqp->qc_lock, iflags);
> -		if (smp_load_acquire(&sdebug_deflect_incoming)) {
> -			scsi_result = process_deflect_incoming(cmnd);
> -			goto respond_in_thread;
> -		}
> -		if (sdebug_verbose)
> -			pr_info("blocked --> SCSI_MLQUEUE_HOST_BUSY\n");
>   		return SCSI_MLQUEUE_HOST_BUSY;
>   	}
>   	num_in_q = atomic_read(&devip->num_in_q);
> @@ -5774,12 +5701,8 @@ static int schedule_resp(struct scsi_cmnd *cmnd, struct sdebug_dev_info *devip,
>   respond_in_thread:	/* call back to mid-layer using invocation thread */
>   	cmnd->result = pfp != NULL ? pfp(cmnd, devip) : 0;
>   	cmnd->result &= ~SDEG_RES_IMMED_MASK;
> -	if (cmnd->result == 0 && scsi_result != 0) {
> +	if (cmnd->result == 0 && scsi_result != 0)
>   		cmnd->result = scsi_result;
> -		if (sdebug_verbose)
> -			pr_info("respond_in_thread: tag=0x%x, scp->result=0x%x\n",
> -				blk_mq_unique_tag(scsi_cmd_to_rq(cmnd)), scsi_result);
> -	}
>   	scsi_done(cmnd);
>   	return 0;
>   }
> @@ -6064,7 +5987,7 @@ static ssize_t delay_store(struct device_driver *ddp, const char *buf,
>   			int j, k;
>   			struct sdebug_queue *sqp;
>   
> -			sdeb_block_all_queues();
> +			block_unblock_all_queues(true);
>   			for (j = 0, sqp = sdebug_q_arr; j < submit_queues;
>   			     ++j, ++sqp) {
>   				k = find_first_bit(sqp->in_use_bm,
> @@ -6078,7 +6001,7 @@ static ssize_t delay_store(struct device_driver *ddp, const char *buf,
>   				sdebug_jdelay = jdelay;
>   				sdebug_ndelay = 0;
>   			}
> -			sdeb_unblock_all_queues();
> +			block_unblock_all_queues(false);
>   		}
>   		return res;
>   	}
> @@ -6104,7 +6027,7 @@ static ssize_t ndelay_store(struct device_driver *ddp, const char *buf,
>   			int j, k;
>   			struct sdebug_queue *sqp;
>   
> -			sdeb_block_all_queues();
> +			block_unblock_all_queues(true);
>   			for (j = 0, sqp = sdebug_q_arr; j < submit_queues;
>   			     ++j, ++sqp) {
>   				k = find_first_bit(sqp->in_use_bm,
> @@ -6119,7 +6042,7 @@ static ssize_t ndelay_store(struct device_driver *ddp, const char *buf,
>   				sdebug_jdelay = ndelay  ? JDELAY_OVERRIDDEN
>   							: DEF_JDELAY;
>   			}
> -			sdeb_unblock_all_queues();
> +			block_unblock_all_queues(false);
>   		}
>   		return res;
>   	}
> @@ -6433,7 +6356,7 @@ static ssize_t max_queue_store(struct device_driver *ddp, const char *buf,
>   	if ((count > 0) && (1 == sscanf(buf, "%d", &n)) && (n > 0) &&
>   	    (n <= SDEBUG_CANQUEUE) &&
>   	    (sdebug_host_max_queue == 0)) {
> -		sdeb_block_all_queues();
> +		block_unblock_all_queues(true);
>   		k = 0;
>   		for (j = 0, sqp = sdebug_q_arr; j < submit_queues;
>   		     ++j, ++sqp) {
> @@ -6448,7 +6371,7 @@ static ssize_t max_queue_store(struct device_driver *ddp, const char *buf,
>   			atomic_set(&retired_max_queue, k + 1);
>   		else
>   			atomic_set(&retired_max_queue, 0);
> -		sdeb_unblock_all_queues();
> +		block_unblock_all_queues(false);
>   		return count;
>   	}
>   	return -EINVAL;
> @@ -6537,48 +6460,43 @@ static DRIVER_ATTR_RW(virtual_gb);
>   static ssize_t add_host_show(struct device_driver *ddp, char *buf)
>   {
>   	/* absolute number of hosts currently active is what is shown */
> -	return scnprintf(buf, PAGE_SIZE, "%d\n", atomic_read(&sdebug_num_hosts));
> +	return scnprintf(buf, PAGE_SIZE, "%d\n", sdebug_num_hosts);
>   }
>   
> -/*
> - * Accept positive and negative values. Hex values (only positive) may be prefixed by '0x'.
> - * To remove all hosts use a large negative number (e.g. -9999). The value 0 does nothing.
> - * Returns -EBUSY if another add_host sysfs invocation is active.
> - */
>   static ssize_t add_host_store(struct device_driver *ddp, const char *buf,
>   			      size_t count)
>   {
> +	bool found;
> +	unsigned long idx;
> +	struct sdeb_store_info *sip;
> +	bool want_phs = (sdebug_fake_rw == 0) && sdebug_per_host_store;
>   	int delta_hosts;
>   
> -	if (count == 0 || kstrtoint(buf, 0, &delta_hosts))
> +	if (sscanf(buf, "%d", &delta_hosts) != 1)
>   		return -EINVAL;
> -	if (sdebug_verbose)
> -		pr_info("prior num_hosts=%d, num_to_add=%d\n",
> -			atomic_read(&sdebug_num_hosts), delta_hosts);
> -	if (delta_hosts == 0)
> -		return count;
> -	if (mutex_trylock(&add_host_mutex) == 0)
> -		return -EBUSY;
>   	if (delta_hosts > 0) {
> -		sdeb_add_n_hosts(delta_hosts);
> -	} else if (delta_hosts < 0) {
> -		smp_store_release(&sdebug_deflect_incoming, true);
> -		sdeb_block_all_queues();
> -		if (delta_hosts >= atomic_read(&sdebug_num_hosts))
> -			stop_all_queued(true);
>   		do {
> -			if (atomic_read(&sdebug_num_hosts) < 1) {
> -				free_all_queued();
> -				break;
> +			found = false;
> +			if (want_phs) {
> +				xa_for_each_marked(per_store_ap, idx, sip,
> +						   SDEB_XA_NOT_IN_USE) {
> +					sdeb_most_recent_idx = (int)idx;
> +					found = true;
> +					break;
> +				}
> +				if (found)	/* re-use case */
> +					sdebug_add_host_helper((int)idx);
> +				else
> +					sdebug_do_add_host(true);
> +			} else {
> +				sdebug_do_add_host(false);
>   			}
> +		} while (--delta_hosts);
> +	} else if (delta_hosts < 0) {
> +		do {
>   			sdebug_do_remove_host(false);
>   		} while (++delta_hosts);
> -		sdeb_unblock_all_queues();
> -		smp_store_release(&sdebug_deflect_incoming, false);
>   	}
> -	mutex_unlock(&add_host_mutex);
> -	if (sdebug_verbose)
> -		pr_info("post num_hosts=%d\n", atomic_read(&sdebug_num_hosts));
>   	return count;
>   }
>   static DRIVER_ATTR_RW(add_host);
> @@ -7089,10 +7007,6 @@ static int __init scsi_debug_init(void)
>   	sdebug_add_host = 0;
>   
>   	for (k = 0; k < hosts_to_add; k++) {
> -		if (smp_load_acquire(&sdebug_deflect_incoming)) {
> -			pr_info("exit early as sdebug_deflect_incoming is set\n");
> -			return 0;
> -		}
>   		if (want_store && k == 0) {
>   			ret = sdebug_add_host_helper(idx);
>   			if (ret < 0) {
> @@ -7110,12 +7024,8 @@ static int __init scsi_debug_init(void)
>   		}
>   	}
>   	if (sdebug_verbose)
> -		pr_info("built %d host(s)\n", atomic_read(&sdebug_num_hosts));
> +		pr_info("built %d host(s)\n", sdebug_num_hosts);
>   
> -	/*
> -	 * Even though all the hosts have been established, due to async device (LU) scanning
> -	 * by the scsi mid-level, there may still be devices (LUs) being set up.
> -	 */
>   	return 0;
>   
>   bus_unreg:
> @@ -7131,17 +7041,12 @@ static int __init scsi_debug_init(void)
>   
>   static void __exit scsi_debug_exit(void)
>   {
> -	int k;
> +	int k = sdebug_num_hosts;
>   
> -	/* Possible race with LUs still being set up; stop them asap */
> -	sdeb_block_all_queues();
> -	smp_store_release(&sdebug_deflect_incoming, true);
> -	stop_all_queued(false);
> -	for (k = 0; atomic_read(&sdebug_num_hosts) > 0; k++)
> +	stop_all_queued();
> +	for (; k; k--)
>   		sdebug_do_remove_host(true);
>   	free_all_queued();
> -	if (sdebug_verbose)
> -		pr_info("removed %d hosts\n", k);
>   	driver_unregister(&sdebug_driverfs_driver);
>   	bus_unregister(&pseudo_lld_bus);
>   	root_device_unregister(pseudo_primary);
> @@ -7311,13 +7216,13 @@ static int sdebug_add_host_helper(int per_host_idx)
>   	sdbg_host->dev.bus = &pseudo_lld_bus;
>   	sdbg_host->dev.parent = pseudo_primary;
>   	sdbg_host->dev.release = &sdebug_release_adapter;
> -	dev_set_name(&sdbg_host->dev, "adapter%d", atomic_read(&sdebug_num_hosts));
> +	dev_set_name(&sdbg_host->dev, "adapter%d", sdebug_num_hosts);
>   
>   	error = device_register(&sdbg_host->dev);
>   	if (error)
>   		goto clean;
>   
> -	atomic_inc(&sdebug_num_hosts);
> +	++sdebug_num_hosts;
>   	return 0;
>   
>   clean:
> @@ -7381,7 +7286,7 @@ static void sdebug_do_remove_host(bool the_end)
>   		return;
>   
>   	device_unregister(&sdbg_host->dev);
> -	atomic_dec(&sdebug_num_hosts);
> +	--sdebug_num_hosts;
>   }
>   
>   static int sdebug_change_qdepth(struct scsi_device *sdev, int qdepth)
> @@ -7389,10 +7294,10 @@ static int sdebug_change_qdepth(struct scsi_device *sdev, int qdepth)
>   	int num_in_q = 0;
>   	struct sdebug_dev_info *devip;
>   
> -	sdeb_block_all_queues();
> +	block_unblock_all_queues(true);
>   	devip = (struct sdebug_dev_info *)sdev->hostdata;
>   	if (NULL == devip) {
> -		sdeb_unblock_all_queues();
> +		block_unblock_all_queues(false);
>   		return	-ENODEV;
>   	}
>   	num_in_q = atomic_read(&devip->num_in_q);
> @@ -7411,7 +7316,7 @@ static int sdebug_change_qdepth(struct scsi_device *sdev, int qdepth)
>   		sdev_printk(KERN_INFO, sdev, "%s: qdepth=%d, num_in_q=%d\n",
>   			    __func__, qdepth, num_in_q);
>   	}
> -	sdeb_unblock_all_queues();
> +	block_unblock_all_queues(false);
>   	return sdev->queue_depth;
>   }
>   


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] Revert "scsi: scsi_debug: Address races following module load"
  2022-04-09  4:37 [PATCH] Revert "scsi: scsi_debug: Address races following module load" Bart Van Assche
  2022-04-09 19:10 ` Douglas Gilbert
@ 2022-04-10 13:15 ` Yi Zhang
  2022-04-10 17:00   ` Bob Pearson
  2022-04-12  2:36 ` Martin K. Petersen
  2 siblings, 1 reply; 8+ messages in thread
From: Yi Zhang @ 2022-04-10 13:15 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: Martin K . Petersen, linux-scsi, Douglas Gilbert, Bob Pearson,
	James E.J. Bottomley

Confirmed the blktests srp/ issue was fixed with this revert:

Tested-by: Yi Zhang <yi.zhang@redhat.com>

On Sat, Apr 9, 2022 at 12:37 PM Bart Van Assche <bvanassche@acm.org> wrote:
>
> Revert the patch mentioned in the subject since it blocks I/O after
> module unload has started while this is a legitimate use case. For e.g.
> blktests test case srp/001 that patch causes a command timeout to be
> triggered for the following call stack:
>
> __schedule+0x4c3/0xd20
> schedule+0x82/0x110
> schedule_timeout+0x122/0x200
> io_schedule_timeout+0x7b/0xc0
> __wait_for_common+0x2bc/0x380
> wait_for_completion_io_timeout+0x1d/0x20
> blk_execute_rq+0x1db/0x200
> __scsi_execute+0x1fb/0x310
> sd_sync_cache+0x155/0x2c0 [sd_mod]
> sd_shutdown+0xbb/0x190 [sd_mod]
> sd_remove+0x5b/0x80 [sd_mod]
> device_remove+0x9a/0xb0
> device_release_driver_internal+0x2c5/0x360
> device_release_driver+0x12/0x20
> bus_remove_device+0x1aa/0x270
> device_del+0x2d4/0x640
> __scsi_remove_device+0x168/0x1a0
> scsi_forget_host+0xa8/0xb0
> scsi_remove_host+0x9b/0x150
> sdebug_driver_remove+0x3d/0x140 [scsi_debug]
> device_remove+0x6f/0xb0
> device_release_driver_internal+0x2c5/0x360
> device_release_driver+0x12/0x20
> bus_remove_device+0x1aa/0x270
> device_del+0x2d4/0x640
> device_unregister+0x18/0x70
> sdebug_do_remove_host+0x138/0x180 [scsi_debug]
> scsi_debug_exit+0x45/0xd5 [scsi_debug]
> __do_sys_delete_module.constprop.0+0x210/0x320
> __x64_sys_delete_module+0x1f/0x30
> do_syscall_64+0x35/0x80
> entry_SYSCALL_64_after_hwframe+0x44/0xae
>
> Reported-by: Yi Zhang <yi.zhang@redhat.com>
> Cc: Douglas Gilbert <dgilbert@interlog.com>
> Cc: Yi Zhang <yi.zhang@redhat.com>
> Cc: Bob Pearson <rpearsonhpe@gmail.com>
> Fixes: 2aad3cd85370 ("scsi: scsi_debug: Address races following module load"; )
> Signed-off-by: Bart Van Assche <bvanassche@acm.org>
> ---
>  drivers/scsi/scsi_debug.c | 197 ++++++++++----------------------------
>  1 file changed, 51 insertions(+), 146 deletions(-)
>
> diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
> index c607755cce00..db6f4b96606c 100644
> --- a/drivers/scsi/scsi_debug.c
> +++ b/drivers/scsi/scsi_debug.c
> @@ -32,7 +32,6 @@
>  #include <linux/blkdev.h>
>  #include <linux/crc-t10dif.h>
>  #include <linux/spinlock.h>
> -#include <linux/mutex.h>
>  #include <linux/interrupt.h>
>  #include <linux/atomic.h>
>  #include <linux/hrtimer.h>
> @@ -732,9 +731,7 @@ static const struct opcode_info_t opcode_info_arr[SDEB_I_LAST_ELEM_P1 + 1] = {
>             {0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} },
>  };
>
> -static atomic_t sdebug_num_hosts;
> -static DEFINE_MUTEX(add_host_mutex);
> -
> +static int sdebug_num_hosts;
>  static int sdebug_add_host = DEF_NUM_HOST;  /* in sysfs this is relative */
>  static int sdebug_ato = DEF_ATO;
>  static int sdebug_cdb_len = DEF_CDB_LEN;
> @@ -781,7 +778,6 @@ static int sdebug_uuid_ctl = DEF_UUID_CTL;
>  static bool sdebug_random = DEF_RANDOM;
>  static bool sdebug_per_host_store = DEF_PER_HOST_STORE;
>  static bool sdebug_removable = DEF_REMOVABLE;
> -static bool sdebug_deflect_incoming;
>  static bool sdebug_clustering;
>  static bool sdebug_host_lock = DEF_HOST_LOCK;
>  static bool sdebug_strict = DEF_STRICT;
> @@ -5122,10 +5118,6 @@ static int scsi_debug_slave_configure(struct scsi_device *sdp)
>                        sdp->host->host_no, sdp->channel, sdp->id, sdp->lun);
>         if (sdp->host->max_cmd_len != SDEBUG_MAX_CMD_LEN)
>                 sdp->host->max_cmd_len = SDEBUG_MAX_CMD_LEN;
> -       if (smp_load_acquire(&sdebug_deflect_incoming)) {
> -               pr_info("Exit early due to deflect_incoming\n");
> -               return 1;
> -       }
>         if (devip == NULL) {
>                 devip = find_build_dev_info(sdp);
>                 if (devip == NULL)
> @@ -5211,7 +5203,7 @@ static bool stop_queued_cmnd(struct scsi_cmnd *cmnd)
>  }
>
>  /* Deletes (stops) timers or work queues of all queued commands */
> -static void stop_all_queued(bool done_with_no_conn)
> +static void stop_all_queued(void)
>  {
>         unsigned long iflags;
>         int j, k;
> @@ -5220,15 +5212,13 @@ static void stop_all_queued(bool done_with_no_conn)
>         struct sdebug_queued_cmd *sqcp;
>         struct sdebug_dev_info *devip;
>         struct sdebug_defer *sd_dp;
> -       struct scsi_cmnd *scp;
>
>         for (j = 0, sqp = sdebug_q_arr; j < submit_queues; ++j, ++sqp) {
>                 spin_lock_irqsave(&sqp->qc_lock, iflags);
>                 for (k = 0; k < SDEBUG_CANQUEUE; ++k) {
>                         if (test_bit(k, sqp->in_use_bm)) {
>                                 sqcp = &sqp->qc_arr[k];
> -                               scp = sqcp->a_cmnd;
> -                               if (!scp)
> +                               if (sqcp->a_cmnd == NULL)
>                                         continue;
>                                 devip = (struct sdebug_dev_info *)
>                                         sqcp->a_cmnd->device->hostdata;
> @@ -5243,10 +5233,6 @@ static void stop_all_queued(bool done_with_no_conn)
>                                         l_defer_t = SDEB_DEFER_NONE;
>                                 spin_unlock_irqrestore(&sqp->qc_lock, iflags);
>                                 stop_qc_helper(sd_dp, l_defer_t);
> -                               if (done_with_no_conn && l_defer_t != SDEB_DEFER_NONE) {
> -                                       scp->result = DID_NO_CONNECT << 16;
> -                                       scsi_done(scp);
> -                               }
>                                 clear_bit(k, sqp->in_use_bm);
>                                 spin_lock_irqsave(&sqp->qc_lock, iflags);
>                         }
> @@ -5389,7 +5375,7 @@ static int scsi_debug_host_reset(struct scsi_cmnd *SCpnt)
>                 }
>         }
>         spin_unlock(&sdebug_host_list_lock);
> -       stop_all_queued(false);
> +       stop_all_queued();
>         if (SDEBUG_OPT_RESET_NOISE & sdebug_opts)
>                 sdev_printk(KERN_INFO, SCpnt->device,
>                             "%s: %d device(s) found\n", __func__, k);
> @@ -5449,50 +5435,13 @@ static void sdebug_build_parts(unsigned char *ramp, unsigned long store_size)
>         }
>  }
>
> -static void sdeb_block_all_queues(void)
> -{
> -       int j;
> -       struct sdebug_queue *sqp;
> -
> -       for (j = 0, sqp = sdebug_q_arr; j < submit_queues; ++j, ++sqp)
> -               atomic_set(&sqp->blocked, (int)true);
> -}
> -
> -static void sdeb_unblock_all_queues(void)
> +static void block_unblock_all_queues(bool block)
>  {
>         int j;
>         struct sdebug_queue *sqp;
>
>         for (j = 0, sqp = sdebug_q_arr; j < submit_queues; ++j, ++sqp)
> -               atomic_set(&sqp->blocked, (int)false);
> -}
> -
> -static void
> -sdeb_add_n_hosts(int num_hosts)
> -{
> -       if (num_hosts < 1)
> -               return;
> -       do {
> -               bool found;
> -               unsigned long idx;
> -               struct sdeb_store_info *sip;
> -               bool want_phs = (sdebug_fake_rw == 0) && sdebug_per_host_store;
> -
> -               found = false;
> -               if (want_phs) {
> -                       xa_for_each_marked(per_store_ap, idx, sip, SDEB_XA_NOT_IN_USE) {
> -                               sdeb_most_recent_idx = (int)idx;
> -                               found = true;
> -                               break;
> -                       }
> -                       if (found)      /* re-use case */
> -                               sdebug_add_host_helper((int)idx);
> -                       else
> -                               sdebug_do_add_host(true /* make new store */);
> -               } else {
> -                       sdebug_do_add_host(false);
> -               }
> -       } while (--num_hosts);
> +               atomic_set(&sqp->blocked, (int)block);
>  }
>
>  /* Adjust (by rounding down) the sdebug_cmnd_count so abs(every_nth)-1
> @@ -5505,10 +5454,10 @@ static void tweak_cmnd_count(void)
>         modulo = abs(sdebug_every_nth);
>         if (modulo < 2)
>                 return;
> -       sdeb_block_all_queues();
> +       block_unblock_all_queues(true);
>         count = atomic_read(&sdebug_cmnd_count);
>         atomic_set(&sdebug_cmnd_count, (count / modulo) * modulo);
> -       sdeb_unblock_all_queues();
> +       block_unblock_all_queues(false);
>  }
>
>  static void clear_queue_stats(void)
> @@ -5526,15 +5475,6 @@ static bool inject_on_this_cmd(void)
>         return (atomic_read(&sdebug_cmnd_count) % abs(sdebug_every_nth)) == 0;
>  }
>
> -static int process_deflect_incoming(struct scsi_cmnd *scp)
> -{
> -       u8 opcode = scp->cmnd[0];
> -
> -       if (opcode == SYNCHRONIZE_CACHE || opcode == SYNCHRONIZE_CACHE_16)
> -               return 0;
> -       return DID_NO_CONNECT << 16;
> -}
> -
>  #define INCLUSIVE_TIMING_MAX_NS 1000000                /* 1 millisecond */
>
>  /* Complete the processing of the thread that queued a SCSI command to this
> @@ -5544,7 +5484,8 @@ static int process_deflect_incoming(struct scsi_cmnd *scp)
>   */
>  static int schedule_resp(struct scsi_cmnd *cmnd, struct sdebug_dev_info *devip,
>                          int scsi_result,
> -                        int (*pfp)(struct scsi_cmnd *, struct sdebug_dev_info *),
> +                        int (*pfp)(struct scsi_cmnd *,
> +                                   struct sdebug_dev_info *),
>                          int delta_jiff, int ndelay)
>  {
>         bool new_sd_dp;
> @@ -5565,27 +5506,13 @@ static int schedule_resp(struct scsi_cmnd *cmnd, struct sdebug_dev_info *devip,
>         }
>         sdp = cmnd->device;
>
> -       if (delta_jiff == 0) {
> -               sqp = get_queue(cmnd);
> -               if (atomic_read(&sqp->blocked)) {
> -                       if (smp_load_acquire(&sdebug_deflect_incoming))
> -                               return process_deflect_incoming(cmnd);
> -                       else
> -                               return SCSI_MLQUEUE_HOST_BUSY;
> -               }
> +       if (delta_jiff == 0)
>                 goto respond_in_thread;
> -       }
>
>         sqp = get_queue(cmnd);
>         spin_lock_irqsave(&sqp->qc_lock, iflags);
>         if (unlikely(atomic_read(&sqp->blocked))) {
>                 spin_unlock_irqrestore(&sqp->qc_lock, iflags);
> -               if (smp_load_acquire(&sdebug_deflect_incoming)) {
> -                       scsi_result = process_deflect_incoming(cmnd);
> -                       goto respond_in_thread;
> -               }
> -               if (sdebug_verbose)
> -                       pr_info("blocked --> SCSI_MLQUEUE_HOST_BUSY\n");
>                 return SCSI_MLQUEUE_HOST_BUSY;
>         }
>         num_in_q = atomic_read(&devip->num_in_q);
> @@ -5774,12 +5701,8 @@ static int schedule_resp(struct scsi_cmnd *cmnd, struct sdebug_dev_info *devip,
>  respond_in_thread:     /* call back to mid-layer using invocation thread */
>         cmnd->result = pfp != NULL ? pfp(cmnd, devip) : 0;
>         cmnd->result &= ~SDEG_RES_IMMED_MASK;
> -       if (cmnd->result == 0 && scsi_result != 0) {
> +       if (cmnd->result == 0 && scsi_result != 0)
>                 cmnd->result = scsi_result;
> -               if (sdebug_verbose)
> -                       pr_info("respond_in_thread: tag=0x%x, scp->result=0x%x\n",
> -                               blk_mq_unique_tag(scsi_cmd_to_rq(cmnd)), scsi_result);
> -       }
>         scsi_done(cmnd);
>         return 0;
>  }
> @@ -6064,7 +5987,7 @@ static ssize_t delay_store(struct device_driver *ddp, const char *buf,
>                         int j, k;
>                         struct sdebug_queue *sqp;
>
> -                       sdeb_block_all_queues();
> +                       block_unblock_all_queues(true);
>                         for (j = 0, sqp = sdebug_q_arr; j < submit_queues;
>                              ++j, ++sqp) {
>                                 k = find_first_bit(sqp->in_use_bm,
> @@ -6078,7 +6001,7 @@ static ssize_t delay_store(struct device_driver *ddp, const char *buf,
>                                 sdebug_jdelay = jdelay;
>                                 sdebug_ndelay = 0;
>                         }
> -                       sdeb_unblock_all_queues();
> +                       block_unblock_all_queues(false);
>                 }
>                 return res;
>         }
> @@ -6104,7 +6027,7 @@ static ssize_t ndelay_store(struct device_driver *ddp, const char *buf,
>                         int j, k;
>                         struct sdebug_queue *sqp;
>
> -                       sdeb_block_all_queues();
> +                       block_unblock_all_queues(true);
>                         for (j = 0, sqp = sdebug_q_arr; j < submit_queues;
>                              ++j, ++sqp) {
>                                 k = find_first_bit(sqp->in_use_bm,
> @@ -6119,7 +6042,7 @@ static ssize_t ndelay_store(struct device_driver *ddp, const char *buf,
>                                 sdebug_jdelay = ndelay  ? JDELAY_OVERRIDDEN
>                                                         : DEF_JDELAY;
>                         }
> -                       sdeb_unblock_all_queues();
> +                       block_unblock_all_queues(false);
>                 }
>                 return res;
>         }
> @@ -6433,7 +6356,7 @@ static ssize_t max_queue_store(struct device_driver *ddp, const char *buf,
>         if ((count > 0) && (1 == sscanf(buf, "%d", &n)) && (n > 0) &&
>             (n <= SDEBUG_CANQUEUE) &&
>             (sdebug_host_max_queue == 0)) {
> -               sdeb_block_all_queues();
> +               block_unblock_all_queues(true);
>                 k = 0;
>                 for (j = 0, sqp = sdebug_q_arr; j < submit_queues;
>                      ++j, ++sqp) {
> @@ -6448,7 +6371,7 @@ static ssize_t max_queue_store(struct device_driver *ddp, const char *buf,
>                         atomic_set(&retired_max_queue, k + 1);
>                 else
>                         atomic_set(&retired_max_queue, 0);
> -               sdeb_unblock_all_queues();
> +               block_unblock_all_queues(false);
>                 return count;
>         }
>         return -EINVAL;
> @@ -6537,48 +6460,43 @@ static DRIVER_ATTR_RW(virtual_gb);
>  static ssize_t add_host_show(struct device_driver *ddp, char *buf)
>  {
>         /* absolute number of hosts currently active is what is shown */
> -       return scnprintf(buf, PAGE_SIZE, "%d\n", atomic_read(&sdebug_num_hosts));
> +       return scnprintf(buf, PAGE_SIZE, "%d\n", sdebug_num_hosts);
>  }
>
> -/*
> - * Accept positive and negative values. Hex values (only positive) may be prefixed by '0x'.
> - * To remove all hosts use a large negative number (e.g. -9999). The value 0 does nothing.
> - * Returns -EBUSY if another add_host sysfs invocation is active.
> - */
>  static ssize_t add_host_store(struct device_driver *ddp, const char *buf,
>                               size_t count)
>  {
> +       bool found;
> +       unsigned long idx;
> +       struct sdeb_store_info *sip;
> +       bool want_phs = (sdebug_fake_rw == 0) && sdebug_per_host_store;
>         int delta_hosts;
>
> -       if (count == 0 || kstrtoint(buf, 0, &delta_hosts))
> +       if (sscanf(buf, "%d", &delta_hosts) != 1)
>                 return -EINVAL;
> -       if (sdebug_verbose)
> -               pr_info("prior num_hosts=%d, num_to_add=%d\n",
> -                       atomic_read(&sdebug_num_hosts), delta_hosts);
> -       if (delta_hosts == 0)
> -               return count;
> -       if (mutex_trylock(&add_host_mutex) == 0)
> -               return -EBUSY;
>         if (delta_hosts > 0) {
> -               sdeb_add_n_hosts(delta_hosts);
> -       } else if (delta_hosts < 0) {
> -               smp_store_release(&sdebug_deflect_incoming, true);
> -               sdeb_block_all_queues();
> -               if (delta_hosts >= atomic_read(&sdebug_num_hosts))
> -                       stop_all_queued(true);
>                 do {
> -                       if (atomic_read(&sdebug_num_hosts) < 1) {
> -                               free_all_queued();
> -                               break;
> +                       found = false;
> +                       if (want_phs) {
> +                               xa_for_each_marked(per_store_ap, idx, sip,
> +                                                  SDEB_XA_NOT_IN_USE) {
> +                                       sdeb_most_recent_idx = (int)idx;
> +                                       found = true;
> +                                       break;
> +                               }
> +                               if (found)      /* re-use case */
> +                                       sdebug_add_host_helper((int)idx);
> +                               else
> +                                       sdebug_do_add_host(true);
> +                       } else {
> +                               sdebug_do_add_host(false);
>                         }
> +               } while (--delta_hosts);
> +       } else if (delta_hosts < 0) {
> +               do {
>                         sdebug_do_remove_host(false);
>                 } while (++delta_hosts);
> -               sdeb_unblock_all_queues();
> -               smp_store_release(&sdebug_deflect_incoming, false);
>         }
> -       mutex_unlock(&add_host_mutex);
> -       if (sdebug_verbose)
> -               pr_info("post num_hosts=%d\n", atomic_read(&sdebug_num_hosts));
>         return count;
>  }
>  static DRIVER_ATTR_RW(add_host);
> @@ -7089,10 +7007,6 @@ static int __init scsi_debug_init(void)
>         sdebug_add_host = 0;
>
>         for (k = 0; k < hosts_to_add; k++) {
> -               if (smp_load_acquire(&sdebug_deflect_incoming)) {
> -                       pr_info("exit early as sdebug_deflect_incoming is set\n");
> -                       return 0;
> -               }
>                 if (want_store && k == 0) {
>                         ret = sdebug_add_host_helper(idx);
>                         if (ret < 0) {
> @@ -7110,12 +7024,8 @@ static int __init scsi_debug_init(void)
>                 }
>         }
>         if (sdebug_verbose)
> -               pr_info("built %d host(s)\n", atomic_read(&sdebug_num_hosts));
> +               pr_info("built %d host(s)\n", sdebug_num_hosts);
>
> -       /*
> -        * Even though all the hosts have been established, due to async device (LU) scanning
> -        * by the scsi mid-level, there may still be devices (LUs) being set up.
> -        */
>         return 0;
>
>  bus_unreg:
> @@ -7131,17 +7041,12 @@ static int __init scsi_debug_init(void)
>
>  static void __exit scsi_debug_exit(void)
>  {
> -       int k;
> +       int k = sdebug_num_hosts;
>
> -       /* Possible race with LUs still being set up; stop them asap */
> -       sdeb_block_all_queues();
> -       smp_store_release(&sdebug_deflect_incoming, true);
> -       stop_all_queued(false);
> -       for (k = 0; atomic_read(&sdebug_num_hosts) > 0; k++)
> +       stop_all_queued();
> +       for (; k; k--)
>                 sdebug_do_remove_host(true);
>         free_all_queued();
> -       if (sdebug_verbose)
> -               pr_info("removed %d hosts\n", k);
>         driver_unregister(&sdebug_driverfs_driver);
>         bus_unregister(&pseudo_lld_bus);
>         root_device_unregister(pseudo_primary);
> @@ -7311,13 +7216,13 @@ static int sdebug_add_host_helper(int per_host_idx)
>         sdbg_host->dev.bus = &pseudo_lld_bus;
>         sdbg_host->dev.parent = pseudo_primary;
>         sdbg_host->dev.release = &sdebug_release_adapter;
> -       dev_set_name(&sdbg_host->dev, "adapter%d", atomic_read(&sdebug_num_hosts));
> +       dev_set_name(&sdbg_host->dev, "adapter%d", sdebug_num_hosts);
>
>         error = device_register(&sdbg_host->dev);
>         if (error)
>                 goto clean;
>
> -       atomic_inc(&sdebug_num_hosts);
> +       ++sdebug_num_hosts;
>         return 0;
>
>  clean:
> @@ -7381,7 +7286,7 @@ static void sdebug_do_remove_host(bool the_end)
>                 return;
>
>         device_unregister(&sdbg_host->dev);
> -       atomic_dec(&sdebug_num_hosts);
> +       --sdebug_num_hosts;
>  }
>
>  static int sdebug_change_qdepth(struct scsi_device *sdev, int qdepth)
> @@ -7389,10 +7294,10 @@ static int sdebug_change_qdepth(struct scsi_device *sdev, int qdepth)
>         int num_in_q = 0;
>         struct sdebug_dev_info *devip;
>
> -       sdeb_block_all_queues();
> +       block_unblock_all_queues(true);
>         devip = (struct sdebug_dev_info *)sdev->hostdata;
>         if (NULL == devip) {
> -               sdeb_unblock_all_queues();
> +               block_unblock_all_queues(false);
>                 return  -ENODEV;
>         }
>         num_in_q = atomic_read(&devip->num_in_q);
> @@ -7411,7 +7316,7 @@ static int sdebug_change_qdepth(struct scsi_device *sdev, int qdepth)
>                 sdev_printk(KERN_INFO, sdev, "%s: qdepth=%d, num_in_q=%d\n",
>                             __func__, qdepth, num_in_q);
>         }
> -       sdeb_unblock_all_queues();
> +       block_unblock_all_queues(false);
>         return sdev->queue_depth;
>  }
>
>


-- 
Best Regards,
  Yi Zhang


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] Revert "scsi: scsi_debug: Address races following module load"
  2022-04-10 13:15 ` Yi Zhang
@ 2022-04-10 17:00   ` Bob Pearson
  2022-04-10 17:26     ` Bob Pearson
  0 siblings, 1 reply; 8+ messages in thread
From: Bob Pearson @ 2022-04-10 17:00 UTC (permalink / raw)
  To: Yi Zhang, Bart Van Assche, Jason Gunthorpe, Zhu Yanjun
  Cc: Martin K . Petersen, linux-scsi, Douglas Gilbert, James E.J. Bottomley

On 4/10/22 08:15, Yi Zhang wrote:
> Confirmed the blktests srp/ issue was fixed with this revert:
> 
> Tested-by: Yi Zhang <yi.zhang@redhat.com>
> 
> On Sat, Apr 9, 2022 at 12:37 PM Bart Van Assche <bvanassche@acm.org> wrote:
>>
>> Revert the patch mentioned in the subject since it blocks I/O after
>> module unload has started while this is a legitimate use case. For e.g.
>> blktests test case srp/001 that patch causes a command timeout to be
>> triggered for the following call stack:
>>

I also applied Bart's patch reverting the scsi_debug change. And find
that now blktest/check -q srp runs successfully and the trace does not
show the inconsistent lock state warning. So it looks to me that there
is not a current need to revert the remaining _bh locks in the rxe driver
to _irqsave locks.

But, as far as I know the root cause of those warnings, when valid, are due to
code that calls into the verbs APIs while holding _irqsave locks. If we want to
make that generally possible then perhaps we should just get rid of the _bh
locks in favor of _irqsave locks. I have a patch that does that if you think
it is needed.

Bob

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] Revert "scsi: scsi_debug: Address races following module load"
  2022-04-10 17:00   ` Bob Pearson
@ 2022-04-10 17:26     ` Bob Pearson
  0 siblings, 0 replies; 8+ messages in thread
From: Bob Pearson @ 2022-04-10 17:26 UTC (permalink / raw)
  To: Yi Zhang, Bart Van Assche, Jason Gunthorpe, Zhu Yanjun
  Cc: Martin K . Petersen, linux-scsi, Douglas Gilbert, James E.J. Bottomley

On 4/10/22 12:00, Bob Pearson wrote:
> On 4/10/22 08:15, Yi Zhang wrote:
>> Confirmed the blktests srp/ issue was fixed with this revert:
>>
>> Tested-by: Yi Zhang <yi.zhang@redhat.com>
>>
>> On Sat, Apr 9, 2022 at 12:37 PM Bart Van Assche <bvanassche@acm.org> wrote:
>>>
>>> Revert the patch mentioned in the subject since it blocks I/O after
>>> module unload has started while this is a legitimate use case. For e.g.
>>> blktests test case srp/001 that patch causes a command timeout to be
>>> triggered for the following call stack:
>>>
> 
> I also applied Bart's patch reverting the scsi_debug change. And find
> that now blktest/check -q srp runs successfully and the trace does not
> show the inconsistent lock state warning. So it looks to me that there
> is not a current need to revert the remaining _bh locks in the rxe driver
> to _irqsave locks.
> 
> But, as far as I know the root cause of those warnings, when valid, are due to
> code that calls into the verbs APIs while holding _irqsave locks. If we want to
> make that generally possible then perhaps we should just get rid of the _bh
> locks in favor of _irqsave locks. I have a patch that does that if you think
> it is needed.
> 
> Bob

Oops. I was ahead of for-next. Going back to origin/for-next. The warning is back.
I'll figure out what fixed it.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] Revert "scsi: scsi_debug: Address races following module load"
  2022-04-09  4:37 [PATCH] Revert "scsi: scsi_debug: Address races following module load" Bart Van Assche
  2022-04-09 19:10 ` Douglas Gilbert
  2022-04-10 13:15 ` Yi Zhang
@ 2022-04-12  2:36 ` Martin K. Petersen
  2 siblings, 0 replies; 8+ messages in thread
From: Martin K. Petersen @ 2022-04-12  2:36 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: Martin K . Petersen, James E.J. Bottomley, linux-scsi,
	Douglas Gilbert, Yi Zhang, Bob Pearson

On Fri, 8 Apr 2022 21:37:03 -0700, Bart Van Assche wrote:

> Revert the patch mentioned in the subject since it blocks I/O after
> module unload has started while this is a legitimate use case. For e.g.
> blktests test case srp/001 that patch causes a command timeout to be
> triggered for the following call stack:
> 
> __schedule+0x4c3/0xd20
> schedule+0x82/0x110
> schedule_timeout+0x122/0x200
> io_schedule_timeout+0x7b/0xc0
> __wait_for_common+0x2bc/0x380
> wait_for_completion_io_timeout+0x1d/0x20
> blk_execute_rq+0x1db/0x200
> __scsi_execute+0x1fb/0x310
> sd_sync_cache+0x155/0x2c0 [sd_mod]
> sd_shutdown+0xbb/0x190 [sd_mod]
> sd_remove+0x5b/0x80 [sd_mod]
> device_remove+0x9a/0xb0
> device_release_driver_internal+0x2c5/0x360
> device_release_driver+0x12/0x20
> bus_remove_device+0x1aa/0x270
> device_del+0x2d4/0x640
> __scsi_remove_device+0x168/0x1a0
> scsi_forget_host+0xa8/0xb0
> scsi_remove_host+0x9b/0x150
> sdebug_driver_remove+0x3d/0x140 [scsi_debug]
> device_remove+0x6f/0xb0
> device_release_driver_internal+0x2c5/0x360
> device_release_driver+0x12/0x20
> bus_remove_device+0x1aa/0x270
> device_del+0x2d4/0x640
> device_unregister+0x18/0x70
> sdebug_do_remove_host+0x138/0x180 [scsi_debug]
> scsi_debug_exit+0x45/0xd5 [scsi_debug]
> __do_sys_delete_module.constprop.0+0x210/0x320
> __x64_sys_delete_module+0x1f/0x30
> do_syscall_64+0x35/0x80
> entry_SYSCALL_64_after_hwframe+0x44/0xae
> 
> [...]

Applied to 5.18/scsi-fixes, thanks!

[1/1] Revert "scsi: scsi_debug: Address races following module load"
      https://git.kernel.org/mkp/scsi/c/f19fe8f354a6

-- 
Martin K. Petersen	Oracle Linux Engineering

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] Revert "scsi: scsi_debug: Address races following module load"
  2022-04-09 19:10 ` Douglas Gilbert
@ 2022-04-12 17:52   ` Luis Chamberlain
  2022-04-12 18:22     ` Bart Van Assche
  0 siblings, 1 reply; 8+ messages in thread
From: Luis Chamberlain @ 2022-04-12 17:52 UTC (permalink / raw)
  To: Douglas Gilbert
  Cc: Bart Van Assche, Martin K . Petersen, linux-scsi, Yi Zhang,
	Bob Pearson, James E.J. Bottomley

On Sat, Apr 09, 2022 at 03:10:14PM -0400, Douglas Gilbert wrote:
> On 2022-04-09 00:37, Bart Van Assche wrote:
> > Revert the patch mentioned in the subject since it blocks I/O after
> > module unload has started while this is a legitimate use case. For e.g.
> > blktests test case srp/001 that patch causes a command timeout to be
> > triggered for the following call stack:
> > 
> > __schedule+0x4c3/0xd20
> > schedule+0x82/0x110
> > schedule_timeout+0x122/0x200
> > io_schedule_timeout+0x7b/0xc0
> > __wait_for_common+0x2bc/0x380
> > wait_for_completion_io_timeout+0x1d/0x20
> > blk_execute_rq+0x1db/0x200
> > __scsi_execute+0x1fb/0x310
> > sd_sync_cache+0x155/0x2c0 [sd_mod]
> > sd_shutdown+0xbb/0x190 [sd_mod]
> > sd_remove+0x5b/0x80 [sd_mod]
> > device_remove+0x9a/0xb0
> > device_release_driver_internal+0x2c5/0x360
> > device_release_driver+0x12/0x20
> > bus_remove_device+0x1aa/0x270
> > device_del+0x2d4/0x640
> > __scsi_remove_device+0x168/0x1a0
> > scsi_forget_host+0xa8/0xb0
> > scsi_remove_host+0x9b/0x150
> > sdebug_driver_remove+0x3d/0x140 [scsi_debug]
> > device_remove+0x6f/0xb0
> > device_release_driver_internal+0x2c5/0x360
> > device_release_driver+0x12/0x20
> > bus_remove_device+0x1aa/0x270
> > device_del+0x2d4/0x640
> > device_unregister+0x18/0x70
> > sdebug_do_remove_host+0x138/0x180 [scsi_debug]
> > scsi_debug_exit+0x45/0xd5 [scsi_debug]
> > __do_sys_delete_module.constprop.0+0x210/0x320
> > __x64_sys_delete_module+0x1f/0x30
> > do_syscall_64+0x35/0x80
> > entry_SYSCALL_64_after_hwframe+0x44/0xae
> > 
> > Reported-by: Yi Zhang <yi.zhang@redhat.com>
> > Cc: Douglas Gilbert <dgilbert@interlog.com>
> > Cc: Yi Zhang <yi.zhang@redhat.com>
> > Cc: Bob Pearson <rpearsonhpe@gmail.com>
> > Fixes: 2aad3cd85370 ("scsi: scsi_debug: Address races following module load"; )
> > Signed-off-by: Bart Van Assche <bvanassche@acm.org>
> 
> Acked-by: Douglas Gilbert <dgilbert@interlog.com>
> 
> This was a relatively old patch developed in conjunction with Luis Chamberlain
> <mcgrof@kernel.org>. So it may have been overtaken by other developments.
> I forwarded the "[bug report][bisected] modprob -r scsi-debug take more than
> 3mins during blktests srp/ tests" email to Luis but haven't heard back. So I'm
> happy to remove it.

Upstream patient module removal inside kmod will help and is the right
thing to do all around. However modules can also strive to make the
removal painless too. How much work a module does to make it painless is
up to its authors. In lieu of kmod patient module removal, users of the
module should be aware of these issue though and open code it as I have
in fstests [0] as an example.

Note that kmod patient module removal is not yet merged but soon will
be.

[0] https://git.kernel.org/pub/scm/fs/xfs/xfstests-dev.git/commit/?id=d405c21d40aa1f0ca846dd144a1a7731e55679b2

  Luis

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] Revert "scsi: scsi_debug: Address races following module load"
  2022-04-12 17:52   ` Luis Chamberlain
@ 2022-04-12 18:22     ` Bart Van Assche
  0 siblings, 0 replies; 8+ messages in thread
From: Bart Van Assche @ 2022-04-12 18:22 UTC (permalink / raw)
  To: Luis Chamberlain, Douglas Gilbert
  Cc: Martin K . Petersen, linux-scsi, Yi Zhang, Bob Pearson,
	James E.J. Bottomley

On 4/12/22 10:52, Luis Chamberlain wrote:
> Upstream patient module removal inside kmod will help and is the right
> thing to do all around. However modules can also strive to make the
> removal painless too. How much work a module does to make it painless is
> up to its authors. In lieu of kmod patient module removal, users of the
> module should be aware of these issue though and open code it as I have
> in fstests [0] as an example.

If a new version of patch "Address races following module load" is posted I
will help with reviewing it.

Thanks,

Bart.

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2022-04-12 18:22 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-04-09  4:37 [PATCH] Revert "scsi: scsi_debug: Address races following module load" Bart Van Assche
2022-04-09 19:10 ` Douglas Gilbert
2022-04-12 17:52   ` Luis Chamberlain
2022-04-12 18:22     ` Bart Van Assche
2022-04-10 13:15 ` Yi Zhang
2022-04-10 17:00   ` Bob Pearson
2022-04-10 17:26     ` Bob Pearson
2022-04-12  2:36 ` Martin K. Petersen

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.