* [PATCH v4] aacraid: reply queue mapping to CPUs based of IRQ affinity
@ 2023-05-19 23:08 Sagar Biradar
2023-05-19 23:18 ` kernel test robot
` (3 more replies)
0 siblings, 4 replies; 7+ messages in thread
From: Sagar Biradar @ 2023-05-19 23:08 UTC (permalink / raw)
To: Don Brace, Sagar Biradar, Gilbert Wu, linux-scsi,
Martin Petersen, James Bottomley, Brian King, stable, Tom White
Fix the IO hang that arises when an MSI-X vector does not
have a mapped online CPU upon receiving a completion.
The SCSI cmds take the blk_mq route, which is set up during the init.
The reserved cmds fetch the vector_no from mq_map once the init
is complete; before the init, they use 0, as per the norm.
Reviewed-by: Gilbert Wu <gilbert.wu@microchip.com>
Signed-off-by: Sagar Biradar <Sagar.Biradar@microchip.com>
---
drivers/scsi/aacraid/aacraid.h | 1 +
drivers/scsi/aacraid/comminit.c | 1 -
drivers/scsi/aacraid/commsup.c | 6 +++++-
drivers/scsi/aacraid/linit.c | 14 ++++++++++++++
drivers/scsi/aacraid/src.c | 25 +++++++++++++++++++++++--
5 files changed, 43 insertions(+), 4 deletions(-)
diff --git a/drivers/scsi/aacraid/aacraid.h b/drivers/scsi/aacraid/aacraid.h
index 5e115e8b2ba4..7c6efde75da6 100644
--- a/drivers/scsi/aacraid/aacraid.h
+++ b/drivers/scsi/aacraid/aacraid.h
@@ -1678,6 +1678,7 @@ struct aac_dev
u32 handle_pci_error;
bool init_reset;
u8 soft_reset_support;
+ u8 use_map_queue;
};
#define aac_adapter_interrupt(dev) \
diff --git a/drivers/scsi/aacraid/comminit.c b/drivers/scsi/aacraid/comminit.c
index bd99c5492b7d..a5483e7e283a 100644
--- a/drivers/scsi/aacraid/comminit.c
+++ b/drivers/scsi/aacraid/comminit.c
@@ -657,4 +657,3 @@ struct aac_dev *aac_init_adapter(struct aac_dev *dev)
return dev;
}
-
diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c
index deb32c9f4b3e..3f062e4013ab 100644
--- a/drivers/scsi/aacraid/commsup.c
+++ b/drivers/scsi/aacraid/commsup.c
@@ -223,8 +223,12 @@ int aac_fib_setup(struct aac_dev * dev)
struct fib *aac_fib_alloc_tag(struct aac_dev *dev, struct scsi_cmnd *scmd)
{
struct fib *fibptr;
+ u32 blk_tag;
+ int i;
- fibptr = &dev->fibs[scsi_cmd_to_rq(scmd)->tag];
+ blk_tag = blk_mq_unique_tag(scsi_cmd_to_rq(scmd));
+ i = blk_mq_unique_tag_to_tag(blk_tag);
+ fibptr = &dev->fibs[i];
/*
* Null out fields that depend on being zero at the start of
* each I/O
diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c
index 5ba5c18b77b4..9caf8c314ce1 100644
--- a/drivers/scsi/aacraid/linit.c
+++ b/drivers/scsi/aacraid/linit.c
@@ -19,6 +19,7 @@
#include <linux/compat.h>
#include <linux/blkdev.h>
+#include <linux/blk-mq-pci.h>
#include <linux/completion.h>
#include <linux/init.h>
#include <linux/interrupt.h>
@@ -505,6 +506,15 @@ static int aac_slave_configure(struct scsi_device *sdev)
return 0;
}
+static void aac_map_queues(struct Scsi_Host *shost)
+{
+ struct aac_dev *aac = (struct aac_dev *)shost->hostdata;
+
+ blk_mq_pci_map_queues(&shost->tag_set.map[HCTX_TYPE_DEFAULT],
+ aac->pdev, 0);
+ aac->use_map_queue = true;
+}
+
/**
* aac_change_queue_depth - alter queue depths
* @sdev: SCSI device we are considering
@@ -1489,6 +1499,7 @@ static struct scsi_host_template aac_driver_template = {
.bios_param = aac_biosparm,
.shost_groups = aac_host_groups,
.slave_configure = aac_slave_configure,
+ .map_queues = aac_map_queues,
.change_queue_depth = aac_change_queue_depth,
.sdev_groups = aac_dev_groups,
.eh_abort_handler = aac_eh_abort,
@@ -1776,6 +1787,8 @@ static int aac_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
shost->max_lun = AAC_MAX_LUN;
pci_set_drvdata(pdev, shost);
+ shost->nr_hw_queues = aac->max_msix;
+ shost->host_tagset = 1;
error = scsi_add_host(shost, &pdev->dev);
if (error)
@@ -1908,6 +1921,7 @@ static void aac_remove_one(struct pci_dev *pdev)
struct aac_dev *aac = (struct aac_dev *)shost->hostdata;
aac_cancel_rescan_worker(aac);
+ aac->use_map_queue = false;
scsi_remove_host(shost);
__aac_shutdown(aac);
diff --git a/drivers/scsi/aacraid/src.c b/drivers/scsi/aacraid/src.c
index 11ef58204e96..61949f374188 100644
--- a/drivers/scsi/aacraid/src.c
+++ b/drivers/scsi/aacraid/src.c
@@ -493,6 +493,10 @@ static int aac_src_deliver_message(struct fib *fib)
#endif
u16 vector_no;
+ struct scsi_cmnd *scmd;
+ u32 blk_tag;
+ struct Scsi_Host *shost = dev->scsi_host_ptr;
+ struct blk_mq_queue_map *qmap;
atomic_inc(&q->numpending);
@@ -505,8 +509,25 @@ static int aac_src_deliver_message(struct fib *fib)
if ((dev->comm_interface == AAC_COMM_MESSAGE_TYPE3)
&& dev->sa_firmware)
vector_no = aac_get_vector(dev);
- else
- vector_no = fib->vector_no;
+ else {
+ if (!fib->vector_no || !fib->callback_data) {
+ if (shost && dev->use_map_queue) {
+ qmap = &shost->tag_set.map[HCTX_TYPE_DEFAULT];
+ vector_no = qmap->mq_map[raw_smp_processor_id()];
+ }
+ /*
+ * We hardcode the vector_no for
+ * reserved commands as a valid shost is
+ * absent during the init
+ */
+ else
+ vector_no = 0;
+ } else {
+ scmd = (struct scsi_cmnd *)fib->callback_data;
+ blk_tag = blk_mq_unique_tag(scsi_cmd_to_rq(scmd));
+ vector_no = blk_mq_unique_tag_to_hwq(blk_tag);
+ }
+ }
if (native_hba) {
if (fib->flags & FIB_CONTEXT_FLAG_NATIVE_HBA_TMF) {
--
2.29.0
^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: [PATCH v4] aacraid: reply queue mapping to CPUs based of IRQ affinity
2023-05-19 23:08 [PATCH v4] aacraid: reply queue mapping to CPUs based of IRQ affinity Sagar Biradar
@ 2023-05-19 23:18 ` kernel test robot
2023-06-08 21:09 ` Sagar.Biradar
` (2 subsequent siblings)
3 siblings, 0 replies; 7+ messages in thread
From: kernel test robot @ 2023-05-19 23:18 UTC (permalink / raw)
To: Sagar Biradar; +Cc: stable, oe-kbuild-all
Hi,
Thanks for your patch.
FYI: kernel test robot notices the stable kernel rule is not satisfied.
Rule: 'Cc: stable@vger.kernel.org' or 'commit <sha1> upstream.'
Subject: [PATCH v4] aacraid: reply queue mapping to CPUs based of IRQ affinity
Link: https://lore.kernel.org/stable/20230519230834.27436-1-sagar.biradar%40microchip.com
The check is based on https://www.kernel.org/doc/html/latest/process/stable-kernel-rules.html
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests
^ permalink raw reply [flat|nested] 7+ messages in thread
* RE: [PATCH v4] aacraid: reply queue mapping to CPUs based of IRQ affinity
2023-05-19 23:08 [PATCH v4] aacraid: reply queue mapping to CPUs based of IRQ affinity Sagar Biradar
2023-05-19 23:18 ` kernel test robot
@ 2023-06-08 21:09 ` Sagar.Biradar
2023-06-15 2:15 ` Martin K. Petersen
2023-11-23 12:01 ` John Garry
3 siblings, 0 replies; 7+ messages in thread
From: Sagar.Biradar @ 2023-06-08 21:09 UTC (permalink / raw)
To: Sagar.Biradar, Don.Brace, Gilbert.Wu, linux-scsi,
martin.petersen, jejb, brking, stable, Tom.White
Hi James et al.
Since there has been no activity or comments, I am pinging to check whether this patch is in line to be accepted/merged.
I have addressed the comments on the earlier versions of the patch.
Thanks
-----Original Message-----
From: Sagar Biradar <sagar.biradar@microchip.com>
Sent: Friday, May 19, 2023 4:09 PM
To: Don Brace - C33706 <Don.Brace@microchip.com>; Sagar Biradar - C34249 <Sagar.Biradar@microchip.com>; Gilbert Wu - C33504 <Gilbert.Wu@microchip.com>; linux-scsi@vger.kernel.org; Martin Petersen <martin.petersen@oracle.com>; James Bottomley <jejb@linux.ibm.com>; Brian King <brking@linux.vnet.ibm.com>; stable@vger.kernel.org; Tom White - C33503 <Tom.White@microchip.com>
Subject: [PATCH v4] aacraid: reply queue mapping to CPUs based of IRQ affinity
Fix the IO hang that arises because of MSIx vector not having a mapped online CPU upon receiving completion.
The SCSI cmds take the blk_mq route, which is setup during the init.
The reserved cmds fetch the vector_no from mq_map after the init is complete and before the init, they use 0 - as per the norm.
Reviewed-by: Gilbert Wu <gilbert.wu@microchip.com>
Signed-off-by: Sagar Biradar <Sagar.Biradar@microchip.com>
---
drivers/scsi/aacraid/aacraid.h | 1 +
drivers/scsi/aacraid/comminit.c | 1 -
drivers/scsi/aacraid/commsup.c | 6 +++++-
drivers/scsi/aacraid/linit.c | 14 ++++++++++++++
drivers/scsi/aacraid/src.c | 25 +++++++++++++++++++++++--
5 files changed, 43 insertions(+), 4 deletions(-)
diff --git a/drivers/scsi/aacraid/aacraid.h b/drivers/scsi/aacraid/aacraid.h index 5e115e8b2ba4..7c6efde75da6 100644
--- a/drivers/scsi/aacraid/aacraid.h
+++ b/drivers/scsi/aacraid/aacraid.h
@@ -1678,6 +1678,7 @@ struct aac_dev
u32 handle_pci_error;
bool init_reset;
u8 soft_reset_support;
+ u8 use_map_queue;
};
#define aac_adapter_interrupt(dev) \
diff --git a/drivers/scsi/aacraid/comminit.c b/drivers/scsi/aacraid/comminit.c index bd99c5492b7d..a5483e7e283a 100644
--- a/drivers/scsi/aacraid/comminit.c
+++ b/drivers/scsi/aacraid/comminit.c
@@ -657,4 +657,3 @@ struct aac_dev *aac_init_adapter(struct aac_dev *dev)
return dev;
}
-
diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c index deb32c9f4b3e..3f062e4013ab 100644
--- a/drivers/scsi/aacraid/commsup.c
+++ b/drivers/scsi/aacraid/commsup.c
@@ -223,8 +223,12 @@ int aac_fib_setup(struct aac_dev * dev) struct fib *aac_fib_alloc_tag(struct aac_dev *dev, struct scsi_cmnd *scmd) {
struct fib *fibptr;
+ u32 blk_tag;
+ int i;
- fibptr = &dev->fibs[scsi_cmd_to_rq(scmd)->tag];
+ blk_tag = blk_mq_unique_tag(scsi_cmd_to_rq(scmd));
+ i = blk_mq_unique_tag_to_tag(blk_tag);
+ fibptr = &dev->fibs[i];
/*
* Null out fields that depend on being zero at the start of
* each I/O
diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c index 5ba5c18b77b4..9caf8c314ce1 100644
--- a/drivers/scsi/aacraid/linit.c
+++ b/drivers/scsi/aacraid/linit.c
@@ -19,6 +19,7 @@
#include <linux/compat.h>
#include <linux/blkdev.h>
+#include <linux/blk-mq-pci.h>
#include <linux/completion.h>
#include <linux/init.h>
#include <linux/interrupt.h>
@@ -505,6 +506,15 @@ static int aac_slave_configure(struct scsi_device *sdev)
return 0;
}
+static void aac_map_queues(struct Scsi_Host *shost) {
+ struct aac_dev *aac = (struct aac_dev *)shost->hostdata;
+
+ blk_mq_pci_map_queues(&shost->tag_set.map[HCTX_TYPE_DEFAULT],
+ aac->pdev, 0);
+ aac->use_map_queue = true;
+}
+
/**
* aac_change_queue_depth - alter queue depths
* @sdev: SCSI device we are considering
@@ -1489,6 +1499,7 @@ static struct scsi_host_template aac_driver_template = {
.bios_param = aac_biosparm,
.shost_groups = aac_host_groups,
.slave_configure = aac_slave_configure,
+ .map_queues = aac_map_queues,
.change_queue_depth = aac_change_queue_depth,
.sdev_groups = aac_dev_groups,
.eh_abort_handler = aac_eh_abort,
@@ -1776,6 +1787,8 @@ static int aac_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
shost->max_lun = AAC_MAX_LUN;
pci_set_drvdata(pdev, shost);
+ shost->nr_hw_queues = aac->max_msix;
+ shost->host_tagset = 1;
error = scsi_add_host(shost, &pdev->dev);
if (error)
@@ -1908,6 +1921,7 @@ static void aac_remove_one(struct pci_dev *pdev)
struct aac_dev *aac = (struct aac_dev *)shost->hostdata;
aac_cancel_rescan_worker(aac);
+ aac->use_map_queue = false;
scsi_remove_host(shost);
__aac_shutdown(aac);
diff --git a/drivers/scsi/aacraid/src.c b/drivers/scsi/aacraid/src.c index 11ef58204e96..61949f374188 100644
--- a/drivers/scsi/aacraid/src.c
+++ b/drivers/scsi/aacraid/src.c
@@ -493,6 +493,10 @@ static int aac_src_deliver_message(struct fib *fib) #endif
u16 vector_no;
+ struct scsi_cmnd *scmd;
+ u32 blk_tag;
+ struct Scsi_Host *shost = dev->scsi_host_ptr;
+ struct blk_mq_queue_map *qmap;
atomic_inc(&q->numpending);
@@ -505,8 +509,25 @@ static int aac_src_deliver_message(struct fib *fib)
if ((dev->comm_interface == AAC_COMM_MESSAGE_TYPE3)
&& dev->sa_firmware)
vector_no = aac_get_vector(dev);
- else
- vector_no = fib->vector_no;
+ else {
+ if (!fib->vector_no || !fib->callback_data) {
+ if (shost && dev->use_map_queue) {
+ qmap = &shost->tag_set.map[HCTX_TYPE_DEFAULT];
+ vector_no = qmap->mq_map[raw_smp_processor_id()];
+ }
+ /*
+ * We hardcode the vector_no for
+ * reserved commands as a valid shost is
+ * absent during the init
+ */
+ else
+ vector_no = 0;
+ } else {
+ scmd = (struct scsi_cmnd *)fib->callback_data;
+ blk_tag = blk_mq_unique_tag(scsi_cmd_to_rq(scmd));
+ vector_no = blk_mq_unique_tag_to_hwq(blk_tag);
+ }
+ }
if (native_hba) {
if (fib->flags & FIB_CONTEXT_FLAG_NATIVE_HBA_TMF) {
--
2.29.0
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH v4] aacraid: reply queue mapping to CPUs based of IRQ affinity
2023-05-19 23:08 [PATCH v4] aacraid: reply queue mapping to CPUs based of IRQ affinity Sagar Biradar
2023-05-19 23:18 ` kernel test robot
2023-06-08 21:09 ` Sagar.Biradar
@ 2023-06-15 2:15 ` Martin K. Petersen
2023-11-23 12:01 ` John Garry
3 siblings, 0 replies; 7+ messages in thread
From: Martin K. Petersen @ 2023-06-15 2:15 UTC (permalink / raw)
To: Don Brace, Gilbert Wu, linux-scsi, James Bottomley, Brian King,
stable, Tom White, Sagar Biradar
Cc: Martin K . Petersen
On Fri, 19 May 2023 16:08:34 -0700, Sagar Biradar wrote:
> Fix the IO hang that arises because of MSIx vector not
> having a mapped online CPU upon receiving completion.
>
> The SCSI cmds take the blk_mq route, which is setup during the init.
> The reserved cmds fetch the vector_no from mq_map after the init
> is complete and before the init, they use 0 - as per the norm.
>
> [...]
Applied to 6.4/scsi-fixes, thanks!
[1/1] aacraid: reply queue mapping to CPUs based of IRQ affinity
https://git.kernel.org/mkp/scsi/c/9dc704dcc09e
--
Martin K. Petersen Oracle Linux Engineering
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH v4] aacraid: reply queue mapping to CPUs based of IRQ affinity
2023-05-19 23:08 [PATCH v4] aacraid: reply queue mapping to CPUs based of IRQ affinity Sagar Biradar
` (2 preceding siblings ...)
2023-06-15 2:15 ` Martin K. Petersen
@ 2023-11-23 12:01 ` John Garry
2023-11-23 12:51 ` James Bottomley
2023-11-24 6:32 ` Hannes Reinecke
3 siblings, 2 replies; 7+ messages in thread
From: John Garry @ 2023-11-23 12:01 UTC (permalink / raw)
To: Sagar Biradar, Don Brace, Gilbert Wu, linux-scsi,
Martin Petersen, James Bottomley, Brian King, stable, Tom White,
regressions, hare
On 20/05/2023 00:08, Sagar Biradar wrote:
> Fix the IO hang that arises because of MSIx vector not
> having a mapped online CPU upon receiving completion.
>
> The SCSI cmds take the blk_mq route, which is setup during the init.
> The reserved cmds fetch the vector_no from mq_map after the init
> is complete and before the init, they use 0 - as per the norm.
>
> Reviewed-by: Gilbert Wu <gilbert.wu@microchip.com>
> Signed-off-by: Sagar Biradar <Sagar.Biradar@microchip.com>
This is the patch which seems to be causing the issue in
https://bugzilla.kernel.org/show_bug.cgi?id=217599
I will comment here since I got no response there...
> ---
> drivers/scsi/aacraid/aacraid.h | 1 +
> drivers/scsi/aacraid/comminit.c | 1 -
> drivers/scsi/aacraid/commsup.c | 6 +++++-
> drivers/scsi/aacraid/linit.c | 14 ++++++++++++++
> drivers/scsi/aacraid/src.c | 25 +++++++++++++++++++++++--
> 5 files changed, 43 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/scsi/aacraid/aacraid.h b/drivers/scsi/aacraid/aacraid.h
> index 5e115e8b2ba4..7c6efde75da6 100644
> --- a/drivers/scsi/aacraid/aacraid.h
> +++ b/drivers/scsi/aacraid/aacraid.h
> @@ -1678,6 +1678,7 @@ struct aac_dev
> u32 handle_pci_error;
> bool init_reset;
> u8 soft_reset_support;
> + u8 use_map_queue;
> };
>
> #define aac_adapter_interrupt(dev) \
> diff --git a/drivers/scsi/aacraid/comminit.c b/drivers/scsi/aacraid/comminit.c
> index bd99c5492b7d..a5483e7e283a 100644
> --- a/drivers/scsi/aacraid/comminit.c
> +++ b/drivers/scsi/aacraid/comminit.c
> @@ -657,4 +657,3 @@ struct aac_dev *aac_init_adapter(struct aac_dev *dev)
>
> return dev;
> }
> -
> diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c
> index deb32c9f4b3e..3f062e4013ab 100644
> --- a/drivers/scsi/aacraid/commsup.c
> +++ b/drivers/scsi/aacraid/commsup.c
> @@ -223,8 +223,12 @@ int aac_fib_setup(struct aac_dev * dev)
> struct fib *aac_fib_alloc_tag(struct aac_dev *dev, struct scsi_cmnd *scmd)
> {
> struct fib *fibptr;
> + u32 blk_tag;
> + int i;
>
> - fibptr = &dev->fibs[scsi_cmd_to_rq(scmd)->tag];
> + blk_tag = blk_mq_unique_tag(scsi_cmd_to_rq(scmd));
> + i = blk_mq_unique_tag_to_tag(blk_tag);
> + fibptr = &dev->fibs[i];
> /*
> * Null out fields that depend on being zero at the start of
> * each I/O
> diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c
> index 5ba5c18b77b4..9caf8c314ce1 100644
> --- a/drivers/scsi/aacraid/linit.c
> +++ b/drivers/scsi/aacraid/linit.c
> @@ -19,6 +19,7 @@
>
> #include <linux/compat.h>
> #include <linux/blkdev.h>
> +#include <linux/blk-mq-pci.h>
> #include <linux/completion.h>
> #include <linux/init.h>
> #include <linux/interrupt.h>
> @@ -505,6 +506,15 @@ static int aac_slave_configure(struct scsi_device *sdev)
> return 0;
> }
>
> +static void aac_map_queues(struct Scsi_Host *shost)
> +{
> + struct aac_dev *aac = (struct aac_dev *)shost->hostdata;
> +
> + blk_mq_pci_map_queues(&shost->tag_set.map[HCTX_TYPE_DEFAULT],
> + aac->pdev, 0);
> + aac->use_map_queue = true;
> +}
> +
> /**
> * aac_change_queue_depth - alter queue depths
> * @sdev: SCSI device we are considering
> @@ -1489,6 +1499,7 @@ static struct scsi_host_template aac_driver_template = {
> .bios_param = aac_biosparm,
> .shost_groups = aac_host_groups,
> .slave_configure = aac_slave_configure,
> + .map_queues = aac_map_queues,
> .change_queue_depth = aac_change_queue_depth,
> .sdev_groups = aac_dev_groups,
> .eh_abort_handler = aac_eh_abort,
> @@ -1776,6 +1787,8 @@ static int aac_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
> shost->max_lun = AAC_MAX_LUN;
>
> pci_set_drvdata(pdev, shost);
> + shost->nr_hw_queues = aac->max_msix;
> + shost->host_tagset = 1;
>
> error = scsi_add_host(shost, &pdev->dev);
> if (error)
> @@ -1908,6 +1921,7 @@ static void aac_remove_one(struct pci_dev *pdev)
> struct aac_dev *aac = (struct aac_dev *)shost->hostdata;
>
> aac_cancel_rescan_worker(aac);
> + aac->use_map_queue = false;
> scsi_remove_host(shost);
>
> __aac_shutdown(aac);
> diff --git a/drivers/scsi/aacraid/src.c b/drivers/scsi/aacraid/src.c
> index 11ef58204e96..61949f374188 100644
> --- a/drivers/scsi/aacraid/src.c
> +++ b/drivers/scsi/aacraid/src.c
> @@ -493,6 +493,10 @@ static int aac_src_deliver_message(struct fib *fib)
> #endif
>
> u16 vector_no;
> + struct scsi_cmnd *scmd;
> + u32 blk_tag;
> + struct Scsi_Host *shost = dev->scsi_host_ptr;
> + struct blk_mq_queue_map *qmap;
>
> atomic_inc(&q->numpending);
>
> @@ -505,8 +509,25 @@ static int aac_src_deliver_message(struct fib *fib)
> if ((dev->comm_interface == AAC_COMM_MESSAGE_TYPE3)
> && dev->sa_firmware)
> vector_no = aac_get_vector(dev);
> - else
> - vector_no = fib->vector_no;
> + else {
> + if (!fib->vector_no || !fib->callback_data) {
> + if (shost && dev->use_map_queue) {
> + qmap = &shost->tag_set.map[HCTX_TYPE_DEFAULT];
> + vector_no = qmap->mq_map[raw_smp_processor_id()];
> + }
> + /*
> + * We hardcode the vector_no for
> + * reserved commands as a valid shost is
> + * absent during the init
> + */
> + else
> + vector_no = 0;
> + } else {
> + scmd = (struct scsi_cmnd *)fib->callback_data;
> + blk_tag = blk_mq_unique_tag(scsi_cmd_to_rq(scmd));
> + vector_no = blk_mq_unique_tag_to_hwq(blk_tag);
Hannes' patch in the bugzilla was to revert to using hw queue #0 always
for internal commands, and it didn't help.
Could there be any issue in using hw queue #0 for regular SCSI commands?
AFAICS, that's a significant change. Previously we would use
fib->vector_no to decide the queue, which was in range (1, dev->max_msix).
BTW, is there any code which relies on a command being sent/received on
the HW queue same as fib->vector_no?
Thanks,
John
> +
> + }
>
> if (native_hba) {
> if (fib->flags & FIB_CONTEXT_FLAG_NATIVE_HBA_TMF) {
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH v4] aacraid: reply queue mapping to CPUs based of IRQ affinity
2023-11-23 12:01 ` John Garry
@ 2023-11-23 12:51 ` James Bottomley
2023-11-24 6:32 ` Hannes Reinecke
1 sibling, 0 replies; 7+ messages in thread
From: James Bottomley @ 2023-11-23 12:51 UTC (permalink / raw)
To: John Garry, Sagar Biradar, Don Brace, Gilbert Wu, linux-scsi,
Martin Petersen, Brian King, stable, Tom White, regressions,
hare
On Thu, 2023-11-23 at 12:01 +0000, John Garry wrote:
> On 20/05/2023 00:08, Sagar Biradar wrote:
> > Fix the IO hang that arises because of MSIx vector not
> > having a mapped online CPU upon receiving completion.
> >
> > The SCSI cmds take the blk_mq route, which is setup during the
> > init. The reserved cmds fetch the vector_no from mq_map after the
> > init is complete and before the init, they use 0 - as per the norm.
> >
> > Reviewed-by: Gilbert Wu <gilbert.wu@microchip.com>
> > Signed-off-by: Sagar Biradar <Sagar.Biradar@microchip.com>
>
> This the patch which seems to be causing the issue in
> https://bugzilla.kernel.org/show_bug.cgi?id=217599
>
> I will comment here since I got no response there...
We can still do a clean revert of this commit if no other solution is
found before the end of the 6.7 rc cycle.
Regards,
James
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH v4] aacraid: reply queue mapping to CPUs based of IRQ affinity
2023-11-23 12:01 ` John Garry
2023-11-23 12:51 ` James Bottomley
@ 2023-11-24 6:32 ` Hannes Reinecke
1 sibling, 0 replies; 7+ messages in thread
From: Hannes Reinecke @ 2023-11-24 6:32 UTC (permalink / raw)
To: John Garry, Sagar Biradar, Don Brace, Gilbert Wu, linux-scsi,
Martin Petersen, James Bottomley, Brian King, stable, Tom White,
regressions, hare
On 11/23/23 13:01, John Garry wrote:
> On 20/05/2023 00:08, Sagar Biradar wrote:
>> Fix the IO hang that arises because of MSIx vector not
>> having a mapped online CPU upon receiving completion.
>>
>> The SCSI cmds take the blk_mq route, which is setup during the init.
>> The reserved cmds fetch the vector_no from mq_map after the init
>> is complete and before the init, they use 0 - as per the norm.
>>
>> Reviewed-by: Gilbert Wu <gilbert.wu@microchip.com>
>> Signed-off-by: Sagar Biradar <Sagar.Biradar@microchip.com>
>
> This the patch which seems to be causing the issue in
> https://bugzilla.kernel.org/show_bug.cgi?id=217599
>
> I will comment here since I got no response there...
>
>> ---
>> drivers/scsi/aacraid/aacraid.h | 1 +
>> drivers/scsi/aacraid/comminit.c | 1 -
>> drivers/scsi/aacraid/commsup.c | 6 +++++-
>> drivers/scsi/aacraid/linit.c | 14 ++++++++++++++
>> drivers/scsi/aacraid/src.c | 25 +++++++++++++++++++++++--
>> 5 files changed, 43 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/scsi/aacraid/aacraid.h
>> b/drivers/scsi/aacraid/aacraid.h
>> index 5e115e8b2ba4..7c6efde75da6 100644
>> --- a/drivers/scsi/aacraid/aacraid.h
>> +++ b/drivers/scsi/aacraid/aacraid.h
>> @@ -1678,6 +1678,7 @@ struct aac_dev
>> u32 handle_pci_error;
>> bool init_reset;
>> u8 soft_reset_support;
>> + u8 use_map_queue;
>> };
>> #define aac_adapter_interrupt(dev) \
>> diff --git a/drivers/scsi/aacraid/comminit.c
>> b/drivers/scsi/aacraid/comminit.c
>> index bd99c5492b7d..a5483e7e283a 100644
>> --- a/drivers/scsi/aacraid/comminit.c
>> +++ b/drivers/scsi/aacraid/comminit.c
>> @@ -657,4 +657,3 @@ struct aac_dev *aac_init_adapter(struct aac_dev *dev)
>> return dev;
>> }
>> -
>> diff --git a/drivers/scsi/aacraid/commsup.c
>> b/drivers/scsi/aacraid/commsup.c
>> index deb32c9f4b3e..3f062e4013ab 100644
>> --- a/drivers/scsi/aacraid/commsup.c
>> +++ b/drivers/scsi/aacraid/commsup.c
>> @@ -223,8 +223,12 @@ int aac_fib_setup(struct aac_dev * dev)
>> struct fib *aac_fib_alloc_tag(struct aac_dev *dev, struct scsi_cmnd
>> *scmd)
>> {
>> struct fib *fibptr;
>> + u32 blk_tag;
>> + int i;
>> - fibptr = &dev->fibs[scsi_cmd_to_rq(scmd)->tag];
>> + blk_tag = blk_mq_unique_tag(scsi_cmd_to_rq(scmd));
>> + i = blk_mq_unique_tag_to_tag(blk_tag);
>> + fibptr = &dev->fibs[i];
>> /*
>> * Null out fields that depend on being zero at the start of
>> * each I/O
>> diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c
>> index 5ba5c18b77b4..9caf8c314ce1 100644
>> --- a/drivers/scsi/aacraid/linit.c
>> +++ b/drivers/scsi/aacraid/linit.c
>> @@ -19,6 +19,7 @@
>> #include <linux/compat.h>
>> #include <linux/blkdev.h>
>> +#include <linux/blk-mq-pci.h>
>> #include <linux/completion.h>
>> #include <linux/init.h>
>> #include <linux/interrupt.h>
>> @@ -505,6 +506,15 @@ static int aac_slave_configure(struct scsi_device
>> *sdev)
>> return 0;
>> }
>> +static void aac_map_queues(struct Scsi_Host *shost)
>> +{
>> + struct aac_dev *aac = (struct aac_dev *)shost->hostdata;
>> +
>> + blk_mq_pci_map_queues(&shost->tag_set.map[HCTX_TYPE_DEFAULT],
>> + aac->pdev, 0);
>> + aac->use_map_queue = true;
>> +}
>> +
>> /**
>> * aac_change_queue_depth - alter queue depths
>> * @sdev: SCSI device we are considering
>> @@ -1489,6 +1499,7 @@ static struct scsi_host_template
>> aac_driver_template = {
>> .bios_param = aac_biosparm,
>> .shost_groups = aac_host_groups,
>> .slave_configure = aac_slave_configure,
>> + .map_queues = aac_map_queues,
>> .change_queue_depth = aac_change_queue_depth,
>> .sdev_groups = aac_dev_groups,
>> .eh_abort_handler = aac_eh_abort,
>> @@ -1776,6 +1787,8 @@ static int aac_probe_one(struct pci_dev *pdev,
>> const struct pci_device_id *id)
>> shost->max_lun = AAC_MAX_LUN;
>> pci_set_drvdata(pdev, shost);
>> + shost->nr_hw_queues = aac->max_msix;
>> + shost->host_tagset = 1;
>> error = scsi_add_host(shost, &pdev->dev);
>> if (error)
>> @@ -1908,6 +1921,7 @@ static void aac_remove_one(struct pci_dev *pdev)
>> struct aac_dev *aac = (struct aac_dev *)shost->hostdata;
>> aac_cancel_rescan_worker(aac);
>> + aac->use_map_queue = false;
>> scsi_remove_host(shost);
>> __aac_shutdown(aac);
>> diff --git a/drivers/scsi/aacraid/src.c b/drivers/scsi/aacraid/src.c
>> index 11ef58204e96..61949f374188 100644
>> --- a/drivers/scsi/aacraid/src.c
>> +++ b/drivers/scsi/aacraid/src.c
>> @@ -493,6 +493,10 @@ static int aac_src_deliver_message(struct fib *fib)
>> #endif
>> u16 vector_no;
>> + struct scsi_cmnd *scmd;
>> + u32 blk_tag;
>> + struct Scsi_Host *shost = dev->scsi_host_ptr;
>> + struct blk_mq_queue_map *qmap;
>> atomic_inc(&q->numpending);
>> @@ -505,8 +509,25 @@ static int aac_src_deliver_message(struct fib *fib)
>> if ((dev->comm_interface == AAC_COMM_MESSAGE_TYPE3)
>> && dev->sa_firmware)
>> vector_no = aac_get_vector(dev);
>> - else
>> - vector_no = fib->vector_no;
>> + else {
>> + if (!fib->vector_no || !fib->callback_data) {
>> + if (shost && dev->use_map_queue) {
>> + qmap = &shost->tag_set.map[HCTX_TYPE_DEFAULT];
>> + vector_no = qmap->mq_map[raw_smp_processor_id()];
>> + }
>> + /*
>> + * We hardcode the vector_no for
>> + * reserved commands as a valid shost is
>> + * absent during the init
>> + */
>> + else
>> + vector_no = 0;
>> + } else {
>> + scmd = (struct scsi_cmnd *)fib->callback_data;
>> + blk_tag = blk_mq_unique_tag(scsi_cmd_to_rq(scmd));
>> + vector_no = blk_mq_unique_tag_to_hwq(blk_tag);
>
>
>
> Hannes' patch in the bugzilla was to revert to using hw queue #0 always
> for internal commands, and it didn't help.
>
> Could there be any issue in using hw queue #0 for regular SCSI commands?
>
> AFAICS, that's a significant change. Previously we would use
> fib->vector_no to decide the queue, which was in range (1, dev->max_msix).
>
> BTW, is there any code which relies on a command being sent/received on
> the HW queue same as fib->vector_no?
>
Yeah, and that's the clincher.
The vector/MSIx interrupt to use for the completion is encoded in the
command (cf drivers/scsi/aacraid/src.c:aac_src_deliver_message()):
fib->hw_fib_va)->reply_qid = vector_no;
_and_ the command index:
fib->hw_fib_va)->request_id += (vector_no << 16);
so if we miscalculate the vector number here, the command completion
will look up the wrong command, and we get this issue.
But maybe the fix is relatively simple; can you try this:
diff --git a/drivers/scsi/aacraid/src.c b/drivers/scsi/aacraid/src.c
index 61949f374188..698a206a2b43 100644
--- a/drivers/scsi/aacraid/src.c
+++ b/drivers/scsi/aacraid/src.c
@@ -510,7 +510,7 @@ static int aac_src_deliver_message(struct fib *fib)
&& dev->sa_firmware)
vector_no = aac_get_vector(dev);
else {
- if (!fib->vector_no || !fib->callback_data) {
+ if (!fib->callback_data) {
if (shost && dev->use_map_queue) {
qmap =
&shost->tag_set.map[HCTX_TYPE_DEFAULT];
vector_no =
qmap->mq_map[raw_smp_processor_id()];
Hmm?
Cheers,
Hannes
--
Dr. Hannes Reinecke Kernel Storage Architect
hare@suse.de +49 911 74053 688
SUSE Software Solutions GmbH, Maxfeldstr. 5, 90409 Nürnberg
HRB 36809 (AG Nürnberg), GF: Ivo Totev, Andrew McDonald,
Werner Knoblich
^ permalink raw reply related [flat|nested] 7+ messages in thread
end of thread, other threads:[~2023-11-24 6:42 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-05-19 23:08 [PATCH v4] aacraid: reply queue mapping to CPUs based of IRQ affinity Sagar Biradar
2023-05-19 23:18 ` kernel test robot
2023-06-08 21:09 ` Sagar.Biradar
2023-06-15 2:15 ` Martin K. Petersen
2023-11-23 12:01 ` John Garry
2023-11-23 12:51 ` James Bottomley
2023-11-24 6:32 ` Hannes Reinecke
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.