All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] libata: revert "libata: use blk taging" et al.
@ 2015-03-11 18:15 Tony Battersby
  2015-03-11 21:45   ` Jens Axboe
  0 siblings, 1 reply; 15+ messages in thread
From: Tony Battersby @ 2015-03-11 18:15 UTC (permalink / raw)
  To: Jens Axboe, Tejun Heo, Shaohua Li, linux-ide
  Cc: Christoph Hellwig, Dan Williams, linux-kernel

This reverts commits 12cb5ce101abfaf74421f8cc9f196e708209eb79 and
98bd4be1ba95f2fe7f543910792b7163a5de06eb.

Commit 12cb5ce101ab ("libata: use blk taging") causes the following oops
with scsi-mq enabled:

BUG: unable to handle kernel NULL pointer dereference at 0000000000000058
IP: [<ffffffff804fd46e>] ata_qc_new_init+0x3e/0x120
PGD 32adf0067 PUD 32adf1067 PMD 0 
Oops: 0002 [#1] SMP DEBUG_PAGEALLOC 
Modules linked in: iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi igb
i2c_algo_bit ptp pps_core pm80xx libsas scsi_transport_sas sg coretemp
eeprom w83795 i2c_i801
CPU: 4 PID: 1450 Comm: cydiskbench Not tainted 4.0.0-rc3 #1
Hardware name: Supermicro X8DTH-i/6/iF/6F/X8DTH, BIOS 2.1b       05/04/12  
task: ffff8800ba86d500 ti: ffff88032a064000 task.ti: ffff88032a064000
RIP: 0010:[<ffffffff804fd46e>]  [<ffffffff804fd46e>] ata_qc_new_init+0x3e/0x120
RSP: 0018:ffff88032a067858  EFLAGS: 00010046
RAX: 0000000000000000 RBX: ffff8800ba0d2230 RCX: 000000000000002a
RDX: ffffffff80505ae0 RSI: 0000000000000020 RDI: ffff8800ba0d2230
RBP: ffff88032a067868 R08: 0000000000000201 R09: 0000000000000001
R10: 0000000000000000 R11: 0000000000000000 R12: ffff8800ba0d0000
R13: ffff8800ba0d2230 R14: ffffffff80505ae0 R15: ffff8800ba0d0000
FS:  0000000041223950(0063) GS:ffff88033e480000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
CR2: 0000000000000058 CR3: 000000032a0a3000 CR4: 00000000000006e0
Stack:
 ffff880329eee758 ffff880329eee758 ffff88032a0678a8 ffffffff80502dad
 ffff8800ba167978 ffff880329eee758 ffff88032bf9c520 ffff8800ba167978
 ffff88032bf9c520 ffff88032bf9a290 ffff88032a0678b8 ffffffff80506909
Call Trace:
 [<ffffffff80502dad>] ata_scsi_translate+0x3d/0x1b0
 [<ffffffff80506909>] ata_sas_queuecmd+0x149/0x2a0
 [<ffffffffa0046650>] sas_queuecommand+0xa0/0x1f0 [libsas]
 [<ffffffff804ea544>] scsi_dispatch_cmd+0xd4/0x1a0
 [<ffffffff804eb50f>] scsi_queue_rq+0x66f/0x7f0
 [<ffffffff803e5098>] __blk_mq_run_hw_queue+0x208/0x3f0
 [<ffffffff803e54b8>] blk_mq_run_hw_queue+0x88/0xc0
 [<ffffffff803e5c74>] blk_mq_insert_request+0xc4/0x130
 [<ffffffff803e0b63>] blk_execute_rq_nowait+0x73/0x160
 [<ffffffffa0023fca>] sg_common_write+0x3da/0x720 [sg]
 [<ffffffff80308b8e>] ? might_fault+0x5e/0xb0
 [<ffffffffa0025100>] sg_new_write+0x250/0x360 [sg]
 [<ffffffff802a236c>] ? __lock_acquire+0x50c/0xc10
 [<ffffffff802a2b87>] ? lock_release_non_nested+0xa7/0x360
 [<ffffffff8068954b>] ? _raw_spin_unlock_irqrestore+0x3b/0x60
 [<ffffffff80308b8e>] ? might_fault+0x5e/0xb0
 [<ffffffff80308b8e>] ? might_fault+0x5e/0xb0
 [<ffffffffa0025feb>] sg_write+0x13b/0x450 [sg]
 [<ffffffff802a236c>] ? __lock_acquire+0x50c/0xc10
 [<ffffffff802cb1e9>] ? do_futex+0x109/0xbf0
 [<ffffffff80308b8e>] ? might_fault+0x5e/0xb0
 [<ffffffff8032ec91>] vfs_write+0xd1/0x1b0
 [<ffffffff8032ee54>] SyS_write+0x54/0xc0
 [<ffffffff80689932>] system_call_fastpath+0x12/0x17
Code: 24 20 04 0f 85 ec 00 00 00 49 83 3c 24 00 0f 84 cf 00 00 00 83 fe 1f
0f 87 dc 00 00 00 89 f0 48 69 c0 f0 00 00 00 49 8d 44 04 40 <89> 70 58 48
c7 40 10 00 00 00 00 4c 89 20 48 89 58 08 c7 40 64 
RIP  [<ffffffff804fd46e>] ata_qc_new_init+0x3e/0x120
 RSP <ffff88032a067858>
CR2: 0000000000000058
---[ end trace 43f5eefb64627eff ]---


scsi-mq uses a host-wide tag map shared among all devices with some
integer tag values >= ATA_MAX_QUEUE.  These unexpectedly high tag values
cause __ata_qc_from_tag() to return NULL, which is then dereferenced in
ata_qc_new_init(), causing the oops above.

Due to conflicts, it is also necessary to revert commit 98bd4be1ba95
("libata: move sas ata tag allocation to libata-scsi.c"), which appears
to have been a follow-on cleanup to the commit that caused the problem.

Fixes: 12cb5ce101ab ("libata: use blk taging")
Signed-off-by: Tony Battersby <tonyb@cybernetics.com>
---

Note: when reverting the two commits above, I had to fixup conflicts
with the following commit:
72dd299d5039a336493993dcc63413cf31d0e662 ("libata: allow sata_sil24 to
opt-out of tag ordered submission")

If anyone else can suggest a fix that does not involve reverting the
commits, then I would be happy to test.

The oops output was generated using the SCSI generic driver (sg) to send
commands to SATA disks connected to a pm80xx HBA.

Here is the code relevant to the oops:

enum {
	ATA_MAX_QUEUE = 32,
};

static inline unsigned int ata_tag_valid(unsigned int tag)
{
	return (tag < ATA_MAX_QUEUE) ? 1 : 0;
}

static inline struct ata_queued_cmd *__ata_qc_from_tag(struct ata_port *ap,
						       unsigned int tag)
{
	if (likely(ata_tag_valid(tag)))
		return &ap->qcmd[tag];
	return NULL;
}

struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev, int tag)
{
	...
	qc = __ata_qc_from_tag(ap, tag); /* tag >= ATA_MAX_QUEUE */
	qc->tag = tag; /* qc is NULL, OOPS HERE */
	...
}

diff -urpN linux-4.0.0-rc3-a/drivers/ata/libata-core.c linux-4.0.0-rc3-b/drivers/ata/libata-core.c
--- linux-4.0.0-rc3-a/drivers/ata/libata-core.c	2015-03-08 19:09:09.000000000 -0400
+++ linux-4.0.0-rc3-b/drivers/ata/libata-core.c	2015-03-10 14:13:44.000000000 -0400
@@ -1585,6 +1585,8 @@ unsigned ata_exec_internal_sg(struct ata
 	else
 		tag = 0;
 
+	if (test_and_set_bit(tag, &ap->qc_allocated))
+		BUG();
 	qc = __ata_qc_from_tag(ap, tag);
 
 	qc->tag = tag;
@@ -4720,36 +4722,69 @@ void swap_buf_le16(u16 *buf, unsigned in
 }
 
 /**
- *	ata_qc_new_init - Request an available ATA command, and initialize it
- *	@dev: Device from whom we request an available command structure
+ *	ata_qc_new - Request an available ATA command, for queueing
+ *	@ap: target port
+ *
+ *	Some ATA host controllers may implement a queue depth which is less
+ *	than ATA_MAX_QUEUE. So we shouldn't allocate a tag which is beyond
+ *	the hardware limitation.
  *
  *	LOCKING:
  *	None.
  */
 
-struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev, int tag)
+static struct ata_queued_cmd *ata_qc_new(struct ata_port *ap)
 {
-	struct ata_port *ap = dev->link->ap;
-	struct ata_queued_cmd *qc;
+	struct ata_queued_cmd *qc = NULL;
+	unsigned int max_queue = ap->host->n_tags;
+	unsigned int i, tag;
 
 	/* no command while frozen */
 	if (unlikely(ap->pflags & ATA_PFLAG_FROZEN))
 		return NULL;
 
-	/* libsas case */
-	if (!ap->scsi_host) {
-		tag = ata_sas_allocate_tag(ap);
-		if (tag < 0)
-			return NULL;
+	for (i = 0, tag = ap->last_tag + 1; i < max_queue; i++, tag++) {
+		if (ap->flags & ATA_FLAG_LOWTAG)
+			tag = i;
+		else
+			tag = tag < max_queue ? tag : 0;
+
+		/* the last tag is reserved for internal command. */
+		if (tag == ATA_TAG_INTERNAL)
+			continue;
+
+		if (!test_and_set_bit(tag, &ap->qc_allocated)) {
+			qc = __ata_qc_from_tag(ap, tag);
+			qc->tag = tag;
+			ap->last_tag = tag;
+			break;
+		}
 	}
 
-	qc = __ata_qc_from_tag(ap, tag);
-	qc->tag = tag;
-	qc->scsicmd = NULL;
-	qc->ap = ap;
-	qc->dev = dev;
+	return qc;
+}
 
-	ata_qc_reinit(qc);
+/**
+ *	ata_qc_new_init - Request an available ATA command, and initialize it
+ *	@dev: Device from whom we request an available command structure
+ *
+ *	LOCKING:
+ *	None.
+ */
+
+struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev)
+{
+	struct ata_port *ap = dev->link->ap;
+	struct ata_queued_cmd *qc;
+
+	qc = ata_qc_new(ap);
+	if (qc) {
+		qc->scsicmd = NULL;
+		qc->ap = ap;
+		qc->dev = dev;
+
+		ata_qc_reinit(qc);
+	}
 
 	return qc;
 }
@@ -4776,8 +4811,7 @@ void ata_qc_free(struct ata_queued_cmd *
 	tag = qc->tag;
 	if (likely(ata_tag_valid(tag))) {
 		qc->tag = ATA_TAG_POISON;
-		if (!ap->scsi_host)
-			ata_sas_free_tag(tag, ap);
+		clear_bit(tag, &ap->qc_allocated);
 	}
 }
 
diff -urpN linux-4.0.0-rc3-a/drivers/ata/libata.h linux-4.0.0-rc3-b/drivers/ata/libata.h
--- linux-4.0.0-rc3-a/drivers/ata/libata.h	2015-03-08 19:09:09.000000000 -0400
+++ linux-4.0.0-rc3-b/drivers/ata/libata.h	2015-03-10 14:12:38.000000000 -0400
@@ -63,7 +63,7 @@ extern struct ata_link *ata_dev_phys_lin
 extern void ata_force_cbl(struct ata_port *ap);
 extern u64 ata_tf_to_lba(const struct ata_taskfile *tf);
 extern u64 ata_tf_to_lba48(const struct ata_taskfile *tf);
-extern struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev, int tag);
+extern struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev);
 extern int ata_build_rw_tf(struct ata_taskfile *tf, struct ata_device *dev,
 			   u64 block, u32 n_block, unsigned int tf_flags,
 			   unsigned int tag);
@@ -144,8 +144,6 @@ extern void ata_scsi_dev_rescan(struct w
 extern int ata_bus_probe(struct ata_port *ap);
 extern int ata_scsi_user_scan(struct Scsi_Host *shost, unsigned int channel,
 			      unsigned int id, u64 lun);
-int ata_sas_allocate_tag(struct ata_port *ap);
-void ata_sas_free_tag(unsigned int tag, struct ata_port *ap);
 
 
 /* libata-eh.c */
diff -urpN linux-4.0.0-rc3-a/drivers/ata/libata-scsi.c linux-4.0.0-rc3-b/drivers/ata/libata-scsi.c
--- linux-4.0.0-rc3-a/drivers/ata/libata-scsi.c	2015-03-08 19:09:09.000000000 -0400
+++ linux-4.0.0-rc3-b/drivers/ata/libata-scsi.c	2015-03-10 14:12:38.000000000 -0400
@@ -756,7 +756,7 @@ static struct ata_queued_cmd *ata_scsi_q
 {
 	struct ata_queued_cmd *qc;
 
-	qc = ata_qc_new_init(dev, cmd->request->tag);
+	qc = ata_qc_new_init(dev);
 	if (qc) {
 		qc->scsicmd = cmd;
 		qc->scsidone = cmd->scsi_done;
@@ -3668,9 +3668,6 @@ int ata_scsi_add_hosts(struct ata_host *
 		 */
 		shost->max_host_blocked = 1;
 
-		if (scsi_init_shared_tag_map(shost, host->n_tags))
-			goto err_add;
-
 		rc = scsi_add_host_with_dma(ap->scsi_host,
 						&ap->tdev, ap->host->dev);
 		if (rc)
@@ -4233,31 +4230,3 @@ int ata_sas_queuecmd(struct scsi_cmnd *c
 	return rc;
 }
 EXPORT_SYMBOL_GPL(ata_sas_queuecmd);
-
-int ata_sas_allocate_tag(struct ata_port *ap)
-{
-	unsigned int max_queue = ap->host->n_tags;
-	unsigned int i, tag;
-
-	for (i = 0, tag = ap->sas_last_tag + 1; i < max_queue; i++, tag++) {
-		if (ap->flags & ATA_FLAG_LOWTAG)
-			tag = 1;
-		else
-			tag = tag < max_queue ? tag : 0;
-
-		/* the last tag is reserved for internal command. */
-		if (tag == ATA_TAG_INTERNAL)
-			continue;
-
-		if (!test_and_set_bit(tag, &ap->sas_tag_allocated)) {
-			ap->sas_last_tag = tag;
-			return tag;
-		}
-	}
-	return -1;
-}
-
-void ata_sas_free_tag(unsigned int tag, struct ata_port *ap)
-{
-	clear_bit(tag, &ap->sas_tag_allocated);
-}
diff -urpN linux-4.0.0-rc3-a/include/linux/libata.h linux-4.0.0-rc3-b/include/linux/libata.h
--- linux-4.0.0-rc3-a/include/linux/libata.h	2015-03-08 19:09:09.000000000 -0400
+++ linux-4.0.0-rc3-b/include/linux/libata.h	2015-03-10 14:12:38.000000000 -0400
@@ -823,10 +823,10 @@ struct ata_port {
 	unsigned int		cbl;	/* cable type; ATA_CBL_xxx */
 
 	struct ata_queued_cmd	qcmd[ATA_MAX_QUEUE];
-	unsigned long		sas_tag_allocated; /* for sas tag allocation only */
+	unsigned long		qc_allocated;
 	unsigned int		qc_active;
 	int			nr_active_links; /* #links with active qcs */
-	unsigned int		sas_last_tag;	/* track next tag hw expects */
+	unsigned int		last_tag;	/* track next tag hw expects */
 
 	struct ata_link		link;		/* host default link */
 	struct ata_link		*slave_link;	/* see ata_slave_link_init() */
@@ -1352,7 +1352,6 @@ extern struct device_attribute *ata_comm
 	.ioctl			= ata_scsi_ioctl,		\
 	.queuecommand		= ata_scsi_queuecmd,		\
 	.can_queue		= ATA_DEF_QUEUE,		\
-	.tag_alloc_policy	= BLK_TAG_ALLOC_RR,		\
 	.this_id		= ATA_SHT_THIS_ID,		\
 	.cmd_per_lun		= ATA_SHT_CMD_PER_LUN,		\
 	.emulated		= ATA_SHT_EMULATED,		\

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] libata: revert "libata: use blk taging" et al.
  2015-03-11 18:15 [PATCH] libata: revert "libata: use blk taging" et al Tony Battersby
@ 2015-03-11 21:45   ` Jens Axboe
  0 siblings, 0 replies; 15+ messages in thread
From: Jens Axboe @ 2015-03-11 21:45 UTC (permalink / raw)
  To: Tony Battersby, Tejun Heo, Shaohua Li, linux-ide
  Cc: Christoph Hellwig, Dan Williams, linux-kernel

On 03/11/2015 02:15 PM, Tony Battersby wrote:
> This reverts commits 12cb5ce101abfaf74421f8cc9f196e708209eb79 and
> 98bd4be1ba95f2fe7f543910792b7163a5de06eb.
>
> Commit 12cb5ce101ab ("libata: use blk taging") causes the following oops
> with scsi-mq enabled:
>
> BUG: unable to handle kernel NULL pointer dereference at 0000000000000058
> IP: [<ffffffff804fd46e>] ata_qc_new_init+0x3e/0x120
> PGD 32adf0067 PUD 32adf1067 PMD 0
> Oops: 0002 [#1] SMP DEBUG_PAGEALLOC
> Modules linked in: iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi igb
> i2c_algo_bit ptp pps_core pm80xx libsas scsi_transport_sas sg coretemp
> eeprom w83795 i2c_i801
> CPU: 4 PID: 1450 Comm: cydiskbench Not tainted 4.0.0-rc3 #1
> Hardware name: Supermicro X8DTH-i/6/iF/6F/X8DTH, BIOS 2.1b       05/04/12
> task: ffff8800ba86d500 ti: ffff88032a064000 task.ti: ffff88032a064000
> RIP: 0010:[<ffffffff804fd46e>]  [<ffffffff804fd46e>] ata_qc_new_init+0x3e/0x120
> RSP: 0018:ffff88032a067858  EFLAGS: 00010046
> RAX: 0000000000000000 RBX: ffff8800ba0d2230 RCX: 000000000000002a
> RDX: ffffffff80505ae0 RSI: 0000000000000020 RDI: ffff8800ba0d2230
> RBP: ffff88032a067868 R08: 0000000000000201 R09: 0000000000000001
> R10: 0000000000000000 R11: 0000000000000000 R12: ffff8800ba0d0000
> R13: ffff8800ba0d2230 R14: ffffffff80505ae0 R15: ffff8800ba0d0000
> FS:  0000000041223950(0063) GS:ffff88033e480000(0000) knlGS:0000000000000000
> CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
> CR2: 0000000000000058 CR3: 000000032a0a3000 CR4: 00000000000006e0
> Stack:
>   ffff880329eee758 ffff880329eee758 ffff88032a0678a8 ffffffff80502dad
>   ffff8800ba167978 ffff880329eee758 ffff88032bf9c520 ffff8800ba167978
>   ffff88032bf9c520 ffff88032bf9a290 ffff88032a0678b8 ffffffff80506909
> Call Trace:
>   [<ffffffff80502dad>] ata_scsi_translate+0x3d/0x1b0
>   [<ffffffff80506909>] ata_sas_queuecmd+0x149/0x2a0
>   [<ffffffffa0046650>] sas_queuecommand+0xa0/0x1f0 [libsas]
>   [<ffffffff804ea544>] scsi_dispatch_cmd+0xd4/0x1a0
>   [<ffffffff804eb50f>] scsi_queue_rq+0x66f/0x7f0
>   [<ffffffff803e5098>] __blk_mq_run_hw_queue+0x208/0x3f0
>   [<ffffffff803e54b8>] blk_mq_run_hw_queue+0x88/0xc0
>   [<ffffffff803e5c74>] blk_mq_insert_request+0xc4/0x130
>   [<ffffffff803e0b63>] blk_execute_rq_nowait+0x73/0x160
>   [<ffffffffa0023fca>] sg_common_write+0x3da/0x720 [sg]
>   [<ffffffff80308b8e>] ? might_fault+0x5e/0xb0
>   [<ffffffffa0025100>] sg_new_write+0x250/0x360 [sg]
>   [<ffffffff802a236c>] ? __lock_acquire+0x50c/0xc10
>   [<ffffffff802a2b87>] ? lock_release_non_nested+0xa7/0x360
>   [<ffffffff8068954b>] ? _raw_spin_unlock_irqrestore+0x3b/0x60
>   [<ffffffff80308b8e>] ? might_fault+0x5e/0xb0
>   [<ffffffff80308b8e>] ? might_fault+0x5e/0xb0
>   [<ffffffffa0025feb>] sg_write+0x13b/0x450 [sg]
>   [<ffffffff802a236c>] ? __lock_acquire+0x50c/0xc10
>   [<ffffffff802cb1e9>] ? do_futex+0x109/0xbf0
>   [<ffffffff80308b8e>] ? might_fault+0x5e/0xb0
>   [<ffffffff8032ec91>] vfs_write+0xd1/0x1b0
>   [<ffffffff8032ee54>] SyS_write+0x54/0xc0
>   [<ffffffff80689932>] system_call_fastpath+0x12/0x17
> Code: 24 20 04 0f 85 ec 00 00 00 49 83 3c 24 00 0f 84 cf 00 00 00 83 fe 1f
> 0f 87 dc 00 00 00 89 f0 48 69 c0 f0 00 00 00 49 8d 44 04 40 <89> 70 58 48
> c7 40 10 00 00 00 00 4c 89 20 48 89 58 08 c7 40 64
> RIP  [<ffffffff804fd46e>] ata_qc_new_init+0x3e/0x120
>   RSP <ffff88032a067858>
> CR2: 0000000000000058
> ---[ end trace 43f5eefb64627eff ]---
>
>
> scsi-mq uses a host-wide tag map shared among all devices with some
> integer tag values >= ATA_MAX_QUEUE.  These unexpectedly high tag values
> cause __ata_qc_from_tag() to return NULL, which is then dereferenced in
> ata_qc_new_init(), causing the oops above.

Wait, something is missing here. We should not be getting tag values 
that are >= ATA_MAX_QUEUE. Instead of reverting this, we need to figure 
out why this is happening, and fix it. That is correct way forward here. 
What setup is this being reproduced on?

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] libata: revert "libata: use blk taging" et al.
@ 2015-03-11 21:45   ` Jens Axboe
  0 siblings, 0 replies; 15+ messages in thread
From: Jens Axboe @ 2015-03-11 21:45 UTC (permalink / raw)
  To: Tony Battersby, Tejun Heo, Shaohua Li, linux-ide
  Cc: Christoph Hellwig, Dan Williams, linux-kernel

On 03/11/2015 02:15 PM, Tony Battersby wrote:
> This reverts commits 12cb5ce101abfaf74421f8cc9f196e708209eb79 and
> 98bd4be1ba95f2fe7f543910792b7163a5de06eb.
>
> Commit 12cb5ce101ab ("libata: use blk taging") causes the following oops
> with scsi-mq enabled:
>
> BUG: unable to handle kernel NULL pointer dereference at 0000000000000058
> IP: [<ffffffff804fd46e>] ata_qc_new_init+0x3e/0x120
> PGD 32adf0067 PUD 32adf1067 PMD 0
> Oops: 0002 [#1] SMP DEBUG_PAGEALLOC
> Modules linked in: iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi igb
> i2c_algo_bit ptp pps_core pm80xx libsas scsi_transport_sas sg coretemp
> eeprom w83795 i2c_i801
> CPU: 4 PID: 1450 Comm: cydiskbench Not tainted 4.0.0-rc3 #1
> Hardware name: Supermicro X8DTH-i/6/iF/6F/X8DTH, BIOS 2.1b       05/04/12
> task: ffff8800ba86d500 ti: ffff88032a064000 task.ti: ffff88032a064000
> RIP: 0010:[<ffffffff804fd46e>]  [<ffffffff804fd46e>] ata_qc_new_init+0x3e/0x120
> RSP: 0018:ffff88032a067858  EFLAGS: 00010046
> RAX: 0000000000000000 RBX: ffff8800ba0d2230 RCX: 000000000000002a
> RDX: ffffffff80505ae0 RSI: 0000000000000020 RDI: ffff8800ba0d2230
> RBP: ffff88032a067868 R08: 0000000000000201 R09: 0000000000000001
> R10: 0000000000000000 R11: 0000000000000000 R12: ffff8800ba0d0000
> R13: ffff8800ba0d2230 R14: ffffffff80505ae0 R15: ffff8800ba0d0000
> FS:  0000000041223950(0063) GS:ffff88033e480000(0000) knlGS:0000000000000000
> CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
> CR2: 0000000000000058 CR3: 000000032a0a3000 CR4: 00000000000006e0
> Stack:
>   ffff880329eee758 ffff880329eee758 ffff88032a0678a8 ffffffff80502dad
>   ffff8800ba167978 ffff880329eee758 ffff88032bf9c520 ffff8800ba167978
>   ffff88032bf9c520 ffff88032bf9a290 ffff88032a0678b8 ffffffff80506909
> Call Trace:
>   [<ffffffff80502dad>] ata_scsi_translate+0x3d/0x1b0
>   [<ffffffff80506909>] ata_sas_queuecmd+0x149/0x2a0
>   [<ffffffffa0046650>] sas_queuecommand+0xa0/0x1f0 [libsas]
>   [<ffffffff804ea544>] scsi_dispatch_cmd+0xd4/0x1a0
>   [<ffffffff804eb50f>] scsi_queue_rq+0x66f/0x7f0
>   [<ffffffff803e5098>] __blk_mq_run_hw_queue+0x208/0x3f0
>   [<ffffffff803e54b8>] blk_mq_run_hw_queue+0x88/0xc0
>   [<ffffffff803e5c74>] blk_mq_insert_request+0xc4/0x130
>   [<ffffffff803e0b63>] blk_execute_rq_nowait+0x73/0x160
>   [<ffffffffa0023fca>] sg_common_write+0x3da/0x720 [sg]
>   [<ffffffff80308b8e>] ? might_fault+0x5e/0xb0
>   [<ffffffffa0025100>] sg_new_write+0x250/0x360 [sg]
>   [<ffffffff802a236c>] ? __lock_acquire+0x50c/0xc10
>   [<ffffffff802a2b87>] ? lock_release_non_nested+0xa7/0x360
>   [<ffffffff8068954b>] ? _raw_spin_unlock_irqrestore+0x3b/0x60
>   [<ffffffff80308b8e>] ? might_fault+0x5e/0xb0
>   [<ffffffff80308b8e>] ? might_fault+0x5e/0xb0
>   [<ffffffffa0025feb>] sg_write+0x13b/0x450 [sg]
>   [<ffffffff802a236c>] ? __lock_acquire+0x50c/0xc10
>   [<ffffffff802cb1e9>] ? do_futex+0x109/0xbf0
>   [<ffffffff80308b8e>] ? might_fault+0x5e/0xb0
>   [<ffffffff8032ec91>] vfs_write+0xd1/0x1b0
>   [<ffffffff8032ee54>] SyS_write+0x54/0xc0
>   [<ffffffff80689932>] system_call_fastpath+0x12/0x17
> Code: 24 20 04 0f 85 ec 00 00 00 49 83 3c 24 00 0f 84 cf 00 00 00 83 fe 1f
> 0f 87 dc 00 00 00 89 f0 48 69 c0 f0 00 00 00 49 8d 44 04 40 <89> 70 58 48
> c7 40 10 00 00 00 00 4c 89 20 48 89 58 08 c7 40 64
> RIP  [<ffffffff804fd46e>] ata_qc_new_init+0x3e/0x120
>   RSP <ffff88032a067858>
> CR2: 0000000000000058
> ---[ end trace 43f5eefb64627eff ]---
>
>
> scsi-mq uses a host-wide tag map shared among all devices with some
> integer tag values >= ATA_MAX_QUEUE.  These unexpectedly high tag values
> cause __ata_qc_from_tag() to return NULL, which is then dereferenced in
> ata_qc_new_init(), causing the oops above.

Wait, something is missing here. We should not be getting tag values 
that are >= ATA_MAX_QUEUE. Instead of reverting this, we need to figure 
out why this is happening, and fix it. That is correct way forward here. 
What setup is this being reproduced on?

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] libata: revert "libata: use blk taging" et al.
  2015-03-11 21:45   ` Jens Axboe
  (?)
@ 2015-03-11 22:19   ` Tony Battersby
  2015-03-12  1:53       ` Shaohua Li
  2015-03-12  2:31       ` Shaohua Li
  -1 siblings, 2 replies; 15+ messages in thread
From: Tony Battersby @ 2015-03-11 22:19 UTC (permalink / raw)
  To: Jens Axboe, Tejun Heo, Shaohua Li, linux-ide
  Cc: Christoph Hellwig, Dan Williams, linux-kernel

On 03/11/2015 05:45 PM, Jens Axboe wrote:
> On 03/11/2015 02:15 PM, Tony Battersby wrote:
>> This reverts commits 12cb5ce101abfaf74421f8cc9f196e708209eb79 and
>> 98bd4be1ba95f2fe7f543910792b7163a5de06eb.
>>
>> Commit 12cb5ce101ab ("libata: use blk taging") causes the following oops
>> with scsi-mq enabled:
>>
>> BUG: unable to handle kernel NULL pointer dereference at 0000000000000058
>> IP: [<ffffffff804fd46e>] ata_qc_new_init+0x3e/0x120
>> PGD 32adf0067 PUD 32adf1067 PMD 0
>> Oops: 0002 [#1] SMP DEBUG_PAGEALLOC
>> Modules linked in: iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi igb
>> i2c_algo_bit ptp pps_core pm80xx libsas scsi_transport_sas sg coretemp
>> eeprom w83795 i2c_i801
>> CPU: 4 PID: 1450 Comm: cydiskbench Not tainted 4.0.0-rc3 #1
>> Hardware name: Supermicro X8DTH-i/6/iF/6F/X8DTH, BIOS 2.1b       05/04/12
>> task: ffff8800ba86d500 ti: ffff88032a064000 task.ti: ffff88032a064000
>> RIP: 0010:[<ffffffff804fd46e>]  [<ffffffff804fd46e>] ata_qc_new_init+0x3e/0x120
>> RSP: 0018:ffff88032a067858  EFLAGS: 00010046
>> RAX: 0000000000000000 RBX: ffff8800ba0d2230 RCX: 000000000000002a
>> RDX: ffffffff80505ae0 RSI: 0000000000000020 RDI: ffff8800ba0d2230
>> RBP: ffff88032a067868 R08: 0000000000000201 R09: 0000000000000001
>> R10: 0000000000000000 R11: 0000000000000000 R12: ffff8800ba0d0000
>> R13: ffff8800ba0d2230 R14: ffffffff80505ae0 R15: ffff8800ba0d0000
>> FS:  0000000041223950(0063) GS:ffff88033e480000(0000) knlGS:0000000000000000
>> CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
>> CR2: 0000000000000058 CR3: 000000032a0a3000 CR4: 00000000000006e0
>> Stack:
>>   ffff880329eee758 ffff880329eee758 ffff88032a0678a8 ffffffff80502dad
>>   ffff8800ba167978 ffff880329eee758 ffff88032bf9c520 ffff8800ba167978
>>   ffff88032bf9c520 ffff88032bf9a290 ffff88032a0678b8 ffffffff80506909
>> Call Trace:
>>   [<ffffffff80502dad>] ata_scsi_translate+0x3d/0x1b0
>>   [<ffffffff80506909>] ata_sas_queuecmd+0x149/0x2a0
>>   [<ffffffffa0046650>] sas_queuecommand+0xa0/0x1f0 [libsas]
>>   [<ffffffff804ea544>] scsi_dispatch_cmd+0xd4/0x1a0
>>   [<ffffffff804eb50f>] scsi_queue_rq+0x66f/0x7f0
>>   [<ffffffff803e5098>] __blk_mq_run_hw_queue+0x208/0x3f0
>>   [<ffffffff803e54b8>] blk_mq_run_hw_queue+0x88/0xc0
>>   [<ffffffff803e5c74>] blk_mq_insert_request+0xc4/0x130
>>   [<ffffffff803e0b63>] blk_execute_rq_nowait+0x73/0x160
>>   [<ffffffffa0023fca>] sg_common_write+0x3da/0x720 [sg]
>>   [<ffffffff80308b8e>] ? might_fault+0x5e/0xb0
>>   [<ffffffffa0025100>] sg_new_write+0x250/0x360 [sg]
>>   [<ffffffff802a236c>] ? __lock_acquire+0x50c/0xc10
>>   [<ffffffff802a2b87>] ? lock_release_non_nested+0xa7/0x360
>>   [<ffffffff8068954b>] ? _raw_spin_unlock_irqrestore+0x3b/0x60
>>   [<ffffffff80308b8e>] ? might_fault+0x5e/0xb0
>>   [<ffffffff80308b8e>] ? might_fault+0x5e/0xb0
>>   [<ffffffffa0025feb>] sg_write+0x13b/0x450 [sg]
>>   [<ffffffff802a236c>] ? __lock_acquire+0x50c/0xc10
>>   [<ffffffff802cb1e9>] ? do_futex+0x109/0xbf0
>>   [<ffffffff80308b8e>] ? might_fault+0x5e/0xb0
>>   [<ffffffff8032ec91>] vfs_write+0xd1/0x1b0
>>   [<ffffffff8032ee54>] SyS_write+0x54/0xc0
>>   [<ffffffff80689932>] system_call_fastpath+0x12/0x17
>> Code: 24 20 04 0f 85 ec 00 00 00 49 83 3c 24 00 0f 84 cf 00 00 00 83 fe 1f
>> 0f 87 dc 00 00 00 89 f0 48 69 c0 f0 00 00 00 49 8d 44 04 40 <89> 70 58 48
>> c7 40 10 00 00 00 00 4c 89 20 48 89 58 08 c7 40 64
>> RIP  [<ffffffff804fd46e>] ata_qc_new_init+0x3e/0x120
>>   RSP <ffff88032a067858>
>> CR2: 0000000000000058
>> ---[ end trace 43f5eefb64627eff ]---
>>
>>
>> scsi-mq uses a host-wide tag map shared among all devices with some
>> integer tag values >= ATA_MAX_QUEUE.  These unexpectedly high tag values
>> cause __ata_qc_from_tag() to return NULL, which is then dereferenced in
>> ata_qc_new_init(), causing the oops above.
> Wait, something is missing here. We should not be getting tag values 
> that are >= ATA_MAX_QUEUE. Instead of reverting this, we need to figure 
> out why this is happening, and fix it. That is correct way forward here. 
> What setup is this being reproduced on?
>

Hardware:
PMC 8001 SAS HBA (PCI ID 117c:0042, PCI sub-ID 117c:0043) using pm80xx
driver
4 SATA disks directly connected (no expander)

Test procedure:
Send disk read/write commands to SATA disks using the SCSI generic (sg)
driver.

Analysis:

shost->can_queue is 508

With my patch applied to revert the problematic commits, I added the
following code to ata_scsi_qc_new():

    int tag = cmd->request->tag;
    static int max_tag;
    if (tag > max_tag) {
        max_tag = tag;
        printk(KERN_DEBUG "max tag %d\n", tag);
    }

Testing one SATA disk at a time with scsi-mq enabled, I get a max tag of
64.  Testing 4 disks at a time with scsi-mq enabled gives a max tag of
194.  With scsi-mq disabled, I get a max tag of 30 no matter how many
disks I test.

Tony

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] libata: revert "libata: use blk taging" et al.
  2015-03-11 22:19   ` Tony Battersby
@ 2015-03-12  1:53       ` Shaohua Li
  2015-03-12  2:31       ` Shaohua Li
  1 sibling, 0 replies; 15+ messages in thread
From: Shaohua Li @ 2015-03-12  1:53 UTC (permalink / raw)
  To: Tony Battersby
  Cc: Jens Axboe, Tejun Heo, linux-ide, Christoph Hellwig,
	Dan Williams, linux-kernel

On Wed, Mar 11, 2015 at 06:19:27PM -0400, Tony Battersby wrote:
> On 03/11/2015 05:45 PM, Jens Axboe wrote:
> > On 03/11/2015 02:15 PM, Tony Battersby wrote:
> >> This reverts commits 12cb5ce101abfaf74421f8cc9f196e708209eb79 and
> >> 98bd4be1ba95f2fe7f543910792b7163a5de06eb.
> >>
> >> Commit 12cb5ce101ab ("libata: use blk taging") causes the following oops
> >> with scsi-mq enabled:
> >>
> >> BUG: unable to handle kernel NULL pointer dereference at 0000000000000058
> >> IP: [<ffffffff804fd46e>] ata_qc_new_init+0x3e/0x120
> >> PGD 32adf0067 PUD 32adf1067 PMD 0
> >> Oops: 0002 [#1] SMP DEBUG_PAGEALLOC
> >> Modules linked in: iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi igb
> >> i2c_algo_bit ptp pps_core pm80xx libsas scsi_transport_sas sg coretemp
> >> eeprom w83795 i2c_i801
> >> CPU: 4 PID: 1450 Comm: cydiskbench Not tainted 4.0.0-rc3 #1
> >> Hardware name: Supermicro X8DTH-i/6/iF/6F/X8DTH, BIOS 2.1b       05/04/12
> >> task: ffff8800ba86d500 ti: ffff88032a064000 task.ti: ffff88032a064000
> >> RIP: 0010:[<ffffffff804fd46e>]  [<ffffffff804fd46e>] ata_qc_new_init+0x3e/0x120
> >> RSP: 0018:ffff88032a067858  EFLAGS: 00010046
> >> RAX: 0000000000000000 RBX: ffff8800ba0d2230 RCX: 000000000000002a
> >> RDX: ffffffff80505ae0 RSI: 0000000000000020 RDI: ffff8800ba0d2230
> >> RBP: ffff88032a067868 R08: 0000000000000201 R09: 0000000000000001
> >> R10: 0000000000000000 R11: 0000000000000000 R12: ffff8800ba0d0000
> >> R13: ffff8800ba0d2230 R14: ffffffff80505ae0 R15: ffff8800ba0d0000
> >> FS:  0000000041223950(0063) GS:ffff88033e480000(0000) knlGS:0000000000000000
> >> CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
> >> CR2: 0000000000000058 CR3: 000000032a0a3000 CR4: 00000000000006e0
> >> Stack:
> >>   ffff880329eee758 ffff880329eee758 ffff88032a0678a8 ffffffff80502dad
> >>   ffff8800ba167978 ffff880329eee758 ffff88032bf9c520 ffff8800ba167978
> >>   ffff88032bf9c520 ffff88032bf9a290 ffff88032a0678b8 ffffffff80506909
> >> Call Trace:
> >>   [<ffffffff80502dad>] ata_scsi_translate+0x3d/0x1b0
> >>   [<ffffffff80506909>] ata_sas_queuecmd+0x149/0x2a0
> >>   [<ffffffffa0046650>] sas_queuecommand+0xa0/0x1f0 [libsas]
> >>   [<ffffffff804ea544>] scsi_dispatch_cmd+0xd4/0x1a0
> >>   [<ffffffff804eb50f>] scsi_queue_rq+0x66f/0x7f0
> >>   [<ffffffff803e5098>] __blk_mq_run_hw_queue+0x208/0x3f0
> >>   [<ffffffff803e54b8>] blk_mq_run_hw_queue+0x88/0xc0
> >>   [<ffffffff803e5c74>] blk_mq_insert_request+0xc4/0x130
> >>   [<ffffffff803e0b63>] blk_execute_rq_nowait+0x73/0x160
> >>   [<ffffffffa0023fca>] sg_common_write+0x3da/0x720 [sg]
> >>   [<ffffffff80308b8e>] ? might_fault+0x5e/0xb0
> >>   [<ffffffffa0025100>] sg_new_write+0x250/0x360 [sg]
> >>   [<ffffffff802a236c>] ? __lock_acquire+0x50c/0xc10
> >>   [<ffffffff802a2b87>] ? lock_release_non_nested+0xa7/0x360
> >>   [<ffffffff8068954b>] ? _raw_spin_unlock_irqrestore+0x3b/0x60
> >>   [<ffffffff80308b8e>] ? might_fault+0x5e/0xb0
> >>   [<ffffffff80308b8e>] ? might_fault+0x5e/0xb0
> >>   [<ffffffffa0025feb>] sg_write+0x13b/0x450 [sg]
> >>   [<ffffffff802a236c>] ? __lock_acquire+0x50c/0xc10
> >>   [<ffffffff802cb1e9>] ? do_futex+0x109/0xbf0
> >>   [<ffffffff80308b8e>] ? might_fault+0x5e/0xb0
> >>   [<ffffffff8032ec91>] vfs_write+0xd1/0x1b0
> >>   [<ffffffff8032ee54>] SyS_write+0x54/0xc0
> >>   [<ffffffff80689932>] system_call_fastpath+0x12/0x17
> >> Code: 24 20 04 0f 85 ec 00 00 00 49 83 3c 24 00 0f 84 cf 00 00 00 83 fe 1f
> >> 0f 87 dc 00 00 00 89 f0 48 69 c0 f0 00 00 00 49 8d 44 04 40 <89> 70 58 48
> >> c7 40 10 00 00 00 00 4c 89 20 48 89 58 08 c7 40 64
> >> RIP  [<ffffffff804fd46e>] ata_qc_new_init+0x3e/0x120
> >>   RSP <ffff88032a067858>
> >> CR2: 0000000000000058
> >> ---[ end trace 43f5eefb64627eff ]---
> >>
> >>
> >> scsi-mq uses a host-wide tag map shared among all devices with some
> >> integer tag values >= ATA_MAX_QUEUE.  These unexpectedly high tag values
> >> cause __ata_qc_from_tag() to return NULL, which is then dereferenced in
> >> ata_qc_new_init(), causing the oops above.
> > Wait, something is missing here. We should not be getting tag values 
> > that are >= ATA_MAX_QUEUE. Instead of reverting this, we need to figure 
> > out why this is happening, and fix it. That is correct way forward here. 
> > What setup is this being reproduced on?
> >
> 
> Hardware:
> PMC 8001 SAS HBA (PCI ID 117c:0042, PCI sub-ID 117c:0043) using pm80xx
> driver
> 4 SATA disks directly connected (no expander)
> 
> Test procedure:
> Send disk read/write commands to SATA disks using the SCSI generic (sg)
> driver.
> 
> Analysis:
> 
> shost->can_queue is 508
> 
> With my patch applied to revert the problematic commits, I added the
> following code to ata_scsi_qc_new():
> 
>     int tag = cmd->request->tag;
>     static int max_tag;
>     if (tag > max_tag) {
>         max_tag = tag;
>         printk(KERN_DEBUG "max tag %d\n", tag);
>     }
> 
> Testing one SATA disk at a time with scsi-mq enabled, I get a max tag of
> 64.  Testing 4 disks at a time with scsi-mq enabled gives a max tag of
> 194.  With scsi-mq disabled, I get a max tag of 30 no matter how many
> disks I test.

ata_qc_new_init() has a check, if !ap->scsi_host, request tag will not
be used and fallback to original tag allocation. you are using sas
controller, so the problem is why ap->scsi_host is set in your case.

Thanks,
Shaohua

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] libata: revert "libata: use blk taging" et al.
@ 2015-03-12  1:53       ` Shaohua Li
  0 siblings, 0 replies; 15+ messages in thread
From: Shaohua Li @ 2015-03-12  1:53 UTC (permalink / raw)
  To: Tony Battersby
  Cc: Jens Axboe, Tejun Heo, linux-ide, Christoph Hellwig,
	Dan Williams, linux-kernel

On Wed, Mar 11, 2015 at 06:19:27PM -0400, Tony Battersby wrote:
> On 03/11/2015 05:45 PM, Jens Axboe wrote:
> > On 03/11/2015 02:15 PM, Tony Battersby wrote:
> >> This reverts commits 12cb5ce101abfaf74421f8cc9f196e708209eb79 and
> >> 98bd4be1ba95f2fe7f543910792b7163a5de06eb.
> >>
> >> Commit 12cb5ce101ab ("libata: use blk taging") causes the following oops
> >> with scsi-mq enabled:
> >>
> >> BUG: unable to handle kernel NULL pointer dereference at 0000000000000058
> >> IP: [<ffffffff804fd46e>] ata_qc_new_init+0x3e/0x120
> >> PGD 32adf0067 PUD 32adf1067 PMD 0
> >> Oops: 0002 [#1] SMP DEBUG_PAGEALLOC
> >> Modules linked in: iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi igb
> >> i2c_algo_bit ptp pps_core pm80xx libsas scsi_transport_sas sg coretemp
> >> eeprom w83795 i2c_i801
> >> CPU: 4 PID: 1450 Comm: cydiskbench Not tainted 4.0.0-rc3 #1
> >> Hardware name: Supermicro X8DTH-i/6/iF/6F/X8DTH, BIOS 2.1b       05/04/12
> >> task: ffff8800ba86d500 ti: ffff88032a064000 task.ti: ffff88032a064000
> >> RIP: 0010:[<ffffffff804fd46e>]  [<ffffffff804fd46e>] ata_qc_new_init+0x3e/0x120
> >> RSP: 0018:ffff88032a067858  EFLAGS: 00010046
> >> RAX: 0000000000000000 RBX: ffff8800ba0d2230 RCX: 000000000000002a
> >> RDX: ffffffff80505ae0 RSI: 0000000000000020 RDI: ffff8800ba0d2230
> >> RBP: ffff88032a067868 R08: 0000000000000201 R09: 0000000000000001
> >> R10: 0000000000000000 R11: 0000000000000000 R12: ffff8800ba0d0000
> >> R13: ffff8800ba0d2230 R14: ffffffff80505ae0 R15: ffff8800ba0d0000
> >> FS:  0000000041223950(0063) GS:ffff88033e480000(0000) knlGS:0000000000000000
> >> CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
> >> CR2: 0000000000000058 CR3: 000000032a0a3000 CR4: 00000000000006e0
> >> Stack:
> >>   ffff880329eee758 ffff880329eee758 ffff88032a0678a8 ffffffff80502dad
> >>   ffff8800ba167978 ffff880329eee758 ffff88032bf9c520 ffff8800ba167978
> >>   ffff88032bf9c520 ffff88032bf9a290 ffff88032a0678b8 ffffffff80506909
> >> Call Trace:
> >>   [<ffffffff80502dad>] ata_scsi_translate+0x3d/0x1b0
> >>   [<ffffffff80506909>] ata_sas_queuecmd+0x149/0x2a0
> >>   [<ffffffffa0046650>] sas_queuecommand+0xa0/0x1f0 [libsas]
> >>   [<ffffffff804ea544>] scsi_dispatch_cmd+0xd4/0x1a0
> >>   [<ffffffff804eb50f>] scsi_queue_rq+0x66f/0x7f0
> >>   [<ffffffff803e5098>] __blk_mq_run_hw_queue+0x208/0x3f0
> >>   [<ffffffff803e54b8>] blk_mq_run_hw_queue+0x88/0xc0
> >>   [<ffffffff803e5c74>] blk_mq_insert_request+0xc4/0x130
> >>   [<ffffffff803e0b63>] blk_execute_rq_nowait+0x73/0x160
> >>   [<ffffffffa0023fca>] sg_common_write+0x3da/0x720 [sg]
> >>   [<ffffffff80308b8e>] ? might_fault+0x5e/0xb0
> >>   [<ffffffffa0025100>] sg_new_write+0x250/0x360 [sg]
> >>   [<ffffffff802a236c>] ? __lock_acquire+0x50c/0xc10
> >>   [<ffffffff802a2b87>] ? lock_release_non_nested+0xa7/0x360
> >>   [<ffffffff8068954b>] ? _raw_spin_unlock_irqrestore+0x3b/0x60
> >>   [<ffffffff80308b8e>] ? might_fault+0x5e/0xb0
> >>   [<ffffffff80308b8e>] ? might_fault+0x5e/0xb0
> >>   [<ffffffffa0025feb>] sg_write+0x13b/0x450 [sg]
> >>   [<ffffffff802a236c>] ? __lock_acquire+0x50c/0xc10
> >>   [<ffffffff802cb1e9>] ? do_futex+0x109/0xbf0
> >>   [<ffffffff80308b8e>] ? might_fault+0x5e/0xb0
> >>   [<ffffffff8032ec91>] vfs_write+0xd1/0x1b0
> >>   [<ffffffff8032ee54>] SyS_write+0x54/0xc0
> >>   [<ffffffff80689932>] system_call_fastpath+0x12/0x17
> >> Code: 24 20 04 0f 85 ec 00 00 00 49 83 3c 24 00 0f 84 cf 00 00 00 83 fe 1f
> >> 0f 87 dc 00 00 00 89 f0 48 69 c0 f0 00 00 00 49 8d 44 04 40 <89> 70 58 48
> >> c7 40 10 00 00 00 00 4c 89 20 48 89 58 08 c7 40 64
> >> RIP  [<ffffffff804fd46e>] ata_qc_new_init+0x3e/0x120
> >>   RSP <ffff88032a067858>
> >> CR2: 0000000000000058
> >> ---[ end trace 43f5eefb64627eff ]---
> >>
> >>
> >> scsi-mq uses a host-wide tag map shared among all devices with some
> >> integer tag values >= ATA_MAX_QUEUE.  These unexpectedly high tag values
> >> cause __ata_qc_from_tag() to return NULL, which is then dereferenced in
> >> ata_qc_new_init(), causing the oops above.
> > Wait, something is missing here. We should not be getting tag values 
> > that are >= ATA_MAX_QUEUE. Instead of reverting this, we need to figure 
> > out why this is happening, and fix it. That is correct way forward here. 
> > What setup is this being reproduced on?
> >
> 
> Hardware:
> PMC 8001 SAS HBA (PCI ID 117c:0042, PCI sub-ID 117c:0043) using pm80xx
> driver
> 4 SATA disks directly connected (no expander)
> 
> Test procedure:
> Send disk read/write commands to SATA disks using the SCSI generic (sg)
> driver.
> 
> Analysis:
> 
> shost->can_queue is 508
> 
> With my patch applied to revert the problematic commits, I added the
> following code to ata_scsi_qc_new():
> 
>     int tag = cmd->request->tag;
>     static int max_tag;
>     if (tag > max_tag) {
>         max_tag = tag;
>         printk(KERN_DEBUG "max tag %d\n", tag);
>     }
> 
> Testing one SATA disk at a time with scsi-mq enabled, I get a max tag of
> 64.  Testing 4 disks at a time with scsi-mq enabled gives a max tag of
> 194.  With scsi-mq disabled, I get a max tag of 30 no matter how many
> disks I test.

ata_qc_new_init() has a check, if !ap->scsi_host, request tag will not
be used and fallback to original tag allocation. you are using sas
controller, so the problem is why ap->scsi_host is set in your case.

Thanks,
Shaohua

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] libata: revert "libata: use blk taging" et al.
  2015-03-11 22:19   ` Tony Battersby
@ 2015-03-12  2:31       ` Shaohua Li
  2015-03-12  2:31       ` Shaohua Li
  1 sibling, 0 replies; 15+ messages in thread
From: Shaohua Li @ 2015-03-12  2:31 UTC (permalink / raw)
  To: Tony Battersby
  Cc: Jens Axboe, Tejun Heo, linux-ide, Christoph Hellwig,
	Dan Williams, linux-kernel

On Wed, Mar 11, 2015 at 06:19:27PM -0400, Tony Battersby wrote:
> On 03/11/2015 05:45 PM, Jens Axboe wrote:
> > On 03/11/2015 02:15 PM, Tony Battersby wrote:
> >> This reverts commits 12cb5ce101abfaf74421f8cc9f196e708209eb79 and
> >> 98bd4be1ba95f2fe7f543910792b7163a5de06eb.
> >>
> >> Commit 12cb5ce101ab ("libata: use blk taging") causes the following oops
> >> with scsi-mq enabled:
> >>
> >> BUG: unable to handle kernel NULL pointer dereference at 0000000000000058
> >> IP: [<ffffffff804fd46e>] ata_qc_new_init+0x3e/0x120
> >> PGD 32adf0067 PUD 32adf1067 PMD 0
> >> Oops: 0002 [#1] SMP DEBUG_PAGEALLOC
> >> Modules linked in: iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi igb
> >> i2c_algo_bit ptp pps_core pm80xx libsas scsi_transport_sas sg coretemp
> >> eeprom w83795 i2c_i801
> >> CPU: 4 PID: 1450 Comm: cydiskbench Not tainted 4.0.0-rc3 #1
> >> Hardware name: Supermicro X8DTH-i/6/iF/6F/X8DTH, BIOS 2.1b       05/04/12
> >> task: ffff8800ba86d500 ti: ffff88032a064000 task.ti: ffff88032a064000
> >> RIP: 0010:[<ffffffff804fd46e>]  [<ffffffff804fd46e>] ata_qc_new_init+0x3e/0x120
> >> RSP: 0018:ffff88032a067858  EFLAGS: 00010046
> >> RAX: 0000000000000000 RBX: ffff8800ba0d2230 RCX: 000000000000002a
> >> RDX: ffffffff80505ae0 RSI: 0000000000000020 RDI: ffff8800ba0d2230
> >> RBP: ffff88032a067868 R08: 0000000000000201 R09: 0000000000000001
> >> R10: 0000000000000000 R11: 0000000000000000 R12: ffff8800ba0d0000
> >> R13: ffff8800ba0d2230 R14: ffffffff80505ae0 R15: ffff8800ba0d0000
> >> FS:  0000000041223950(0063) GS:ffff88033e480000(0000) knlGS:0000000000000000
> >> CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
> >> CR2: 0000000000000058 CR3: 000000032a0a3000 CR4: 00000000000006e0
> >> Stack:
> >>   ffff880329eee758 ffff880329eee758 ffff88032a0678a8 ffffffff80502dad
> >>   ffff8800ba167978 ffff880329eee758 ffff88032bf9c520 ffff8800ba167978
> >>   ffff88032bf9c520 ffff88032bf9a290 ffff88032a0678b8 ffffffff80506909
> >> Call Trace:
> >>   [<ffffffff80502dad>] ata_scsi_translate+0x3d/0x1b0
> >>   [<ffffffff80506909>] ata_sas_queuecmd+0x149/0x2a0
> >>   [<ffffffffa0046650>] sas_queuecommand+0xa0/0x1f0 [libsas]
> >>   [<ffffffff804ea544>] scsi_dispatch_cmd+0xd4/0x1a0
> >>   [<ffffffff804eb50f>] scsi_queue_rq+0x66f/0x7f0
> >>   [<ffffffff803e5098>] __blk_mq_run_hw_queue+0x208/0x3f0
> >>   [<ffffffff803e54b8>] blk_mq_run_hw_queue+0x88/0xc0
> >>   [<ffffffff803e5c74>] blk_mq_insert_request+0xc4/0x130
> >>   [<ffffffff803e0b63>] blk_execute_rq_nowait+0x73/0x160
> >>   [<ffffffffa0023fca>] sg_common_write+0x3da/0x720 [sg]
> >>   [<ffffffff80308b8e>] ? might_fault+0x5e/0xb0
> >>   [<ffffffffa0025100>] sg_new_write+0x250/0x360 [sg]
> >>   [<ffffffff802a236c>] ? __lock_acquire+0x50c/0xc10
> >>   [<ffffffff802a2b87>] ? lock_release_non_nested+0xa7/0x360
> >>   [<ffffffff8068954b>] ? _raw_spin_unlock_irqrestore+0x3b/0x60
> >>   [<ffffffff80308b8e>] ? might_fault+0x5e/0xb0
> >>   [<ffffffff80308b8e>] ? might_fault+0x5e/0xb0
> >>   [<ffffffffa0025feb>] sg_write+0x13b/0x450 [sg]
> >>   [<ffffffff802a236c>] ? __lock_acquire+0x50c/0xc10
> >>   [<ffffffff802cb1e9>] ? do_futex+0x109/0xbf0
> >>   [<ffffffff80308b8e>] ? might_fault+0x5e/0xb0
> >>   [<ffffffff8032ec91>] vfs_write+0xd1/0x1b0
> >>   [<ffffffff8032ee54>] SyS_write+0x54/0xc0
> >>   [<ffffffff80689932>] system_call_fastpath+0x12/0x17
> >> Code: 24 20 04 0f 85 ec 00 00 00 49 83 3c 24 00 0f 84 cf 00 00 00 83 fe 1f
> >> 0f 87 dc 00 00 00 89 f0 48 69 c0 f0 00 00 00 49 8d 44 04 40 <89> 70 58 48
> >> c7 40 10 00 00 00 00 4c 89 20 48 89 58 08 c7 40 64
> >> RIP  [<ffffffff804fd46e>] ata_qc_new_init+0x3e/0x120
> >>   RSP <ffff88032a067858>
> >> CR2: 0000000000000058
> >> ---[ end trace 43f5eefb64627eff ]---
> >>
> >>
> >> scsi-mq uses a host-wide tag map shared among all devices with some
> >> integer tag values >= ATA_MAX_QUEUE.  These unexpectedly high tag values
> >> cause __ata_qc_from_tag() to return NULL, which is then dereferenced in
> >> ata_qc_new_init(), causing the oops above.
> > Wait, something is missing here. We should not be getting tag values 
> > that are >= ATA_MAX_QUEUE. Instead of reverting this, we need to figure 
> > out why this is happening, and fix it. That is correct way forward here. 
> > What setup is this being reproduced on?
> >
> 
> Hardware:
> PMC 8001 SAS HBA (PCI ID 117c:0042, PCI sub-ID 117c:0043) using pm80xx
> driver
> 4 SATA disks directly connected (no expander)
> 
> Test procedure:
> Send disk read/write commands to SATA disks using the SCSI generic (sg)
> driver.
> 
> Analysis:
> 
> shost->can_queue is 508
> 
> With my patch applied to revert the problematic commits, I added the
> following code to ata_scsi_qc_new():
> 
>     int tag = cmd->request->tag;
>     static int max_tag;
>     if (tag > max_tag) {
>         max_tag = tag;
>         printk(KERN_DEBUG "max tag %d\n", tag);
>     }
> 
> Testing one SATA disk at a time with scsi-mq enabled, I get a max tag of
> 64.  Testing 4 disks at a time with scsi-mq enabled gives a max tag of
> 194.  With scsi-mq disabled, I get a max tag of 30 no matter how many
> disks I test.

Can you please try this debug patch:

diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c
index 932d9cc..46f153f 100644
--- a/drivers/scsi/libsas/sas_ata.c
+++ b/drivers/scsi/libsas/sas_ata.c
@@ -572,7 +572,6 @@ int sas_ata_init(struct domain_device *found_dev)
 
 	ap->private_data = found_dev;
 	ap->cbl = ATA_CBL_SATA;
-	ap->scsi_host = shost;
 	rc = ata_sas_port_init(ap);
 	if (rc) {
 		ata_sas_port_destroy(ap);

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH] libata: revert "libata: use blk taging" et al.
@ 2015-03-12  2:31       ` Shaohua Li
  0 siblings, 0 replies; 15+ messages in thread
From: Shaohua Li @ 2015-03-12  2:31 UTC (permalink / raw)
  To: Tony Battersby
  Cc: Jens Axboe, Tejun Heo, linux-ide, Christoph Hellwig,
	Dan Williams, linux-kernel

On Wed, Mar 11, 2015 at 06:19:27PM -0400, Tony Battersby wrote:
> On 03/11/2015 05:45 PM, Jens Axboe wrote:
> > On 03/11/2015 02:15 PM, Tony Battersby wrote:
> >> This reverts commits 12cb5ce101abfaf74421f8cc9f196e708209eb79 and
> >> 98bd4be1ba95f2fe7f543910792b7163a5de06eb.
> >>
> >> Commit 12cb5ce101ab ("libata: use blk taging") causes the following oops
> >> with scsi-mq enabled:
> >>
> >> BUG: unable to handle kernel NULL pointer dereference at 0000000000000058
> >> IP: [<ffffffff804fd46e>] ata_qc_new_init+0x3e/0x120
> >> PGD 32adf0067 PUD 32adf1067 PMD 0
> >> Oops: 0002 [#1] SMP DEBUG_PAGEALLOC
> >> Modules linked in: iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi igb
> >> i2c_algo_bit ptp pps_core pm80xx libsas scsi_transport_sas sg coretemp
> >> eeprom w83795 i2c_i801
> >> CPU: 4 PID: 1450 Comm: cydiskbench Not tainted 4.0.0-rc3 #1
> >> Hardware name: Supermicro X8DTH-i/6/iF/6F/X8DTH, BIOS 2.1b       05/04/12
> >> task: ffff8800ba86d500 ti: ffff88032a064000 task.ti: ffff88032a064000
> >> RIP: 0010:[<ffffffff804fd46e>]  [<ffffffff804fd46e>] ata_qc_new_init+0x3e/0x120
> >> RSP: 0018:ffff88032a067858  EFLAGS: 00010046
> >> RAX: 0000000000000000 RBX: ffff8800ba0d2230 RCX: 000000000000002a
> >> RDX: ffffffff80505ae0 RSI: 0000000000000020 RDI: ffff8800ba0d2230
> >> RBP: ffff88032a067868 R08: 0000000000000201 R09: 0000000000000001
> >> R10: 0000000000000000 R11: 0000000000000000 R12: ffff8800ba0d0000
> >> R13: ffff8800ba0d2230 R14: ffffffff80505ae0 R15: ffff8800ba0d0000
> >> FS:  0000000041223950(0063) GS:ffff88033e480000(0000) knlGS:0000000000000000
> >> CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
> >> CR2: 0000000000000058 CR3: 000000032a0a3000 CR4: 00000000000006e0
> >> Stack:
> >>   ffff880329eee758 ffff880329eee758 ffff88032a0678a8 ffffffff80502dad
> >>   ffff8800ba167978 ffff880329eee758 ffff88032bf9c520 ffff8800ba167978
> >>   ffff88032bf9c520 ffff88032bf9a290 ffff88032a0678b8 ffffffff80506909
> >> Call Trace:
> >>   [<ffffffff80502dad>] ata_scsi_translate+0x3d/0x1b0
> >>   [<ffffffff80506909>] ata_sas_queuecmd+0x149/0x2a0
> >>   [<ffffffffa0046650>] sas_queuecommand+0xa0/0x1f0 [libsas]
> >>   [<ffffffff804ea544>] scsi_dispatch_cmd+0xd4/0x1a0
> >>   [<ffffffff804eb50f>] scsi_queue_rq+0x66f/0x7f0
> >>   [<ffffffff803e5098>] __blk_mq_run_hw_queue+0x208/0x3f0
> >>   [<ffffffff803e54b8>] blk_mq_run_hw_queue+0x88/0xc0
> >>   [<ffffffff803e5c74>] blk_mq_insert_request+0xc4/0x130
> >>   [<ffffffff803e0b63>] blk_execute_rq_nowait+0x73/0x160
> >>   [<ffffffffa0023fca>] sg_common_write+0x3da/0x720 [sg]
> >>   [<ffffffff80308b8e>] ? might_fault+0x5e/0xb0
> >>   [<ffffffffa0025100>] sg_new_write+0x250/0x360 [sg]
> >>   [<ffffffff802a236c>] ? __lock_acquire+0x50c/0xc10
> >>   [<ffffffff802a2b87>] ? lock_release_non_nested+0xa7/0x360
> >>   [<ffffffff8068954b>] ? _raw_spin_unlock_irqrestore+0x3b/0x60
> >>   [<ffffffff80308b8e>] ? might_fault+0x5e/0xb0
> >>   [<ffffffff80308b8e>] ? might_fault+0x5e/0xb0
> >>   [<ffffffffa0025feb>] sg_write+0x13b/0x450 [sg]
> >>   [<ffffffff802a236c>] ? __lock_acquire+0x50c/0xc10
> >>   [<ffffffff802cb1e9>] ? do_futex+0x109/0xbf0
> >>   [<ffffffff80308b8e>] ? might_fault+0x5e/0xb0
> >>   [<ffffffff8032ec91>] vfs_write+0xd1/0x1b0
> >>   [<ffffffff8032ee54>] SyS_write+0x54/0xc0
> >>   [<ffffffff80689932>] system_call_fastpath+0x12/0x17
> >> Code: 24 20 04 0f 85 ec 00 00 00 49 83 3c 24 00 0f 84 cf 00 00 00 83 fe 1f
> >> 0f 87 dc 00 00 00 89 f0 48 69 c0 f0 00 00 00 49 8d 44 04 40 <89> 70 58 48
> >> c7 40 10 00 00 00 00 4c 89 20 48 89 58 08 c7 40 64
> >> RIP  [<ffffffff804fd46e>] ata_qc_new_init+0x3e/0x120
> >>   RSP <ffff88032a067858>
> >> CR2: 0000000000000058
> >> ---[ end trace 43f5eefb64627eff ]---
> >>
> >>
> >> scsi-mq uses a host-wide tag map shared among all devices with some
> >> integer tag values >= ATA_MAX_QUEUE.  These unexpectedly high tag values
> >> cause __ata_qc_from_tag() to return NULL, which is then dereferenced in
> >> ata_qc_new_init(), causing the oops above.
> > Wait, something is missing here. We should not be getting tag values 
> > that are >= ATA_MAX_QUEUE. Instead of reverting this, we need to figure 
> > out why this is happening, and fix it. That is correct way forward here. 
> > What setup is this being reproduced on?
> >
> 
> Hardware:
> PMC 8001 SAS HBA (PCI ID 117c:0042, PCI sub-ID 117c:0043) using pm80xx
> driver
> 4 SATA disks directly connected (no expander)
> 
> Test procedure:
> Send disk read/write commands to SATA disks using the SCSI generic (sg)
> driver.
> 
> Analysis:
> 
> shost->can_queue is 508
> 
> With my patch applied to revert the problematic commits, I added the
> following code to ata_scsi_qc_new():
> 
>     int tag = cmd->request->tag;
>     static int max_tag;
>     if (tag > max_tag) {
>         max_tag = tag;
>         printk(KERN_DEBUG "max tag %d\n", tag);
>     }
> 
> Testing one SATA disk at a time with scsi-mq enabled, I get a max tag of
> 64.  Testing 4 disks at a time with scsi-mq enabled gives a max tag of
> 194.  With scsi-mq disabled, I get a max tag of 30 no matter how many
> disks I test.

Can you please try this debug patch:

diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c
index 932d9cc..46f153f 100644
--- a/drivers/scsi/libsas/sas_ata.c
+++ b/drivers/scsi/libsas/sas_ata.c
@@ -572,7 +572,6 @@ int sas_ata_init(struct domain_device *found_dev)
 
 	ap->private_data = found_dev;
 	ap->cbl = ATA_CBL_SATA;
-	ap->scsi_host = shost;
 	rc = ata_sas_port_init(ap);
 	if (rc) {
 		ata_sas_port_destroy(ap);

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH] libata: revert "libata: use blk taging" et al.
  2015-03-12  2:31       ` Shaohua Li
  (?)
@ 2015-03-12 10:14       ` Dan Williams
  2015-03-12 12:11         ` Tejun Heo
  -1 siblings, 1 reply; 15+ messages in thread
From: Dan Williams @ 2015-03-12 10:14 UTC (permalink / raw)
  To: Shaohua Li
  Cc: Tony Battersby, Jens Axboe, Tejun Heo, IDE/ATA development list,
	Christoph Hellwig, linux-kernel

On Wed, Mar 11, 2015 at 10:31 PM, Shaohua Li <shli@fb.com> wrote:
> On Wed, Mar 11, 2015 at 06:19:27PM -0400, Tony Battersby wrote:
>> On 03/11/2015 05:45 PM, Jens Axboe wrote:
>> > On 03/11/2015 02:15 PM, Tony Battersby wrote:
>> >> This reverts commits 12cb5ce101abfaf74421f8cc9f196e708209eb79 and
>> >> 98bd4be1ba95f2fe7f543910792b7163a5de06eb.
>> >>
>> >> Commit 12cb5ce101ab ("libata: use blk taging") causes the following oops
>> >> with scsi-mq enabled:
>> >>
>> >> BUG: unable to handle kernel NULL pointer dereference at 0000000000000058
>> >> IP: [<ffffffff804fd46e>] ata_qc_new_init+0x3e/0x120
>> >> PGD 32adf0067 PUD 32adf1067 PMD 0
>> >> Oops: 0002 [#1] SMP DEBUG_PAGEALLOC
>> >> Modules linked in: iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi igb
>> >> i2c_algo_bit ptp pps_core pm80xx libsas scsi_transport_sas sg coretemp
>> >> eeprom w83795 i2c_i801
>> >> CPU: 4 PID: 1450 Comm: cydiskbench Not tainted 4.0.0-rc3 #1
>> >> Hardware name: Supermicro X8DTH-i/6/iF/6F/X8DTH, BIOS 2.1b       05/04/12
>> >> task: ffff8800ba86d500 ti: ffff88032a064000 task.ti: ffff88032a064000
>> >> RIP: 0010:[<ffffffff804fd46e>]  [<ffffffff804fd46e>] ata_qc_new_init+0x3e/0x120
>> >> RSP: 0018:ffff88032a067858  EFLAGS: 00010046
>> >> RAX: 0000000000000000 RBX: ffff8800ba0d2230 RCX: 000000000000002a
>> >> RDX: ffffffff80505ae0 RSI: 0000000000000020 RDI: ffff8800ba0d2230
>> >> RBP: ffff88032a067868 R08: 0000000000000201 R09: 0000000000000001
>> >> R10: 0000000000000000 R11: 0000000000000000 R12: ffff8800ba0d0000
>> >> R13: ffff8800ba0d2230 R14: ffffffff80505ae0 R15: ffff8800ba0d0000
>> >> FS:  0000000041223950(0063) GS:ffff88033e480000(0000) knlGS:0000000000000000
>> >> CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
>> >> CR2: 0000000000000058 CR3: 000000032a0a3000 CR4: 00000000000006e0
>> >> Stack:
>> >>   ffff880329eee758 ffff880329eee758 ffff88032a0678a8 ffffffff80502dad
>> >>   ffff8800ba167978 ffff880329eee758 ffff88032bf9c520 ffff8800ba167978
>> >>   ffff88032bf9c520 ffff88032bf9a290 ffff88032a0678b8 ffffffff80506909
>> >> Call Trace:
>> >>   [<ffffffff80502dad>] ata_scsi_translate+0x3d/0x1b0
>> >>   [<ffffffff80506909>] ata_sas_queuecmd+0x149/0x2a0
>> >>   [<ffffffffa0046650>] sas_queuecommand+0xa0/0x1f0 [libsas]
>> >>   [<ffffffff804ea544>] scsi_dispatch_cmd+0xd4/0x1a0
>> >>   [<ffffffff804eb50f>] scsi_queue_rq+0x66f/0x7f0
>> >>   [<ffffffff803e5098>] __blk_mq_run_hw_queue+0x208/0x3f0
>> >>   [<ffffffff803e54b8>] blk_mq_run_hw_queue+0x88/0xc0
>> >>   [<ffffffff803e5c74>] blk_mq_insert_request+0xc4/0x130
>> >>   [<ffffffff803e0b63>] blk_execute_rq_nowait+0x73/0x160
>> >>   [<ffffffffa0023fca>] sg_common_write+0x3da/0x720 [sg]
>> >>   [<ffffffff80308b8e>] ? might_fault+0x5e/0xb0
>> >>   [<ffffffffa0025100>] sg_new_write+0x250/0x360 [sg]
>> >>   [<ffffffff802a236c>] ? __lock_acquire+0x50c/0xc10
>> >>   [<ffffffff802a2b87>] ? lock_release_non_nested+0xa7/0x360
>> >>   [<ffffffff8068954b>] ? _raw_spin_unlock_irqrestore+0x3b/0x60
>> >>   [<ffffffff80308b8e>] ? might_fault+0x5e/0xb0
>> >>   [<ffffffff80308b8e>] ? might_fault+0x5e/0xb0
>> >>   [<ffffffffa0025feb>] sg_write+0x13b/0x450 [sg]
>> >>   [<ffffffff802a236c>] ? __lock_acquire+0x50c/0xc10
>> >>   [<ffffffff802cb1e9>] ? do_futex+0x109/0xbf0
>> >>   [<ffffffff80308b8e>] ? might_fault+0x5e/0xb0
>> >>   [<ffffffff8032ec91>] vfs_write+0xd1/0x1b0
>> >>   [<ffffffff8032ee54>] SyS_write+0x54/0xc0
>> >>   [<ffffffff80689932>] system_call_fastpath+0x12/0x17
>> >> Code: 24 20 04 0f 85 ec 00 00 00 49 83 3c 24 00 0f 84 cf 00 00 00 83 fe 1f
>> >> 0f 87 dc 00 00 00 89 f0 48 69 c0 f0 00 00 00 49 8d 44 04 40 <89> 70 58 48
>> >> c7 40 10 00 00 00 00 4c 89 20 48 89 58 08 c7 40 64
>> >> RIP  [<ffffffff804fd46e>] ata_qc_new_init+0x3e/0x120
>> >>   RSP <ffff88032a067858>
>> >> CR2: 0000000000000058
>> >> ---[ end trace 43f5eefb64627eff ]---
>> >>
>> >>
>> >> scsi-mq uses a host-wide tag map shared among all devices with some
>> >> integer tag values >= ATA_MAX_QUEUE.  These unexpectedly high tag values
>> >> cause __ata_qc_from_tag() to return NULL, which is then dereferenced in
>> >> ata_qc_new_init(), causing the oops above.
>> > Wait, something is missing here. We should not be getting tag values
>> > that are >= ATA_MAX_QUEUE. Instead of reverting this, we need to figure
>> > out why this is happening, and fix it. That is correct way forward here.
>> > What setup is this being reproduced on?
>> >
>>
>> Hardware:
>> PMC 8001 SAS HBA (PCI ID 117c:0042, PCI sub-ID 117c:0043) using pm80xx
>> driver
>> 4 SATA disks directly connected (no expander)
>>
>> Test procedure:
>> Send disk read/write commands to SATA disks using the SCSI generic (sg)
>> driver.
>>
>> Analysis:
>>
>> shost->can_queue is 508
>>
>> With my patch applied to revert the problematic commits, I added the
>> following code to ata_scsi_qc_new():
>>
>>     int tag = cmd->request->tag;
>>     static int max_tag;
>>     if (tag > max_tag) {
>>         max_tag = tag;
>>         printk(KERN_DEBUG "max tag %d\n", tag);
>>     }
>>
>> Testing one SATA disk at a time with scsi-mq enabled, I get a max tag of
>> 64.  Testing 4 disks at a time with scsi-mq enabled gives a max tag of
>> 194.  With scsi-mq disabled, I get a max tag of 30 no matter how many
>> disks I test.
>
> Can you please try this debug patch:
>
> diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c
> index 932d9cc..46f153f 100644
> --- a/drivers/scsi/libsas/sas_ata.c
> +++ b/drivers/scsi/libsas/sas_ata.c
> @@ -572,7 +572,6 @@ int sas_ata_init(struct domain_device *found_dev)
>
>         ap->private_data = found_dev;
>         ap->cbl = ATA_CBL_SATA;
> -       ap->scsi_host = shost;
>         rc = ata_sas_port_init(ap);
>         if (rc) {
>                 ata_sas_port_destroy(ap);

We need a scsi_host for error recovery, see:

ata_std_sched_eh()

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] libata: revert "libata: use blk taging" et al.
  2015-03-12 10:14       ` Dan Williams
@ 2015-03-12 12:11         ` Tejun Heo
  2015-03-12 12:46           ` Shaohua Li
  0 siblings, 1 reply; 15+ messages in thread
From: Tejun Heo @ 2015-03-12 12:11 UTC (permalink / raw)
  To: Dan Williams
  Cc: Shaohua Li, Tony Battersby, Jens Axboe, IDE/ATA development list,
	Christoph Hellwig, linux-kernel

On Thu, Mar 12, 2015 at 06:14:24AM -0400, Dan Williams wrote:
> > @@ -572,7 +572,6 @@ int sas_ata_init(struct domain_device *found_dev)
> >
> >         ap->private_data = found_dev;
> >         ap->cbl = ATA_CBL_SATA;
> > -       ap->scsi_host = shost;
> >         rc = ata_sas_port_init(ap);
> >         if (rc) {
> >                 ata_sas_port_destroy(ap);
> 
> We need a scsi_host for error recovery, see:
> 
> ata_std_sched_eh()

IOW, let's just add a flag bit to identify SAS hosts.

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] libata: revert "libata: use blk taging" et al.
  2015-03-12 12:11         ` Tejun Heo
@ 2015-03-12 12:46           ` Shaohua Li
  2015-03-12 13:51             ` Tony Battersby
  2015-03-12 13:59             ` Tejun Heo
  0 siblings, 2 replies; 15+ messages in thread
From: Shaohua Li @ 2015-03-12 12:46 UTC (permalink / raw)
  To: Tejun Heo
  Cc: Dan Williams, Tony Battersby, Jens Axboe,
	IDE/ATA development list, Christoph Hellwig, linux-kernel

On Thu, Mar 12, 2015 at 08:11:57AM -0400, Tejun Heo wrote:
> On Thu, Mar 12, 2015 at 06:14:24AM -0400, Dan Williams wrote:
> > > @@ -572,7 +572,6 @@ int sas_ata_init(struct domain_device *found_dev)
> > >
> > >         ap->private_data = found_dev;
> > >         ap->cbl = ATA_CBL_SATA;
> > > -       ap->scsi_host = shost;
> > >         rc = ata_sas_port_init(ap);
> > >         if (rc) {
> > >                 ata_sas_port_destroy(ap);
> > 
> > We need a scsi_host for error recovery, see:
> > 
> > ata_std_sched_eh()
> 
> IOW, let's just add a flag bit to identify SAS hosts.

Yep. Tony, can you please try below patch please?


ata: Add a new flag for sas controller

Add a new flag to destinguish sas controller. sas controller has its own tag
allocation, which doesn't directly match to ata tag. We use the new flag for
sas ata tag allocation.

Reported-by: Tony Battersby <tonyb@cybernetics.com>
Signed-off-by: Shaohua Li <shli@fb.com>

diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 4c35f08..ef150eb 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -4737,7 +4737,7 @@ struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev, int tag)
 		return NULL;
 
 	/* libsas case */
-	if (!ap->scsi_host) {
+	if (ap->flags & ATA_FLAG_SAS_HOST) {
 		tag = ata_sas_allocate_tag(ap);
 		if (tag < 0)
 			return NULL;
@@ -4776,7 +4776,7 @@ void ata_qc_free(struct ata_queued_cmd *qc)
 	tag = qc->tag;
 	if (likely(ata_tag_valid(tag))) {
 		qc->tag = ATA_TAG_POISON;
-		if (!ap->scsi_host)
+		if (ap->flags & ATA_FLAG_SAS_HOST)
 			ata_sas_free_tag(tag, ap);
 	}
 }
diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
index 9219953..d9afc51 100644
--- a/drivers/scsi/ipr.c
+++ b/drivers/scsi/ipr.c
@@ -6815,7 +6815,8 @@ static struct ata_port_operations ipr_sata_ops = {
 };
 
 static struct ata_port_info sata_port_info = {
-	.flags		= ATA_FLAG_SATA | ATA_FLAG_PIO_DMA,
+	.flags		= ATA_FLAG_SATA | ATA_FLAG_PIO_DMA |
+			  ATA_FLAG_SAS_HOST,
 	.pio_mask	= ATA_PIO4_ONLY,
 	.mwdma_mask	= ATA_MWDMA2,
 	.udma_mask	= ATA_UDMA6,
diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c
index 932d9cc..9c706d8 100644
--- a/drivers/scsi/libsas/sas_ata.c
+++ b/drivers/scsi/libsas/sas_ata.c
@@ -547,7 +547,8 @@ static struct ata_port_operations sas_sata_ops = {
 };
 
 static struct ata_port_info sata_port_info = {
-	.flags = ATA_FLAG_SATA | ATA_FLAG_PIO_DMA | ATA_FLAG_NCQ,
+	.flags = ATA_FLAG_SATA | ATA_FLAG_PIO_DMA | ATA_FLAG_NCQ |
+		 ATA_FLAG_SAS_HOST,
 	.pio_mask = ATA_PIO4,
 	.mwdma_mask = ATA_MWDMA2,
 	.udma_mask = ATA_UDMA6,
diff --git a/include/linux/libata.h b/include/linux/libata.h
index fc03efa..6b08cc1 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -232,6 +232,7 @@ enum {
 					      * led */
 	ATA_FLAG_NO_DIPM	= (1 << 23), /* host not happy with DIPM */
 	ATA_FLAG_LOWTAG		= (1 << 24), /* host wants lowest available tag */
+	ATA_FLAG_SAS_HOST	= (1 << 25), /* SAS host */
 
 	/* bits 24:31 of ap->flags are reserved for LLD specific flags */
 

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH] libata: revert "libata: use blk taging" et al.
  2015-03-12 12:46           ` Shaohua Li
@ 2015-03-12 13:51             ` Tony Battersby
  2015-03-12 13:59             ` Tejun Heo
  1 sibling, 0 replies; 15+ messages in thread
From: Tony Battersby @ 2015-03-12 13:51 UTC (permalink / raw)
  To: Shaohua Li, Tejun Heo
  Cc: Dan Williams, Jens Axboe, IDE/ATA development list,
	Christoph Hellwig, linux-kernel

On 03/12/2015 08:46 AM, Shaohua Li wrote:
> Yep. Tony, can you please try below patch please?
>
>
> ata: Add a new flag for sas controller
>
> Add a new flag to destinguish sas controller. sas controller has its own tag
> allocation, which doesn't directly match to ata tag. We use the new flag for
> sas ata tag allocation.
>
> Reported-by: Tony Battersby <tonyb@cybernetics.com>
> Signed-off-by: Shaohua Li <shli@fb.com>
>

Yes, that fixes it.  Thanks!

Tested-by: Tony Battersby <tonyb@cybernetics.com>

> diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
> index 4c35f08..ef150eb 100644
> --- a/drivers/ata/libata-core.c
> +++ b/drivers/ata/libata-core.c
> @@ -4737,7 +4737,7 @@ struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev, int tag)
>  		return NULL;
>  
>  	/* libsas case */
> -	if (!ap->scsi_host) {
> +	if (ap->flags & ATA_FLAG_SAS_HOST) {
>  		tag = ata_sas_allocate_tag(ap);
>  		if (tag < 0)
>  			return NULL;
> @@ -4776,7 +4776,7 @@ void ata_qc_free(struct ata_queued_cmd *qc)
>  	tag = qc->tag;
>  	if (likely(ata_tag_valid(tag))) {
>  		qc->tag = ATA_TAG_POISON;
> -		if (!ap->scsi_host)
> +		if (ap->flags & ATA_FLAG_SAS_HOST)
>  			ata_sas_free_tag(tag, ap);
>  	}
>  }
> diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
> index 9219953..d9afc51 100644
> --- a/drivers/scsi/ipr.c
> +++ b/drivers/scsi/ipr.c
> @@ -6815,7 +6815,8 @@ static struct ata_port_operations ipr_sata_ops = {
>  };
>  
>  static struct ata_port_info sata_port_info = {
> -	.flags		= ATA_FLAG_SATA | ATA_FLAG_PIO_DMA,
> +	.flags		= ATA_FLAG_SATA | ATA_FLAG_PIO_DMA |
> +			  ATA_FLAG_SAS_HOST,
>  	.pio_mask	= ATA_PIO4_ONLY,
>  	.mwdma_mask	= ATA_MWDMA2,
>  	.udma_mask	= ATA_UDMA6,
> diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c
> index 932d9cc..9c706d8 100644
> --- a/drivers/scsi/libsas/sas_ata.c
> +++ b/drivers/scsi/libsas/sas_ata.c
> @@ -547,7 +547,8 @@ static struct ata_port_operations sas_sata_ops = {
>  };
>  
>  static struct ata_port_info sata_port_info = {
> -	.flags = ATA_FLAG_SATA | ATA_FLAG_PIO_DMA | ATA_FLAG_NCQ,
> +	.flags = ATA_FLAG_SATA | ATA_FLAG_PIO_DMA | ATA_FLAG_NCQ |
> +		 ATA_FLAG_SAS_HOST,
>  	.pio_mask = ATA_PIO4,
>  	.mwdma_mask = ATA_MWDMA2,
>  	.udma_mask = ATA_UDMA6,
> diff --git a/include/linux/libata.h b/include/linux/libata.h
> index fc03efa..6b08cc1 100644
> --- a/include/linux/libata.h
> +++ b/include/linux/libata.h
> @@ -232,6 +232,7 @@ enum {
>  					      * led */
>  	ATA_FLAG_NO_DIPM	= (1 << 23), /* host not happy with DIPM */
>  	ATA_FLAG_LOWTAG		= (1 << 24), /* host wants lowest available tag */
> +	ATA_FLAG_SAS_HOST	= (1 << 25), /* SAS host */
>  
>  	/* bits 24:31 of ap->flags are reserved for LLD specific flags */
>  
>


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] libata: revert "libata: use blk taging" et al.
  2015-03-12 12:46           ` Shaohua Li
  2015-03-12 13:51             ` Tony Battersby
@ 2015-03-12 13:59             ` Tejun Heo
  2015-03-12 17:32               ` Shaohua Li
  1 sibling, 1 reply; 15+ messages in thread
From: Tejun Heo @ 2015-03-12 13:59 UTC (permalink / raw)
  To: Shaohua Li
  Cc: Dan Williams, Tony Battersby, Jens Axboe,
	IDE/ATA development list, Christoph Hellwig, linux-kernel

On Thu, Mar 12, 2015 at 05:46:01AM -0700, Shaohua Li wrote:
> ata: Add a new flag for sas controller
> 
> Add a new flag to destinguish sas controller. sas controller has its own tag
> allocation, which doesn't directly match to ata tag. We use the new flag for
> sas ata tag allocation.

This doesn't really explain why the patch came to be.  Can you please
update it to include what broke how and got fixed?  A Fixes: tag would
be nice too.

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] libata: revert "libata: use blk taging" et al.
  2015-03-12 13:59             ` Tejun Heo
@ 2015-03-12 17:32               ` Shaohua Li
  2015-03-19 18:17                 ` [PATCH libata/for-4.0-fixes] ata: Add a new flag to destinguish sas controller Tejun Heo
  0 siblings, 1 reply; 15+ messages in thread
From: Shaohua Li @ 2015-03-12 17:32 UTC (permalink / raw)
  To: Tejun Heo
  Cc: Dan Williams, Tony Battersby, Jens Axboe,
	IDE/ATA development list, Christoph Hellwig, linux-kernel

On Thu, Mar 12, 2015 at 09:59:26AM -0400, Tejun Heo wrote:
> On Thu, Mar 12, 2015 at 05:46:01AM -0700, Shaohua Li wrote:
> > ata: Add a new flag for sas controller
> > 
> > Add a new flag to destinguish sas controller. sas controller has its own tag
> > allocation, which doesn't directly match to ata tag. We use the new flag for
> > sas ata tag allocation.
> 
> This doesn't really explain why the patch came to be.  Can you please
> update it to include what broke how and got fixed?  A Fixes: tag would
> be nice too.

Is this better?


ata: Add a new flag to destinguish sas controller
 
SAS controller has its own tag allocation, which doesn't directly match to ATA
tag, so SAS and SATA have different code path for ata tags. Originally we use
port->scsi_host (98bd4be1) to destinguish SAS controller, but libsas set
->scsi_host too, so we can't use it for the destinguish, we add a new flag for
this purpose.

Reported-and-tested-by: Tony Battersby <tonyb@cybernetics.com>
Signed-off-by: Shaohua Li <shli@fb.com>

diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 4c35f08..ef150eb 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -4737,7 +4737,7 @@ struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev, int tag)
 		return NULL;
 
 	/* libsas case */
-	if (!ap->scsi_host) {
+	if (ap->flags & ATA_FLAG_SAS_HOST) {
 		tag = ata_sas_allocate_tag(ap);
 		if (tag < 0)
 			return NULL;
@@ -4776,7 +4776,7 @@ void ata_qc_free(struct ata_queued_cmd *qc)
 	tag = qc->tag;
 	if (likely(ata_tag_valid(tag))) {
 		qc->tag = ATA_TAG_POISON;
-		if (!ap->scsi_host)
+		if (ap->flags & ATA_FLAG_SAS_HOST)
 			ata_sas_free_tag(tag, ap);
 	}
 }
diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
index 9219953..d9afc51 100644
--- a/drivers/scsi/ipr.c
+++ b/drivers/scsi/ipr.c
@@ -6815,7 +6815,8 @@ static struct ata_port_operations ipr_sata_ops = {
 };
 
 static struct ata_port_info sata_port_info = {
-	.flags		= ATA_FLAG_SATA | ATA_FLAG_PIO_DMA,
+	.flags		= ATA_FLAG_SATA | ATA_FLAG_PIO_DMA |
+			  ATA_FLAG_SAS_HOST,
 	.pio_mask	= ATA_PIO4_ONLY,
 	.mwdma_mask	= ATA_MWDMA2,
 	.udma_mask	= ATA_UDMA6,
diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c
index 932d9cc..9c706d8 100644
--- a/drivers/scsi/libsas/sas_ata.c
+++ b/drivers/scsi/libsas/sas_ata.c
@@ -547,7 +547,8 @@ static struct ata_port_operations sas_sata_ops = {
 };
 
 static struct ata_port_info sata_port_info = {
-	.flags = ATA_FLAG_SATA | ATA_FLAG_PIO_DMA | ATA_FLAG_NCQ,
+	.flags = ATA_FLAG_SATA | ATA_FLAG_PIO_DMA | ATA_FLAG_NCQ |
+		 ATA_FLAG_SAS_HOST,
 	.pio_mask = ATA_PIO4,
 	.mwdma_mask = ATA_MWDMA2,
 	.udma_mask = ATA_UDMA6,
diff --git a/include/linux/libata.h b/include/linux/libata.h
index fc03efa..6b08cc1 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -232,6 +232,7 @@ enum {
 					      * led */
 	ATA_FLAG_NO_DIPM	= (1 << 23), /* host not happy with DIPM */
 	ATA_FLAG_LOWTAG		= (1 << 24), /* host wants lowest available tag */
+	ATA_FLAG_SAS_HOST	= (1 << 25), /* SAS host */
 
 	/* bits 24:31 of ap->flags are reserved for LLD specific flags */
 

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH libata/for-4.0-fixes] ata: Add a new flag to destinguish sas controller
  2015-03-12 17:32               ` Shaohua Li
@ 2015-03-19 18:17                 ` Tejun Heo
  0 siblings, 0 replies; 15+ messages in thread
From: Tejun Heo @ 2015-03-19 18:17 UTC (permalink / raw)
  To: Shaohua Li
  Cc: Dan Williams, Tony Battersby, Jens Axboe,
	IDE/ATA development list, Christoph Hellwig, linux-kernel

>From 5067c0469c643512f24786990e315f9c15cc7d24 Mon Sep 17 00:00:00 2001
From: Shaohua Li <shli@fb.com>
Date: Thu, 12 Mar 2015 10:32:18 -0700

SAS controller has its own tag allocation, which doesn't directly match to ATA
tag, so SAS and SATA have different code path for ata tags. Originally we use
port->scsi_host (98bd4be1) to destinguish SAS controller, but libsas set
->scsi_host too, so we can't use it for the destinguish, we add a new flag for
this purpose.

Without this patch, the following oops can happen because scsi-mq uses
a host-wide tag map shared among all devices with some integer tag
values >= ATA_MAX_QUEUE.  These unexpectedly high tag values cause
__ata_qc_from_tag() to return NULL, which is then dereferenced in
ata_qc_new_init().

  BUG: unable to handle kernel NULL pointer dereference at 0000000000000058
  IP: [<ffffffff804fd46e>] ata_qc_new_init+0x3e/0x120
  PGD 32adf0067 PUD 32adf1067 PMD 0
  Oops: 0002 [#1] SMP DEBUG_PAGEALLOC
  Modules linked in: iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi igb
  i2c_algo_bit ptp pps_core pm80xx libsas scsi_transport_sas sg coretemp
  eeprom w83795 i2c_i801
  CPU: 4 PID: 1450 Comm: cydiskbench Not tainted 4.0.0-rc3 #1
  Hardware name: Supermicro X8DTH-i/6/iF/6F/X8DTH, BIOS 2.1b       05/04/12
  task: ffff8800ba86d500 ti: ffff88032a064000 task.ti: ffff88032a064000
  RIP: 0010:[<ffffffff804fd46e>]  [<ffffffff804fd46e>] ata_qc_new_init+0x3e/0x120
  RSP: 0018:ffff88032a067858  EFLAGS: 00010046
  RAX: 0000000000000000 RBX: ffff8800ba0d2230 RCX: 000000000000002a
  RDX: ffffffff80505ae0 RSI: 0000000000000020 RDI: ffff8800ba0d2230
  RBP: ffff88032a067868 R08: 0000000000000201 R09: 0000000000000001
  R10: 0000000000000000 R11: 0000000000000000 R12: ffff8800ba0d0000
  R13: ffff8800ba0d2230 R14: ffffffff80505ae0 R15: ffff8800ba0d0000
  FS:  0000000041223950(0063) GS:ffff88033e480000(0000) knlGS:0000000000000000
  CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
  CR2: 0000000000000058 CR3: 000000032a0a3000 CR4: 00000000000006e0
  Stack:
   ffff880329eee758 ffff880329eee758 ffff88032a0678a8 ffffffff80502dad
   ffff8800ba167978 ffff880329eee758 ffff88032bf9c520 ffff8800ba167978
   ffff88032bf9c520 ffff88032bf9a290 ffff88032a0678b8 ffffffff80506909
  Call Trace:
   [<ffffffff80502dad>] ata_scsi_translate+0x3d/0x1b0
   [<ffffffff80506909>] ata_sas_queuecmd+0x149/0x2a0
   [<ffffffffa0046650>] sas_queuecommand+0xa0/0x1f0 [libsas]
   [<ffffffff804ea544>] scsi_dispatch_cmd+0xd4/0x1a0
   [<ffffffff804eb50f>] scsi_queue_rq+0x66f/0x7f0
   [<ffffffff803e5098>] __blk_mq_run_hw_queue+0x208/0x3f0
   [<ffffffff803e54b8>] blk_mq_run_hw_queue+0x88/0xc0
   [<ffffffff803e5c74>] blk_mq_insert_request+0xc4/0x130
   [<ffffffff803e0b63>] blk_execute_rq_nowait+0x73/0x160
   [<ffffffffa0023fca>] sg_common_write+0x3da/0x720 [sg]
   [<ffffffffa0025100>] sg_new_write+0x250/0x360 [sg]
   [<ffffffffa0025feb>] sg_write+0x13b/0x450 [sg]
   [<ffffffff8032ec91>] vfs_write+0xd1/0x1b0
   [<ffffffff8032ee54>] SyS_write+0x54/0xc0
   [<ffffffff80689932>] system_call_fastpath+0x12/0x17

tj: updated description.

Fixes: 12cb5ce101ab ("libata: use blk taging")
Reported-and-tested-by: Tony Battersby <tonyb@cybernetics.com>
Signed-off-by: Shaohua Li <shli@fb.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
Applied with description updated.

Thanks.

 drivers/ata/libata-core.c     | 4 ++--
 drivers/scsi/ipr.c            | 3 ++-
 drivers/scsi/libsas/sas_ata.c | 3 ++-
 include/linux/libata.h        | 1 +
 4 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 4c35f08..ef150eb 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -4737,7 +4737,7 @@ struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev, int tag)
 		return NULL;
 
 	/* libsas case */
-	if (!ap->scsi_host) {
+	if (ap->flags & ATA_FLAG_SAS_HOST) {
 		tag = ata_sas_allocate_tag(ap);
 		if (tag < 0)
 			return NULL;
@@ -4776,7 +4776,7 @@ void ata_qc_free(struct ata_queued_cmd *qc)
 	tag = qc->tag;
 	if (likely(ata_tag_valid(tag))) {
 		qc->tag = ATA_TAG_POISON;
-		if (!ap->scsi_host)
+		if (ap->flags & ATA_FLAG_SAS_HOST)
 			ata_sas_free_tag(tag, ap);
 	}
 }
diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
index 9219953..d9afc51 100644
--- a/drivers/scsi/ipr.c
+++ b/drivers/scsi/ipr.c
@@ -6815,7 +6815,8 @@ static struct ata_port_operations ipr_sata_ops = {
 };
 
 static struct ata_port_info sata_port_info = {
-	.flags		= ATA_FLAG_SATA | ATA_FLAG_PIO_DMA,
+	.flags		= ATA_FLAG_SATA | ATA_FLAG_PIO_DMA |
+			  ATA_FLAG_SAS_HOST,
 	.pio_mask	= ATA_PIO4_ONLY,
 	.mwdma_mask	= ATA_MWDMA2,
 	.udma_mask	= ATA_UDMA6,
diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c
index 932d9cc..9c706d8 100644
--- a/drivers/scsi/libsas/sas_ata.c
+++ b/drivers/scsi/libsas/sas_ata.c
@@ -547,7 +547,8 @@ static struct ata_port_operations sas_sata_ops = {
 };
 
 static struct ata_port_info sata_port_info = {
-	.flags = ATA_FLAG_SATA | ATA_FLAG_PIO_DMA | ATA_FLAG_NCQ,
+	.flags = ATA_FLAG_SATA | ATA_FLAG_PIO_DMA | ATA_FLAG_NCQ |
+		 ATA_FLAG_SAS_HOST,
 	.pio_mask = ATA_PIO4,
 	.mwdma_mask = ATA_MWDMA2,
 	.udma_mask = ATA_UDMA6,
diff --git a/include/linux/libata.h b/include/linux/libata.h
index fc03efa..6b08cc1 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -232,6 +232,7 @@ enum {
 					      * led */
 	ATA_FLAG_NO_DIPM	= (1 << 23), /* host not happy with DIPM */
 	ATA_FLAG_LOWTAG		= (1 << 24), /* host wants lowest available tag */
+	ATA_FLAG_SAS_HOST	= (1 << 25), /* SAS host */
 
 	/* bits 24:31 of ap->flags are reserved for LLD specific flags */
 
-- 
2.1.0


^ permalink raw reply related	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2015-03-19 18:17 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-03-11 18:15 [PATCH] libata: revert "libata: use blk taging" et al Tony Battersby
2015-03-11 21:45 ` Jens Axboe
2015-03-11 21:45   ` Jens Axboe
2015-03-11 22:19   ` Tony Battersby
2015-03-12  1:53     ` Shaohua Li
2015-03-12  1:53       ` Shaohua Li
2015-03-12  2:31     ` Shaohua Li
2015-03-12  2:31       ` Shaohua Li
2015-03-12 10:14       ` Dan Williams
2015-03-12 12:11         ` Tejun Heo
2015-03-12 12:46           ` Shaohua Li
2015-03-12 13:51             ` Tony Battersby
2015-03-12 13:59             ` Tejun Heo
2015-03-12 17:32               ` Shaohua Li
2015-03-19 18:17                 ` [PATCH libata/for-4.0-fixes] ata: Add a new flag to destinguish sas controller Tejun Heo

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.