* [PATCH 0/2] libata: switch to block layer tagging
@ 2009-05-20  6:59 Jens Axboe
  2009-05-20  7:00 ` [PATCH 1/2] libata: switch to using block layer tagging support Jens Axboe
                   ` (2 more replies)
  0 siblings, 3 replies; 23+ messages in thread
From: Jens Axboe @ 2009-05-20  6:59 UTC (permalink / raw)
  To: linux-ide; +Cc: jeff, htejun

Hi,

This patchset switches libata to block layer tagging. It gets rid of yet
another O(N) command loop.

 block/blk-tag.c           |   99 +++++++++++++++++++++++++++-----------
 drivers/ata/libata-core.c |   94 ++++++++++--------------------------
 drivers/ata/libata-scsi.c |   23 ++++++++
 drivers/ata/libata.h      |   19 ++++++-
 include/linux/blkdev.h    |    3 +
 include/linux/libata.h    |    1 
 6 files changed, 139 insertions(+), 100 deletions(-)

-- 
Jens Axboe



* [PATCH 1/2] libata: switch to using block layer tagging support
  2009-05-20  6:59 [PATCH 0/2] libata: switch to block layer tagging Jens Axboe
@ 2009-05-20  7:00 ` Jens Axboe
  2009-05-20 11:53   ` Tejun Heo
                     ` (2 more replies)
  2009-05-20  7:01 ` [PATCH 2/2] block: add function for waiting for a specific free tag Jens Axboe
  2009-05-20  7:53 ` [PATCH 0/2] libata: switch to block layer tagging Jeff Garzik
  2 siblings, 3 replies; 23+ messages in thread
From: Jens Axboe @ 2009-05-20  7:00 UTC (permalink / raw)
  To: linux-ide; +Cc: jeff, htejun


libata currently has a pretty dumb ATA_MAX_QUEUE loop for finding
a free tag to use. Instead of fixing that up, convert libata to
using block layer tagging - gets rid of code in libata, and is also
much faster.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 drivers/ata/libata-core.c |   65 ++++----------------------------------------
 drivers/ata/libata-scsi.c |   23 ++++++++++++++-
 drivers/ata/libata.h      |   19 ++++++++++++-
 include/linux/libata.h    |    1 -
 4 files changed, 44 insertions(+), 64 deletions(-)

diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 8de0081..cc67307 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -1789,8 +1789,6 @@ unsigned ata_exec_internal_sg(struct ata_device *dev,
 	else
 		tag = 0;
 
-	if (test_and_set_bit(tag, &ap->qc_allocated))
-		BUG();
 	qc = __ata_qc_from_tag(ap, tag);
 
 	qc->tag = tag;
@@ -4783,36 +4781,6 @@ void swap_buf_le16(u16 *buf, unsigned int buf_words)
 }
 
 /**
- *	ata_qc_new - Request an available ATA command, for queueing
- *	@ap: target port
- *
- *	LOCKING:
- *	None.
- */
-
-static struct ata_queued_cmd *ata_qc_new(struct ata_port *ap)
-{
-	struct ata_queued_cmd *qc = NULL;
-	unsigned int i;
-
-	/* no command while frozen */
-	if (unlikely(ap->pflags & ATA_PFLAG_FROZEN))
-		return NULL;
-
-	/* the last tag is reserved for internal command. */
-	for (i = 0; i < ATA_MAX_QUEUE - 1; i++)
-		if (!test_and_set_bit(i, &ap->qc_allocated)) {
-			qc = __ata_qc_from_tag(ap, i);
-			break;
-		}
-
-	if (qc)
-		qc->tag = i;
-
-	return qc;
-}
-
-/**
  *	ata_qc_new_init - Request an available ATA command, and initialize it
  *	@dev: Device from whom we request an available command structure
  *
@@ -4820,16 +4788,20 @@ static struct ata_queued_cmd *ata_qc_new(struct ata_port *ap)
  *	None.
  */
 
-struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev)
+struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev, int tag)
 {
 	struct ata_port *ap = dev->link->ap;
 	struct ata_queued_cmd *qc;
 
-	qc = ata_qc_new(ap);
+	if (unlikely(ap->pflags & ATA_PFLAG_FROZEN))
+		return NULL;
+
+	qc = __ata_qc_from_tag(ap, tag);
 	if (qc) {
 		qc->scsicmd = NULL;
 		qc->ap = ap;
 		qc->dev = dev;
+		qc->tag = tag;
 
 		ata_qc_reinit(qc);
 	}
@@ -4837,31 +4809,6 @@ struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev)
 	return qc;
 }
 
-/**
- *	ata_qc_free - free unused ata_queued_cmd
- *	@qc: Command to complete
- *
- *	Designed to free unused ata_queued_cmd object
- *	in case something prevents using it.
- *
- *	LOCKING:
- *	spin_lock_irqsave(host lock)
- */
-void ata_qc_free(struct ata_queued_cmd *qc)
-{
-	struct ata_port *ap = qc->ap;
-	unsigned int tag;
-
-	WARN_ON_ONCE(qc == NULL); /* ata_qc_from_tag _might_ return NULL */
-
-	qc->flags = 0;
-	tag = qc->tag;
-	if (likely(ata_tag_valid(tag))) {
-		qc->tag = ATA_TAG_POISON;
-		clear_bit(tag, &ap->qc_allocated);
-	}
-}
-
 void __ata_qc_complete(struct ata_queued_cmd *qc)
 {
 	struct ata_port *ap = qc->ap;
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 2733b0c..04f7a8d 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -742,7 +742,11 @@ static struct ata_queued_cmd *ata_scsi_qc_new(struct ata_device *dev,
 {
 	struct ata_queued_cmd *qc;
 
-	qc = ata_qc_new_init(dev);
+	if (cmd->request->tag != -1)
+		qc = ata_qc_new_init(dev, cmd->request->tag);
+	else
+		qc = ata_qc_new_init(dev, 0);
+
 	if (qc) {
 		qc->scsicmd = cmd;
 		qc->scsidone = done;
@@ -1137,7 +1141,17 @@ static int ata_scsi_dev_config(struct scsi_device *sdev,
 
 		depth = min(sdev->host->can_queue, ata_id_queue_depth(dev->id));
 		depth = min(ATA_MAX_QUEUE - 1, depth);
-		scsi_adjust_queue_depth(sdev, MSG_SIMPLE_TAG, depth);
+
+		/*
+		 * If this device is behind a port multiplier, we have
+		 * to share the tag map between all devices on that PMP.
+		 * Set up the shared tag map here and we get automatic.
+		 */
+		if (dev->link->ap->pmp_link)
+			scsi_init_shared_tag_map(sdev->host, ATA_MAX_QUEUE - 1);
+
+		scsi_set_tag_type(sdev, MSG_SIMPLE_TAG);
+		scsi_activate_tcq(sdev, depth);
 	}
 
 	return 0;
@@ -1990,6 +2004,11 @@ static unsigned int ata_scsiop_inq_std(struct ata_scsi_args *args, u8 *rbuf)
 		hdr[1] |= (1 << 7);
 
 	memcpy(rbuf, hdr, sizeof(hdr));
+
+	/* if ncq, set tags supported */
+	if (ata_id_has_ncq(args->id))
+		rbuf[7] |= (1 << 1);
+
 	memcpy(&rbuf[8], "ATA     ", 8);
 	ata_id_string(args->id, &rbuf[16], ATA_ID_PROD, 16);
 	ata_id_string(args->id, &rbuf[32], ATA_ID_FW_REV, 4);
diff --git a/drivers/ata/libata.h b/drivers/ata/libata.h
index 89a1e00..bad444b 100644
--- a/drivers/ata/libata.h
+++ b/drivers/ata/libata.h
@@ -74,7 +74,7 @@ extern struct ata_link *ata_dev_phys_link(struct ata_device *dev);
 extern void ata_force_cbl(struct ata_port *ap);
 extern u64 ata_tf_to_lba(const struct ata_taskfile *tf);
 extern u64 ata_tf_to_lba48(const struct ata_taskfile *tf);
-extern struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev);
+extern struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev, int tag);
 extern int ata_build_rw_tf(struct ata_taskfile *tf, struct ata_device *dev,
 			   u64 block, u32 n_block, unsigned int tf_flags,
 			   unsigned int tag);
@@ -100,7 +100,6 @@ extern int ata_dev_configure(struct ata_device *dev);
 extern int sata_down_spd_limit(struct ata_link *link, u32 spd_limit);
 extern int ata_down_xfermask_limit(struct ata_device *dev, unsigned int sel);
 extern void ata_sg_clean(struct ata_queued_cmd *qc);
-extern void ata_qc_free(struct ata_queued_cmd *qc);
 extern void ata_qc_issue(struct ata_queued_cmd *qc);
 extern void __ata_qc_complete(struct ata_queued_cmd *qc);
 extern int atapi_check_dma(struct ata_queued_cmd *qc);
@@ -116,6 +115,22 @@ extern struct ata_port *ata_port_alloc(struct ata_host *host);
 extern void ata_dev_enable_pm(struct ata_device *dev, enum link_pm policy);
 extern void ata_lpm_schedule(struct ata_port *ap, enum link_pm);
 
+/**
+ *	ata_qc_free - free unused ata_queued_cmd
+ *	@qc: Command to complete
+ *
+ *	Designed to free unused ata_queued_cmd object
+ *	in case something prevents using it.
+ *
+ *	LOCKING:
+ *	spin_lock_irqsave(host lock)
+ */
+static inline void ata_qc_free(struct ata_queued_cmd *qc)
+{
+	qc->flags = 0;
+	qc->tag = ATA_TAG_POISON;
+}
+
 /* libata-acpi.c */
 #ifdef CONFIG_ATA_ACPI
 extern void ata_acpi_associate_sata_port(struct ata_port *ap);
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 3d501db..cf1e54e 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -716,7 +716,6 @@ struct ata_port {
 	unsigned int		cbl;	/* cable type; ATA_CBL_xxx */
 
 	struct ata_queued_cmd	qcmd[ATA_MAX_QUEUE];
-	unsigned long		qc_allocated;
 	unsigned int		qc_active;
 	int			nr_active_links; /* #links with active qcs */
 
-- 
1.6.3.rc0.1.gf800

-- 
Jens Axboe



* [PATCH 2/2] block: add function for waiting for a specific free tag
  2009-05-20  6:59 [PATCH 0/2] libata: switch to block layer tagging Jens Axboe
  2009-05-20  7:00 ` [PATCH 1/2] libata: switch to using block layer tagging support Jens Axboe
@ 2009-05-20  7:01 ` Jens Axboe
  2009-05-20 11:55   ` Tejun Heo
  2009-05-20 17:28   ` [PATCH 2/2] block: add function for waiting for a specific free tag Grant Grundler
  2009-05-20  7:53 ` [PATCH 0/2] libata: switch to block layer tagging Jeff Garzik
  2 siblings, 2 replies; 23+ messages in thread
From: Jens Axboe @ 2009-05-20  7:01 UTC (permalink / raw)
  To: linux-ide; +Cc: jeff, htejun


We need this in libata to ensure that we don't race between internal
tag usage and the block layer usage.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/blk-tag.c           |   99 ++++++++++++++++++++++++++++++++-------------
 drivers/ata/libata-core.c |   29 +++++++++----
 include/linux/blkdev.h    |    3 +
 3 files changed, 95 insertions(+), 36 deletions(-)

diff --git a/block/blk-tag.c b/block/blk-tag.c
index e9a7501..208468b 100644
--- a/block/blk-tag.c
+++ b/block/blk-tag.c
@@ -149,6 +149,7 @@ static struct blk_queue_tag *__blk_queue_init_tags(struct request_queue *q,
 		goto fail;
 
 	atomic_set(&tags->refcnt, 1);
+	init_waitqueue_head(&tags->waitq);
 	return tags;
 fail:
 	kfree(tags);
@@ -264,6 +265,65 @@ int blk_queue_resize_tags(struct request_queue *q, int new_depth)
 }
 EXPORT_SYMBOL(blk_queue_resize_tags);
 
+void blk_queue_acquire_tag(struct request_queue *q, int tag)
+{
+	struct blk_queue_tag *bqt;
+
+	if (!blk_queue_tagged(q) || !q->queue_tags)
+		return;
+
+	bqt = q->queue_tags;
+	do {
+		DEFINE_WAIT(wait);
+
+		if (!test_and_set_bit_lock(tag, bqt->tag_map))
+			break;
+
+		prepare_to_wait(&bqt->waitq, &wait, TASK_UNINTERRUPTIBLE);
+		if (test_and_set_bit_lock(tag, bqt->tag_map)) {
+			spin_unlock_irq(q->queue_lock);
+			schedule();
+			spin_lock_irq(q->queue_lock);
+		}
+		finish_wait(&bqt->waitq, &wait);
+	} while (1);
+}
+
+void blk_queue_release_tag(struct request_queue *q, int tag)
+{
+	struct blk_queue_tag *bqt = q->queue_tags;
+
+	if (!blk_queue_tagged(q))
+		return;
+
+	/*
+	 * Normally we store a request pointer in the tag index, but for
+	 * blk_queue_acquire_tag() usage, we may not have something specific
+	 * assigned to the tag slot. In any case, be safe and clear it.
+	 */
+	bqt->tag_index[tag] = NULL;
+
+	if (unlikely(!test_bit(tag, bqt->tag_map))) {
+		printk(KERN_ERR "%s: attempt to clear non-busy tag (%d)\n",
+		       __func__, tag);
+		return;
+	}
+	/*
+	 * The tag_map bit acts as a lock for tag_index[bit], so we need
+	 * unlock memory barrier semantics.
+	 */
+	clear_bit_unlock(tag, bqt->tag_map);
+
+	/*
+	 * We don't need a memory barrier here, since we have the bit lock
+	 * ordering above. Otherwise it would need an smp_mb();
+	 */
+	if (waitqueue_active(&bqt->waitq))
+		wake_up(&bqt->waitq);
+
+}
+EXPORT_SYMBOL(blk_queue_release_tag);
+
 /**
  * blk_queue_end_tag - end tag operations for a request
  * @q:  the request queue for the device
@@ -285,33 +345,17 @@ void blk_queue_end_tag(struct request_queue *q, struct request *rq)
 
 	BUG_ON(tag == -1);
 
-	if (unlikely(tag >= bqt->real_max_depth))
-		/*
-		 * This can happen after tag depth has been reduced.
-		 * FIXME: how about a warning or info message here?
-		 */
-		return;
-
-	list_del_init(&rq->queuelist);
-	rq->cmd_flags &= ~REQ_QUEUED;
-	rq->tag = -1;
-
-	if (unlikely(bqt->tag_index[tag] == NULL))
-		printk(KERN_ERR "%s: tag %d is missing\n",
-		       __func__, tag);
-
-	bqt->tag_index[tag] = NULL;
-
-	if (unlikely(!test_bit(tag, bqt->tag_map))) {
-		printk(KERN_ERR "%s: attempt to clear non-busy tag (%d)\n",
-		       __func__, tag);
-		return;
-	}
 	/*
-	 * The tag_map bit acts as a lock for tag_index[bit], so we need
-	 * unlock memory barrier semantics.
+	 * When the tag depth is being reduced, we don't wait for higher tags
+	 * to finish. So we could see this triggering without it being an error.
 	 */
-	clear_bit_unlock(tag, bqt->tag_map);
+	if (tag < bqt->real_max_depth) {
+		list_del_init(&rq->queuelist);
+		rq->cmd_flags &= ~REQ_QUEUED;
+		rq->tag = -1;
+
+		blk_queue_release_tag(q, tag);
+	}
 }
 EXPORT_SYMBOL(blk_queue_end_tag);
 
@@ -336,8 +380,7 @@ EXPORT_SYMBOL(blk_queue_end_tag);
 int blk_queue_start_tag(struct request_queue *q, struct request *rq)
 {
 	struct blk_queue_tag *bqt = q->queue_tags;
-	unsigned max_depth;
-	int tag;
+	int max_depth, tag;
 
 	if (unlikely((rq->cmd_flags & REQ_QUEUED))) {
 		printk(KERN_ERR
@@ -371,7 +414,7 @@ int blk_queue_start_tag(struct request_queue *q, struct request *rq)
 	} while (test_and_set_bit_lock(tag, bqt->tag_map));
 	/*
 	 * We need lock ordering semantics given by test_and_set_bit_lock.
-	 * See blk_queue_end_tag for details.
+	 * See blk_queue_release_tag() for details.
 	 */
 
 	rq->cmd_flags |= REQ_QUEUED;
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index cc67307..c45dd12 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -61,6 +61,7 @@
 #include <scsi/scsi.h>
 #include <scsi/scsi_cmnd.h>
 #include <scsi/scsi_host.h>
+#include <scsi/scsi_device.h>
 #include <linux/libata.h>
 #include <asm/byteorder.h>
 #include <linux/cdrom.h>
@@ -1765,15 +1766,14 @@ unsigned ata_exec_internal_sg(struct ata_device *dev,
 	u32 preempted_sactive, preempted_qc_active;
 	int preempted_nr_active_links;
 	DECLARE_COMPLETION_ONSTACK(wait);
-	unsigned long flags;
 	unsigned int err_mask;
 	int rc;
 
-	spin_lock_irqsave(ap->lock, flags);
+	spin_lock_irq(ap->lock);
 
 	/* no internal command while frozen */
 	if (ap->pflags & ATA_PFLAG_FROZEN) {
-		spin_unlock_irqrestore(ap->lock, flags);
+		spin_unlock_irq(ap->lock);
 		return AC_ERR_SYSTEM;
 	}
 
@@ -1789,6 +1789,16 @@ unsigned ata_exec_internal_sg(struct ata_device *dev,
 	else
 		tag = 0;
 
+	/*
+	 * We could be racing with the tag freeing in the block layer, so
+	 * we need to ensure that our tag is free.
+	 */
+	if (dev->sdev && dev->sdev->request_queue)
+		blk_queue_acquire_tag(dev->sdev->request_queue, tag);
+
+	/*
+	 * The tag is now ours
+	 */
 	qc = __ata_qc_from_tag(ap, tag);
 
 	qc->tag = tag;
@@ -1828,7 +1838,7 @@ unsigned ata_exec_internal_sg(struct ata_device *dev,
 
 	ata_qc_issue(qc);
 
-	spin_unlock_irqrestore(ap->lock, flags);
+	spin_unlock_irq(ap->lock);
 
 	if (!timeout) {
 		if (ata_probe_timeout)
@@ -1844,7 +1854,7 @@ unsigned ata_exec_internal_sg(struct ata_device *dev,
 	ata_port_flush_task(ap);
 
 	if (!rc) {
-		spin_lock_irqsave(ap->lock, flags);
+		spin_lock_irq(ap->lock);
 
 		/* We're racing with irq here.  If we lose, the
 		 * following test prevents us from completing the qc
@@ -1864,7 +1874,7 @@ unsigned ata_exec_internal_sg(struct ata_device *dev,
 					"qc timeout (cmd 0x%x)\n", command);
 		}
 
-		spin_unlock_irqrestore(ap->lock, flags);
+		spin_unlock_irq(ap->lock);
 	}
 
 	/* do post_internal_cmd */
@@ -1884,11 +1894,14 @@ unsigned ata_exec_internal_sg(struct ata_device *dev,
 	}
 
 	/* finish up */
-	spin_lock_irqsave(ap->lock, flags);
+	spin_lock_irq(ap->lock);
 
 	*tf = qc->result_tf;
 	err_mask = qc->err_mask;
 
+	if (dev->sdev && dev->sdev->request_queue)
+		blk_queue_release_tag(dev->sdev->request_queue, tag);
+
 	ata_qc_free(qc);
 	link->active_tag = preempted_tag;
 	link->sactive = preempted_sactive;
@@ -1911,7 +1924,7 @@ unsigned ata_exec_internal_sg(struct ata_device *dev,
 		ata_port_probe(ap);
 	}
 
-	spin_unlock_irqrestore(ap->lock, flags);
+	spin_unlock_irq(ap->lock);
 
 	if ((err_mask & AC_ERR_TIMEOUT) && auto_timeout)
 		ata_internal_cmd_timed_out(dev, command);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index ca322da..f2b6b92 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -307,6 +307,7 @@ struct blk_queue_tag {
 	int max_depth;			/* what we will send to device */
 	int real_max_depth;		/* what the array can hold */
 	atomic_t refcnt;		/* map can be shared */
+	wait_queue_head_t waitq;	/* for waiting on tags */
 };
 
 #define BLK_SCSI_MAX_CMDS	(256)
@@ -929,6 +930,8 @@ extern void blk_put_queue(struct request_queue *);
  * tag stuff
  */
 #define blk_rq_tagged(rq)		((rq)->cmd_flags & REQ_QUEUED)
+extern void blk_queue_acquire_tag(struct request_queue *, int);
+extern void blk_queue_release_tag(struct request_queue *, int);
 extern int blk_queue_start_tag(struct request_queue *, struct request *);
 extern struct request *blk_queue_find_tag(struct request_queue *, int);
 extern void blk_queue_end_tag(struct request_queue *, struct request *);
-- 
1.6.3.rc0.1.gf800

-- 
Jens Axboe



* Re: [PATCH 0/2] libata: switch to block layer tagging
  2009-05-20  6:59 [PATCH 0/2] libata: switch to block layer tagging Jens Axboe
  2009-05-20  7:00 ` [PATCH 1/2] libata: switch to using block layer tagging support Jens Axboe
  2009-05-20  7:01 ` [PATCH 2/2] block: add function for waiting for a specific free tag Jens Axboe
@ 2009-05-20  7:53 ` Jeff Garzik
  2009-05-20  7:57   ` Jens Axboe
  2 siblings, 1 reply; 23+ messages in thread
From: Jeff Garzik @ 2009-05-20  7:53 UTC (permalink / raw)
  To: Jens Axboe; +Cc: linux-ide, htejun

Jens Axboe wrote:
> Hi,
> 
> This patchset switches libata to block layer tagging. It gets rid of yet
> another O(N) command loop.
> 
>  block/blk-tag.c           |   99 +++++++++++++++++++++++++++-----------
>  drivers/ata/libata-core.c |   94 ++++++++++--------------------------
>  drivers/ata/libata-scsi.c |   23 ++++++++
>  drivers/ata/libata.h      |   19 ++++++-
>  include/linux/blkdev.h    |    3 +
>  include/linux/libata.h    |    1 
>  6 files changed, 139 insertions(+), 100 deletions(-)

Will take a look post-sleep...  mainly it's a question of possibly 
interfering with SCSI's use of block layer tagging.  Though for the 
moment I will simply assume you verified that ;)

	Jeff




* Re: [PATCH 0/2] libata: switch to block layer tagging
  2009-05-20  7:53 ` [PATCH 0/2] libata: switch to block layer tagging Jeff Garzik
@ 2009-05-20  7:57   ` Jens Axboe
  0 siblings, 0 replies; 23+ messages in thread
From: Jens Axboe @ 2009-05-20  7:57 UTC (permalink / raw)
  To: Jeff Garzik; +Cc: linux-ide, htejun

On Wed, May 20 2009, Jeff Garzik wrote:
> Jens Axboe wrote:
>> Hi,
>>
>> This patchset switches libata to block layer tagging. It gets rid of yet
>> another O(N) command loop.
>>
>>  block/blk-tag.c           |   99 +++++++++++++++++++++++++++-----------
>>  drivers/ata/libata-core.c |   94 ++++++++++--------------------------
>>  drivers/ata/libata-scsi.c |   23 ++++++++
>>  drivers/ata/libata.h      |   19 ++++++-
>>  include/linux/blkdev.h    |    3 +
>>  include/linux/libata.h    |    1
>>  6 files changed, 139 insertions(+), 100 deletions(-)
>
> Will take a look post-sleep...  mainly it's a question of possibly  

:-)

> interfering with SCSI's use of block layer tagging.  Though for the  
> moment I will simply assume you verified that ;)

Certainly, the patch has been used and sitting in my ssd branch for
months now. It essentially just tells SCSI to enable the block layer
tagging for libata devices with ncq. Then libata gets block layer tagged
requests automatically, and it can simply reuse the assigned tag
internally as well.

That part is trivial. The nasty bit is when libata makes up its own tag
for internally issued commands; that is what patch #2 covers.
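To put the two halves next to each other in code form (a condensed sketch of
what the two patches in this thread do, not the literal hunks; the sketch_*
helper names are made up for illustration):

#include <linux/blkdev.h>
#include <linux/libata.h>
#include <scsi/scsi_cmnd.h>

/* Patch #1: SCSI hands libata an already-tagged request, and the block
 * layer tag is simply reused as the libata tag. */
static struct ata_queued_cmd *sketch_build_qc(struct ata_device *dev,
					      struct scsi_cmnd *cmd)
{
	int tag = (cmd->request->tag != -1) ? cmd->request->tag : 0;

	return ata_qc_new_init(dev, tag);
}

/* Patch #2: an internally issued command picks its own tag, so it has to
 * claim that tag from the block layer tag map first and hand it back when
 * it is done. */
static void sketch_internal_cmd(struct ata_device *dev, int tag)
{
	blk_queue_acquire_tag(dev->sdev->request_queue, tag);

	/* ... build, issue and wait for the internal command ... */

	blk_queue_release_tag(dev->sdev->request_queue, tag);
}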

-- 
Jens Axboe



* Re: [PATCH 1/2] libata: switch to using block layer tagging support
  2009-05-20  7:00 ` [PATCH 1/2] libata: switch to using block layer tagging support Jens Axboe
@ 2009-05-20 11:53   ` Tejun Heo
  2009-05-20 17:10   ` Grant Grundler
  2009-06-10 15:11   ` Jeff Garzik
  2 siblings, 0 replies; 23+ messages in thread
From: Tejun Heo @ 2009-05-20 11:53 UTC (permalink / raw)
  To: Jens Axboe; +Cc: linux-ide, jeff

Hello, Jens.

Jens Axboe wrote:
> diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
> index 2733b0c..04f7a8d 100644
> --- a/drivers/ata/libata-scsi.c
> +++ b/drivers/ata/libata-scsi.c
> @@ -742,7 +742,11 @@ static struct ata_queued_cmd *ata_scsi_qc_new(struct ata_device *dev,
>  {
>  	struct ata_queued_cmd *qc;
>  
> -	qc = ata_qc_new_init(dev);
> +	if (cmd->request->tag != -1)
> +		qc = ata_qc_new_init(dev, cmd->request->tag);
> +	else
> +		qc = ata_qc_new_init(dev, 0);
> +
>  	if (qc) {
>  		qc->scsicmd = cmd;
>  		qc->scsidone = done;
> @@ -1137,7 +1141,17 @@ static int ata_scsi_dev_config(struct scsi_device *sdev,
>  
>  		depth = min(sdev->host->can_queue, ata_id_queue_depth(dev->id));
>  		depth = min(ATA_MAX_QUEUE - 1, depth);
> -		scsi_adjust_queue_depth(sdev, MSG_SIMPLE_TAG, depth);
> +
> +		/*
> +		 * If this device is behind a port multiplier, we have
> +		 * to share the tag map between all devices on that PMP.
> +		 * Set up the shared tag map here and we get automatic.
> +		 */
> +		if (dev->link->ap->pmp_link)
> +			scsi_init_shared_tag_map(sdev->host, ATA_MAX_QUEUE - 1);
> +
> +		scsi_set_tag_type(sdev, MSG_SIMPLE_TAG);
> +		scsi_activate_tcq(sdev, depth);
>  	}

I don't think this is quite correct.  If a !NCQ device is behind a PMP,
the queue won't be tagged, so all requests for the device would get a -1
blk tag, which gets translated to libata tag 0, and that tag may already
be in use by another NCQ or non-NCQ device behind the same PMP.
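Spelling out that path against the hunks quoted above (the code lines are
from the patch; the comments mark the problem case):

/* ata_scsi_dev_config(): for a !NCQ device the NCQ branch above is never
 * taken, so scsi_activate_tcq() is not called and the request queue stays
 * untagged. */

/* ata_scsi_qc_new(): an untagged queue means cmd->request->tag == -1 ... */
if (cmd->request->tag != -1)
	qc = ata_qc_new_init(dev, cmd->request->tag);
else
	qc = ata_qc_new_init(dev, 0);	/* ... so every command lands on libata
					 * tag 0, which another device behind
					 * the same PMP may already be using. */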

Thanks.

-- 
tejun


* Re: [PATCH 2/2] block: add function for waiting for a specific free tag
  2009-05-20  7:01 ` [PATCH 2/2] block: add function for waiting for a specific free tag Jens Axboe
@ 2009-05-20 11:55   ` Tejun Heo
  2009-05-20 19:34     ` old-EH and SAS (was Re: [PATCH 2/2] block: add function for waiting for a specific free tag) Jeff Garzik
  2009-05-20 17:28   ` [PATCH 2/2] block: add function for waiting for a specific free tag Grant Grundler
  1 sibling, 1 reply; 23+ messages in thread
From: Tejun Heo @ 2009-05-20 11:55 UTC (permalink / raw)
  To: Jens Axboe; +Cc: linux-ide, jeff

Jens Axboe wrote:
> We need this in libata to ensure that we don't race between internal
> tag usage and the block layer usage.

Hmm... I can't think of a race condition.  Can you please elaborate on
what can cause such a race condition and how it can be resolved by
waiting for an in-flight tag?  Currently, libata reserves a qc for EH, so
there won't be any race around it (the old-EH path is broken if
invoked with commands in flight anyway, so doesn't matter).  Also, as
all failed rq's are on hold till EH finishes, if there's a race
condition around a tag, it will become a deadlock.

Thanks.

-- 
tejun


* Re: [PATCH 1/2] libata: switch to using block layer tagging support
  2009-05-20  7:00 ` [PATCH 1/2] libata: switch to using block layer tagging support Jens Axboe
  2009-05-20 11:53   ` Tejun Heo
@ 2009-05-20 17:10   ` Grant Grundler
  2009-05-20 18:08     ` Gwendal Grignou
  2009-05-20 19:09     ` Jeff Garzik
  2009-06-10 15:11   ` Jeff Garzik
  2 siblings, 2 replies; 23+ messages in thread
From: Grant Grundler @ 2009-05-20 17:10 UTC (permalink / raw)
  To: Jens Axboe; +Cc: linux-ide, jeff, htejun

On Wed, May 20, 2009 at 12:00 AM, Jens Axboe <jens.axboe@oracle.com> wrote:
> libata currently has a pretty dumb ATA_MAX_QUEUE loop for finding
> a free tag to use. Instead of fixing that up, convert libata to
> using block layer tagging - gets rid of code in libata, and is also
> much faster.
...
> @@ -1137,7 +1141,17 @@ static int ata_scsi_dev_config(struct scsi_device *sdev,
>
>                depth = min(sdev->host->can_queue, ata_id_queue_depth(dev->id));
>                depth = min(ATA_MAX_QUEUE - 1, depth);
> -               scsi_adjust_queue_depth(sdev, MSG_SIMPLE_TAG, depth);
> +
> +               /*
> +                * If this device is behind a port multiplier, we have
> +                * to share the tag map between all devices on that PMP.
> +                * Set up the shared tag map here and we get automatic.

Automatic what?

> +                */
> +               if (dev->link->ap->pmp_link)
> +                       scsi_init_shared_tag_map(sdev->host, ATA_MAX_QUEUE - 1);
> +
> +               scsi_set_tag_type(sdev, MSG_SIMPLE_TAG);
> +               scsi_activate_tcq(sdev, depth);

I just read Tejun's reply, and what he's saying sounds right.
But can SATA controllers handle NCQ and !NCQ devices on the same port?
Can the PMP handle it?

If both can, I don't understand how a mixed config works today.

TBH, this isn't something I'm very worried about, since most commercial configs
will be homogeneous (think HW replacement/support costs).

hth,
grant


* Re: [PATCH 2/2] block: add function for waiting for a specific free tag
  2009-05-20  7:01 ` [PATCH 2/2] block: add function for waiting for a specific free tag Jens Axboe
  2009-05-20 11:55   ` Tejun Heo
@ 2009-05-20 17:28   ` Grant Grundler
  1 sibling, 0 replies; 23+ messages in thread
From: Grant Grundler @ 2009-05-20 17:28 UTC (permalink / raw)
  To: Jens Axboe; +Cc: linux-ide, jeff, htejun

On Wed, May 20, 2009 at 12:01 AM, Jens Axboe <jens.axboe@oracle.com> wrote:
>
> We need this in libata to ensure that we don't race between internal
> tag usage and the block layer usage.
>
> Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
> ---
>  block/blk-tag.c           |   99 ++++++++++++++++++++++++++++++++-------------
>  drivers/ata/libata-core.c |   29 +++++++++----
>  include/linux/blkdev.h    |    3 +
>  3 files changed, 95 insertions(+), 36 deletions(-)
>
> diff --git a/block/blk-tag.c b/block/blk-tag.c
> index e9a7501..208468b 100644
> --- a/block/blk-tag.c
> +++ b/block/blk-tag.c
> @@ -149,6 +149,7 @@ static struct blk_queue_tag *__blk_queue_init_tags(struct request_queue *q,
>                goto fail;
>
>        atomic_set(&tags->refcnt, 1);
> +       init_waitqueue_head(&tags->waitq);
>        return tags;
>  fail:
>        kfree(tags);
> @@ -264,6 +265,65 @@ int blk_queue_resize_tags(struct request_queue *q, int new_depth)
>  }
>  EXPORT_SYMBOL(blk_queue_resize_tags);
>
> +void blk_queue_acquire_tag(struct request_queue *q, int tag)
> +{
> +       struct blk_queue_tag *bqt;
> +
> +       if (!blk_queue_tagged(q) || !q->queue_tags)
> +               return;
> +
> +       bqt = q->queue_tags;
> +       do {
> +               DEFINE_WAIT(wait);
> +
> +               if (!test_and_set_bit_lock(tag, bqt->tag_map))
> +                       break;
> +
> +               prepare_to_wait(&bqt->waitq, &wait, TASK_UNINTERRUPTIBLE);
> +               if (test_and_set_bit_lock(tag, bqt->tag_map)) {
> +                       spin_unlock_irq(q->queue_lock);
> +                       schedule();
> +                       spin_lock_irq(q->queue_lock);
> +               }
> +               finish_wait(&bqt->waitq, &wait);
> +       } while (1);
> +}

Should blk_queue_acquire_tag() also be EXPORT_SYMBOL like
blk_queue_release_tag() is?

> +
> +void blk_queue_release_tag(struct request_queue *q, int tag)
> +{
...
> +}
> +EXPORT_SYMBOL(blk_queue_release_tag);
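i.e., presumably the new function would get a matching line right after its
body, mirroring what the patch already does for blk_queue_release_tag():

/* so modular users such as libata can call it */
EXPORT_SYMBOL(blk_queue_acquire_tag);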

thanks,
grant


* Re: [PATCH 1/2] libata: switch to using block layer tagging support
  2009-05-20 17:10   ` Grant Grundler
@ 2009-05-20 18:08     ` Gwendal Grignou
  2009-05-20 18:50       ` James Bottomley
  2009-05-20 18:51       ` Jeff Garzik
  2009-05-20 19:09     ` Jeff Garzik
  1 sibling, 2 replies; 23+ messages in thread
From: Gwendal Grignou @ 2009-05-20 18:08 UTC (permalink / raw)
  To: Grant Grundler; +Cc: Jens Axboe, linux-ide, jeff, htejun

From the ATA and SATA specs, NCQ is per device. It is possible to
assign the same tag on different ports; the disks and PMP will not
care.
However, today we assign tags on a per-port basis [see
__ata_qc_from_tag()], therefore only 32 commands can be in flight to
all devices behind a given port.

Being able to do a mapping qc <---> {pmp port, tag} instead of just
qc <---> {tag} will provide a performance boost when disks supporting
NCQ are connected behind a PMP. Maybe it can be done by moving qcmd
from ata_port to ata_link.
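A rough sketch of what that move could look like, purely to illustrate the
qc <---> {pmp port, tag} mapping (the __ata_qc_from_link_tag() helper name is
made up, and the real conversion would obviously touch much more code):

struct ata_link {
	/* ... existing fields ... */
	struct ata_queued_cmd	qcmd[ATA_MAX_QUEUE];	/* moved from ata_port */
	unsigned int		qc_active;
};

/* lookup then keys off {link (i.e. PMP port), tag} instead of {port, tag} */
static inline struct ata_queued_cmd *
__ata_qc_from_link_tag(struct ata_link *link, unsigned int tag)
{
	if (ata_tag_valid(tag))
		return &link->qcmd[tag];
	return NULL;
}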

As Tejun said, the patch needs more work to be able to support the same
tag being used on two different links behind the same port.
Also, given we are changing the number of commands we send to the
controller, we would have to change can_queue from ATA_MAX_QUEUE to
ATA_MAX_QUEUE*n, where n is the number of ports supported by the
controller [at most SATA_PMP_MAX_PORTS, but some controllers, like the
SiI3132, only support 5 devices, and others may only support n = 1].

When done, the patch will require a great amount of testing, given we
will exercise the controllers in a brand new way. A white list might
be necessary.

Jens, using SCSI tagging for ata commands is a great idea, but it is
no small feat...

Gwendal.

On Wed, May 20, 2009 at 10:10 AM, Grant Grundler <grundler@google.com> wrote:
> On Wed, May 20, 2009 at 12:00 AM, Jens Axboe <jens.axboe@oracle.com> wrote:
>> libata currently has a pretty dumb ATA_MAX_QUEUE loop for finding
>> a free tag to use. Instead of fixing that up, convert libata to
>> using block layer tagging - gets rid of code in libata, and is also
>> much faster.
> ...
>> @@ -1137,7 +1141,17 @@ static int ata_scsi_dev_config(struct scsi_device *sdev,
>>
>>                depth = min(sdev->host->can_queue, ata_id_queue_depth(dev->id));
>>                depth = min(ATA_MAX_QUEUE - 1, depth);
>> -               scsi_adjust_queue_depth(sdev, MSG_SIMPLE_TAG, depth);
>> +
>> +               /*
>> +                * If this device is behind a port multiplier, we have
>> +                * to share the tag map between all devices on that PMP.
>> +                * Set up the shared tag map here and we get automatic.
>
> Automatic what?
>
>> +                */
>> +               if (dev->link->ap->pmp_link)
>> +                       scsi_init_shared_tag_map(sdev->host, ATA_MAX_QUEUE - 1);
>> +
>> +               scsi_set_tag_type(sdev, MSG_SIMPLE_TAG);
>> +               scsi_activate_tcq(sdev, depth);
>
> I just read Tejun's reply and it sounds right what he's saying.
> But can SATA controllers handle NCQ and !NCQ devices on the same port?
> Can the PMP handle it?
>
> If both can, I don't understand how a mixed config works today.
>
> TBH, this isn't something I'm very worried about since most commercial configs
> will be homogenous (think HW replacement/support costs).
>
> hth,
> grant
> --
> To unsubscribe from this list: send the line "unsubscribe linux-ide" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>


* Re: [PATCH 1/2] libata: switch to using block layer tagging support
  2009-05-20 18:08     ` Gwendal Grignou
@ 2009-05-20 18:50       ` James Bottomley
  2009-05-20 18:58         ` Jeff Garzik
  2009-05-20 18:51       ` Jeff Garzik
  1 sibling, 1 reply; 23+ messages in thread
From: James Bottomley @ 2009-05-20 18:50 UTC (permalink / raw)
  To: Gwendal Grignou; +Cc: Grant Grundler, Jens Axboe, linux-ide, jeff, htejun

On Wed, 2009-05-20 at 11:08 -0700, Gwendal Grignou wrote:
> Form the ATA and SATA spec, NCQ is per device. It is possible to
> assign the same tag on different port, the disks and PMP will not
> care.
> However, today, we assign tag on a port basis [see
> __ata_qc_from_tag()], therefore only 32 commands can be inflight to
> all devices behind a given port.
> 
> Being able to a do a mapping qc <---> {pmp port, tag} instead of just
> qc <---> {tag} will provide a performance boost when disks supporting
> NCQ are connected behind a PMP. Maybe it can be done, by moving qcmd
> from ata_port to ata_link.
> 
> As Tejun said, the patch needs more work to be able to support same
> tag used on 2 different links behind the same port.
> Also, given we are changing the amount of commands we sent to the
> controller, we would have to change can_queue from ATA_MAX_QUEUE to
> ATA_MAX_QUEUE*n, where n is the number of ports supported by the
> controller [max SATA_PMP_MAX_PORTS, but some controller, like SiI3132
> only supports 5 devices, and other may only support n = 1]
> 
> When done, the patch will require a great amount of testing, given we
> will exercise the controllers in a brand new way. A white list might
> be necessary.
> 
> Jens, using SCSI tagging for ata commands is a great idea, but it is
> no small feat...

So realistically, you want one block queue per PMP port rather than an
artificial limits adjustment on the single queue per output phy ... this
shouldn't be too hard: it is exactly the way the SAS transport class
(and libsas) works today for expander connected SAS devices.

James




* Re: [PATCH 1/2] libata: switch to using block layer tagging support
  2009-05-20 18:08     ` Gwendal Grignou
  2009-05-20 18:50       ` James Bottomley
@ 2009-05-20 18:51       ` Jeff Garzik
  1 sibling, 0 replies; 23+ messages in thread
From: Jeff Garzik @ 2009-05-20 18:51 UTC (permalink / raw)
  To: Gwendal Grignou; +Cc: Grant Grundler, Jens Axboe, linux-ide, htejun

Gwendal Grignou wrote:
> Form the ATA and SATA spec, NCQ is per device. It is possible to
> assign the same tag on different port, the disks and PMP will not
> care.
> However, today, we assign tag on a port basis [see
> __ata_qc_from_tag()], therefore only 32 commands can be inflight to
> all devices behind a given port.
> 
> Being able to a do a mapping qc <---> {pmp port, tag} instead of just
> qc <---> {tag} will provide a performance boost when disks supporting
> NCQ are connected behind a PMP. Maybe it can be done, by moving qcmd
> from ata_port to ata_link.
> 
> As Tejun said, the patch needs more work to be able to support same
> tag used on 2 different links behind the same port.
> Also, given we are changing the amount of commands we sent to the
> controller, we would have to change can_queue from ATA_MAX_QUEUE to
> ATA_MAX_QUEUE*n, where n is the number of ports supported by the
> controller [max SATA_PMP_MAX_PORTS, but some controller, like SiI3132
> only supports 5 devices, and other may only support n = 1]
> 
> When done, the patch will require a great amount of testing, given we
> will exercise the controllers in a brand new way. A white list might
> be necessary.

Well, I think a few things are getting confused here.

Assigning 32 tags to each port is the best we can do, given existing 
hardware, which provides command slots on a per-port basis.

can_queue is largely a host-wide limit that does not really apply to
modern SATA, because we (and Jens' patch) set queue depth properly.
can_queue is mainly used these days for master/slave arbitration, and we 
mainly need to ensure that can_queue does not add additional, unneeded 
limits on top of the more fine-grained limits being configured.

	Jeff






* Re: [PATCH 1/2] libata: switch to using block layer tagging support
  2009-05-20 18:50       ` James Bottomley
@ 2009-05-20 18:58         ` Jeff Garzik
  2009-05-20 19:04           ` Jeff Garzik
  0 siblings, 1 reply; 23+ messages in thread
From: Jeff Garzik @ 2009-05-20 18:58 UTC (permalink / raw)
  To: James Bottomley
  Cc: Gwendal Grignou, Grant Grundler, Jens Axboe, linux-ide, htejun

James Bottomley wrote:
> On Wed, 2009-05-20 at 11:08 -0700, Gwendal Grignou wrote:
>> Form the ATA and SATA spec, NCQ is per device. It is possible to
>> assign the same tag on different port, the disks and PMP will not
>> care.
>> However, today, we assign tag on a port basis [see
>> __ata_qc_from_tag()], therefore only 32 commands can be inflight to
>> all devices behind a given port.
>>
>> Being able to a do a mapping qc <---> {pmp port, tag} instead of just
>> qc <---> {tag} will provide a performance boost when disks supporting
>> NCQ are connected behind a PMP. Maybe it can be done, by moving qcmd
>> from ata_port to ata_link.
>>
>> As Tejun said, the patch needs more work to be able to support same
>> tag used on 2 different links behind the same port.
>> Also, given we are changing the amount of commands we sent to the
>> controller, we would have to change can_queue from ATA_MAX_QUEUE to
>> ATA_MAX_QUEUE*n, where n is the number of ports supported by the
>> controller [max SATA_PMP_MAX_PORTS, but some controller, like SiI3132
>> only supports 5 devices, and other may only support n = 1]
>>
>> When done, the patch will require a great amount of testing, given we
>> will exercise the controllers in a brand new way. A white list might
>> be necessary.
>>
>> Jens, using SCSI tagging for ata commands is a great idea, but it is
>> no small feat...
> 
> So realistically, you want one block queue per PMP port rather than an
> artificial limits adjustment on the single queue per output phy ... this
> shouldn't be too hard: it is exactly the way the SAS transport class
> (and libsas) works today for expander connected SAS devices.

Well...   The limiting factor is "ATA command slots per SATA phy", which 
defines $N active ATA commands per port.  If a SATA NCQ device is 
attached, you may have $N active ATA commands queued.  If two SATA NCQ 
devices are attached to a PMP, which is attached to the SATA controller, 
the two devices share the limit of $N active ATA commands queued.  If 32 
devices are attached to a PMP, all 32 devices share the $N command queue 
limit.

But additionally, as Tejun demonstrated, you might have a mix of NCQ and 
non-NCQ devices attached to the PMP.

Thus, it is a case of nested limits:

- $N maximum active commands per port; typically N==32
- $M maximum active commands per device; typically M==1 or M==32

Regards,

	Jeff






* Re: [PATCH 1/2] libata: switch to using block layer tagging support
  2009-05-20 18:58         ` Jeff Garzik
@ 2009-05-20 19:04           ` Jeff Garzik
  2009-05-20 19:42             ` Gwendal Grignou
  0 siblings, 1 reply; 23+ messages in thread
From: Jeff Garzik @ 2009-05-20 19:04 UTC (permalink / raw)
  To: James Bottomley
  Cc: Gwendal Grignou, Grant Grundler, Jens Axboe, linux-ide, htejun

Jeff Garzik wrote:
> Well...   The limiting factor is "ATA command slots per SATA phy", which 
> defines $N active ATA commands per port.  If a SATA NCQ device is 
> attached, you may have $N active ATA commands queued.  If two SATA NCQ 
> devices are attached to a PMP, which is attached to the SATA controller, 
> the two devices share the limit of $N active ATA commands queued.  If 32 
> devices are attached to a PMP, all 32 devices share the $N command queue 
> limit.
> 
> But additionally, as Tejun demonstrated, you might have a mix of NCQ and 
> non-NCQ devices attached to the PMP.
> 
> Thus, it is a case of nested limits:
> 
> - $N maximum active commands per port; typically N==32
> - $M maximum active commands per device; typically N==1 or N==32


Note that this applies /somewhat/ to mvsas as well:  mvsas has up to 16 
"ATA register sets" that the OS driver may assign to various ATA devices 
on a single phy.

This simulates the "command slot" concept on dedicated SATA+PMP+NCQ 
hardware like AHCI or sata_sil24.

If you attach more than 16 ATA devices to a PMP, which is theoretically 
possible, then mvsas MUST fail discovery, because it cannot address that 
many ATA devices.

But mvsas's different architecture means that it does not have similar 
"command slot" limits.

	Jeff





* Re: [PATCH 1/2] libata: switch to using block layer tagging support
  2009-05-20 17:10   ` Grant Grundler
  2009-05-20 18:08     ` Gwendal Grignou
@ 2009-05-20 19:09     ` Jeff Garzik
  1 sibling, 0 replies; 23+ messages in thread
From: Jeff Garzik @ 2009-05-20 19:09 UTC (permalink / raw)
  To: Grant Grundler; +Cc: Jens Axboe, linux-ide, htejun

Grant Grundler wrote:
> On Wed, May 20, 2009 at 12:00 AM, Jens Axboe <jens.axboe@oracle.com> wrote:
>> libata currently has a pretty dumb ATA_MAX_QUEUE loop for finding
>> a free tag to use. Instead of fixing that up, convert libata to
>> using block layer tagging - gets rid of code in libata, and is also
>> much faster.
> ...
>> @@ -1137,7 +1141,17 @@ static int ata_scsi_dev_config(struct scsi_device *sdev,
>>
>>                depth = min(sdev->host->can_queue, ata_id_queue_depth(dev->id));
>>                depth = min(ATA_MAX_QUEUE - 1, depth);
>> -               scsi_adjust_queue_depth(sdev, MSG_SIMPLE_TAG, depth);
>> +
>> +               /*
>> +                * If this device is behind a port multiplier, we have
>> +                * to share the tag map between all devices on that PMP.
>> +                * Set up the shared tag map here and we get automatic.
> 
> Automatic what?
> 
>> +                */
>> +               if (dev->link->ap->pmp_link)
>> +                       scsi_init_shared_tag_map(sdev->host, ATA_MAX_QUEUE - 1);
>> +
>> +               scsi_set_tag_type(sdev, MSG_SIMPLE_TAG);
>> +               scsi_activate_tcq(sdev, depth);
> 
> I just read Tejun's reply and it sounds right what he's saying.
> But can SATA controllers handle NCQ and !NCQ devices on the same port?
> Can the PMP handle it?
> 
> If both can, I don't understand how a mixed config works today.

A mixed config over PMP works today by setting the proper depth iff NCQ 
(== will not send too many cmds to device, if !NCQ) and using link-wide 
tag allocation (== will not send too many cmds to link).
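In code terms that is roughly the following (a paraphrase of the existing
behaviour for illustration, not new code; the helper name is hypothetical, but
ata_ncq_enabled(), ata_id_queue_depth() and scsi_adjust_queue_depth() are the
existing interfaces):

static void sketch_set_depth(struct scsi_device *sdev, struct ata_device *dev)
{
	int depth = 1;		/* !NCQ: never more than one command in flight */

	if (ata_ncq_enabled(dev)) {
		depth = min(sdev->host->can_queue, ata_id_queue_depth(dev->id));
		depth = min(ATA_MAX_QUEUE - 1, depth);	/* link-wide tag space */
	}

	scsi_adjust_queue_depth(sdev, MSG_SIMPLE_TAG, depth);
}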

	Jeff






* old-EH and SAS (was Re: [PATCH 2/2] block: add function for waiting for a specific free tag)
  2009-05-20 11:55   ` Tejun Heo
@ 2009-05-20 19:34     ` Jeff Garzik
  2009-05-21 16:34       ` Brian King
  0 siblings, 1 reply; 23+ messages in thread
From: Jeff Garzik @ 2009-05-20 19:34 UTC (permalink / raw)
  To: Tejun Heo; +Cc: Jens Axboe, linux-ide, Brian King, linux-scsi


(changing subject...)

> there won't be any race around it (the old-EH path is broken if
> invoked with commands in flight anyway, so doesn't matter).  Also, as

Speaking of the old-EH...  as of 
67651ee5710c45ea62fae68b768d65395ccf47c2 there are no drivers/ata/* 
drivers remaining that use old-EH.

old-EH now exists _entirely_ for a couple SAS drivers, and it is an ugly 
hack, so I wanted to take a moment to think about SAS, SATA, and 
SATA+SAS error handling.

Currently we have a few distinct phy+link configurations that EH
must deal with, and each requires its own implementation (this ignores 
legacy SFF and other non-phy topologies):

1) SATA PHY.  This is what libata EH handles now: direct control over 
SATA PHY and link.

2) SAS+SATA PHY.  Essentially a super-set of #1, this includes nested 
expander configurations, direct attachment of SAS or SATA, etc.  Uses 
libsas.

3) SAS+SATA firmware.  Not quite as "low level" as #2, does not use libsas.

Each one of these clearly should use the same code for configuring and 
managing ATA devices, including per-device EH.

Move up to the link level, and things start to get ugly.

_Ideally_, libsas should take over all of link exception handling from 
libata, except for the final-link PMP handling.  In reality, I think we 
will have to deal with libsas doing 100% of link EH including PMP 
handling, and libata will continue to perform link EH w/ PMP for !libsas 
hardware.

The integration of discovery is pretty poor -- you wind up with one glob 
of SATA devices and another glob of SCSI devices, with two separate 
EH+scan processes.  Ideally libsas should tell libata to scan a single 
SATA phy, and handle parallelism/exclusion in libsas for SATA+SAS 
configurations.

Brian King did a new-EH conversion for ipr, some time ago.  Maybe that 
work could be picked up, extended to libsas, and permit removal of all 
the old-EH code remaining in libata.

Maybe I should s/eng_timeout/sas_timeout/ to emphasize the current state 
of code ;-)

	Jeff






* Re: [PATCH 1/2] libata: switch to using block layer tagging support
  2009-05-20 19:04           ` Jeff Garzik
@ 2009-05-20 19:42             ` Gwendal Grignou
  2009-05-20 19:47               ` Jeff Garzik
  2009-05-21 13:44               ` Mark Lord
  0 siblings, 2 replies; 23+ messages in thread
From: Gwendal Grignou @ 2009-05-20 19:42 UTC (permalink / raw)
  To: Jeff Garzik
  Cc: James Bottomley, Grant Grundler, Jens Axboe, linux-ide, htejun

Jeff, you're right, I made some mistakes:
- Reading the SiI3132 doc again, it only supports 32 commands per port, so
the tags must be shared among all the drives behind the same port.
But other chipsets do support up to 128 commands per port.
- Jens' patch works when all disks support NCQ. Only when we have
a mix of drives with and without NCQ do we have a problem.

We cannot have more than 16 devices per ata_port: a PMP can only have
15 device ports [SATA 2.6, section 16.2], and we cannot connect a PMP to
another one.

Gwendal.

On Wed, May 20, 2009 at 12:04 PM, Jeff Garzik <jeff@garzik.org> wrote:
> Jeff Garzik wrote:
>>
>> Well...   The limiting factor is "ATA command slots per SATA phy", which
>> defines $N active ATA commands per port.  If a SATA NCQ device is attached,
>> you may have $N active ATA commands queued.  If two SATA NCQ devices are
>> attached to a PMP, which is attached to the SATA controller, the two devices
>> share the limit of $N active ATA commands queued.  If 32 devices are
>> attached to a PMP, all 32 devices share the $N command queue limit.
>>
>> But additionally, as Tejun demonstrated, you might have a mix of NCQ and
>> non-NCQ devices attached to the PMP.
>>
>> Thus, it is a case of nested limits:
>>
>> - $N maximum active commands per port; typically N==32
>> - $M maximum active commands per device; typically N==1 or N==32
>
>
> Note that this applies /somewhat/ to mvsas as well:  mvsas has up to 16 "ATA
> register sets" that the OS driver may assign to various ATA devices on a
> single phy.
>
> This simulates the "command slot" concept on dedicated SATA+PMP+NCQ hardware
> like AHCI or sata_sil24.
>
> If you attach more than 16 ATA devices to a PMP, which is theoretically
> possible, then mvsas MUST fail discovery, because it cannot address that
> many ATA devices.
>
> But mvsas's different architecture means that it does not have similar
> "command slot" limits.
>
>        Jeff
>
>
>
>


* Re: [PATCH 1/2] libata: switch to using block layer tagging support
  2009-05-20 19:42             ` Gwendal Grignou
@ 2009-05-20 19:47               ` Jeff Garzik
  2009-05-21 13:44               ` Mark Lord
  1 sibling, 0 replies; 23+ messages in thread
From: Jeff Garzik @ 2009-05-20 19:47 UTC (permalink / raw)
  To: Gwendal Grignou
  Cc: James Bottomley, Grant Grundler, Jens Axboe, linux-ide, htejun

Gwendal Grignou wrote:
> We can not have more than 16 devices per ata_port: a PMP can only have
> 15 device ports [Sata 2.6- 16.2], and we can not connect a PMP to
> another one.

Whoops, you're right.  I guess I mixed up SAS expanders and PMP's, when 
coughing up that "32".

	Jeff




* Re: [PATCH 1/2] libata: switch to using block layer tagging support
  2009-05-20 19:42             ` Gwendal Grignou
  2009-05-20 19:47               ` Jeff Garzik
@ 2009-05-21 13:44               ` Mark Lord
  2009-05-21 17:27                 ` Jeff Garzik
  1 sibling, 1 reply; 23+ messages in thread
From: Mark Lord @ 2009-05-21 13:44 UTC (permalink / raw)
  To: Gwendal Grignou
  Cc: Jeff Garzik, James Bottomley, Grant Grundler, Jens Axboe,
	linux-ide, htejun

Gwendal Grignou wrote:
> Jeff, you're right, I made some mistakes:
> - Reading SiI3132 doc again, it only supports 32 command per port, so
> the tags must be share among all the drives behind the same port.
> But other chipset does support up to 128 commands per port.
> - Jens' patch is working when all disks support NCQ. Only when we have
> a mix of drives with or without NCQ we have a problem.
..

Along those lines, the newer Marvell chipsets support up to 128 commands
per host port.

I seem to recall that the Pacific Digital Qstor chip
can manage an insane number of commands -- something like
32 per device per PM port [eg. (15 * 32) in total per host port].

Seems to be a widespread kind of thing for non-legacy chipsets.
Someday we really ought to beef up libata to allow host-chipset queuing
of non-NCQ commands, too.

Cheers 


* Re: old-EH and SAS (was Re: [PATCH 2/2] block: add function for waiting for a specific free tag)
  2009-05-20 19:34     ` old-EH and SAS (was Re: [PATCH 2/2] block: add function for waiting for a specific free tag) Jeff Garzik
@ 2009-05-21 16:34       ` Brian King
  0 siblings, 0 replies; 23+ messages in thread
From: Brian King @ 2009-05-21 16:34 UTC (permalink / raw)
  To: Jeff Garzik; +Cc: Tejun Heo, Jens Axboe, linux-ide, linux-scsi

Jeff Garzik wrote:
> Brian King did a new-EH conversion for ipr, some time ago.  Maybe that
> work could be picked up, extended to libsas, and permit removal of all
> the old-EH code remaining in libata.

There are a couple different ways to accomplish this, and they relate to
how many scsi hosts we end up using for a single SAS HBA.

Single scsi_host solution
This solution really requires libata to more or less stop using scsi core.
At the very least the concept of it "owning" the scsi host must go away.
Additionally, as far as EH goes, quiescing the entire scsi host each time
we want to do some exception handling for a SATA device kills the performance
of the SAS devices on the HBA. So we would need a better-layered EH that
only quiesces what needs to be quiesced and then calls out to different
pluggable EH handling routines.

Multiple scsi_host solution
This is what my patch to ipr did. It was the path of least resistance at
the time and worked reasonably well for ipr, but may not have been the
best solution for libsas without further enhancements. In this solution,
there is a scsi_host for each ATA port found on the SAS fabric. This
allows most of the existing libata code to simply work with minimal changes.
The biggest issue with this approach is that we lose adapter queue depth tracking.
We really would need queue groups or some similar solution in order for this
to work for libsas.
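For illustration, the multiple-scsi_host approach boils down to something like
this at discovery time (a hedged sketch only; the function name and error
handling are placeholders, not code from ipr or libsas):

#include <scsi/scsi_host.h>
#include <linux/libata.h>

/* allocate and register one Scsi_Host per ATA port found on the fabric */
static int sketch_attach_ata_port(struct device *parent,
				  struct scsi_host_template *sht)
{
	struct Scsi_Host *shost;
	int rc;

	shost = scsi_host_alloc(sht, sizeof(struct ata_port *));
	if (!shost)
		return -ENOMEM;

	/* per-port limits, so most of the existing libata code just works */
	shost->can_queue = ATA_MAX_QUEUE - 1;

	rc = scsi_add_host(shost, parent);
	if (rc)
		scsi_host_put(shost);
	return rc;
}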

-Brian

-- 
Brian King
Linux on Power Virtualization
IBM Linux Technology Center




* Re: [PATCH 1/2] libata: switch to using block layer tagging support
  2009-05-21 13:44               ` Mark Lord
@ 2009-05-21 17:27                 ` Jeff Garzik
  0 siblings, 0 replies; 23+ messages in thread
From: Jeff Garzik @ 2009-05-21 17:27 UTC (permalink / raw)
  To: Mark Lord
  Cc: Gwendal Grignou, James Bottomley, Grant Grundler, Jens Axboe,
	linux-ide, htejun

Mark Lord wrote:
> Gwendal Grignou wrote:
>> Jeff, you're right, I made some mistakes:
>> - Reading SiI3132 doc again, it only supports 32 command per port, so
>> the tags must be share among all the drives behind the same port.
>> But other chipset does support up to 128 commands per port.
>> - Jens' patch is working when all disks support NCQ. Only when we have
>> a mix of drives with or without NCQ we have a problem.
> ..
> 
> Along those lines, the newer Marvell chipsets support up to 128 commands
> per host port.
> 
> I seem to recall that the Pacific Digital Qstor chip
> can manage an insane number of commands -- something like
> 32 per device per PM port [eg. (15 * 32) in total per host port].
> 
> Seems to be a widespread kind of thing for non-legacy chipsets.
> Someday we really ought to beef up libata to allow host-chipset queuing
> of non-NCQ commands, too.

SAS+SATA chips do not necessarily have per-port limits at all, even.

The limit may instead be a host-wide command queue size limit, rather 
than a per-port limit.

	Jeff






* Re: [PATCH 1/2] libata: switch to using block layer tagging support
  2009-05-20  7:00 ` [PATCH 1/2] libata: switch to using block layer tagging support Jens Axboe
  2009-05-20 11:53   ` Tejun Heo
  2009-05-20 17:10   ` Grant Grundler
@ 2009-06-10 15:11   ` Jeff Garzik
  2009-06-11  2:10     ` Tejun Heo
  2 siblings, 1 reply; 23+ messages in thread
From: Jeff Garzik @ 2009-06-10 15:11 UTC (permalink / raw)
  To: Jens Axboe; +Cc: linux-ide, htejun

Jens Axboe wrote:
> libata currently has a pretty dumb ATA_MAX_QUEUE loop for finding
> a free tag to use. Instead of fixing that up, convert libata to
> using block layer tagging - gets rid of code in libata, and is also
> much faster.
> 
> Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
> ---
>  drivers/ata/libata-core.c |   65 ++++----------------------------------------
>  drivers/ata/libata-scsi.c |   23 ++++++++++++++-
>  drivers/ata/libata.h      |   19 ++++++++++++-
>  include/linux/libata.h    |    1 -
>  4 files changed, 44 insertions(+), 64 deletions(-)

Patch looks good to me, with regards (a) simplex, (b) master/slave, (c) 
non-queued and (d) NCQ.  I'm not confident about the PMP case, but if 
Tejun is happy, I guess I can go ahead and queue these...

	Jeff






* Re: [PATCH 1/2] libata: switch to using block layer tagging support
  2009-06-10 15:11   ` Jeff Garzik
@ 2009-06-11  2:10     ` Tejun Heo
  0 siblings, 0 replies; 23+ messages in thread
From: Tejun Heo @ 2009-06-11  2:10 UTC (permalink / raw)
  To: Jeff Garzik; +Cc: Jens Axboe, linux-ide

Jeff Garzik wrote:
> Jens Axboe wrote:
>> libata currently has a pretty dumb ATA_MAX_QUEUE loop for finding
>> a free tag to use. Instead of fixing that up, convert libata to
>> using block layer tagging - gets rid of code in libata, and is also
>> much faster.
>>
>> Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
>> ---
>>  drivers/ata/libata-core.c |   65
>> ++++----------------------------------------
>>  drivers/ata/libata-scsi.c |   23 ++++++++++++++-
>>  drivers/ata/libata.h      |   19 ++++++++++++-
>>  include/linux/libata.h    |    1 -
>>  4 files changed, 44 insertions(+), 64 deletions(-)
> 
> Patch looks good to me, with regards (a) simplex, (b) master/slave, (c)
> non-queued and (d) NCQ.  I'm not confident about the PMP case, but if
> Tejun is happy, I guess I can go ahead and queue these...

Unfortunately, I'm not quite happy yet. :-) Jens, any progress on
this?

Thanks.

-- 
tejun

