All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 1/3] block: support different tag allocation policy
@ 2014-12-18 18:46 Shaohua Li
  2014-12-18 18:46 ` [PATCH v2 2/3] blk-mq: add " Shaohua Li
  2014-12-18 18:46 ` [PATCH v2 3/3] libata: use blk taging Shaohua Li
  0 siblings, 2 replies; 23+ messages in thread
From: Shaohua Li @ 2014-12-18 18:46 UTC (permalink / raw)
  To: linux-ide; +Cc: Kernel-team, Jens Axboe, Tejun Heo, Christoph Hellwig

The libata tag allocation is using a round-robin policy. Next patch will
make libata use block generic tag allocation, so let's add a policy to
tag allocation.

Currently two policies: FIFO (default) and round-robin.

Cc: Jens Axboe <axboe@fb.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Shaohua Li <shli@fb.com>
---
 block/blk-tag.c          | 33 +++++++++++++++++++++++++--------
 drivers/block/osdblk.c   |  2 +-
 drivers/scsi/scsi_scan.c |  3 ++-
 include/linux/blkdev.h   |  8 ++++++--
 include/scsi/scsi_host.h |  3 +++
 include/scsi/scsi_tcq.h  |  3 ++-
 6 files changed, 39 insertions(+), 13 deletions(-)

diff --git a/block/blk-tag.c b/block/blk-tag.c
index a185b86..f0344e6 100644
--- a/block/blk-tag.c
+++ b/block/blk-tag.c
@@ -119,7 +119,7 @@ init_tag_map(struct request_queue *q, struct blk_queue_tag *tags, int depth)
 }
 
 static struct blk_queue_tag *__blk_queue_init_tags(struct request_queue *q,
-						   int depth)
+						int depth, int alloc_policy)
 {
 	struct blk_queue_tag *tags;
 
@@ -131,6 +131,8 @@ static struct blk_queue_tag *__blk_queue_init_tags(struct request_queue *q,
 		goto fail;
 
 	atomic_set(&tags->refcnt, 1);
+	tags->alloc_policy = alloc_policy;
+	tags->next_tag = 0;
 	return tags;
 fail:
 	kfree(tags);
@@ -140,10 +142,11 @@ static struct blk_queue_tag *__blk_queue_init_tags(struct request_queue *q,
 /**
  * blk_init_tags - initialize the tag info for an external tag map
  * @depth:	the maximum queue depth supported
+ * @alloc_policy: tag allocation policy
  **/
-struct blk_queue_tag *blk_init_tags(int depth)
+struct blk_queue_tag *blk_init_tags(int depth, int alloc_policy)
 {
-	return __blk_queue_init_tags(NULL, depth);
+	return __blk_queue_init_tags(NULL, depth, alloc_policy);
 }
 EXPORT_SYMBOL(blk_init_tags);
 
@@ -152,19 +155,20 @@ EXPORT_SYMBOL(blk_init_tags);
  * @q:  the request queue for the device
  * @depth:  the maximum queue depth supported
  * @tags: the tag to use
+ * @alloc_policy: tag allocation policy
  *
  * Queue lock must be held here if the function is called to resize an
  * existing map.
  **/
 int blk_queue_init_tags(struct request_queue *q, int depth,
-			struct blk_queue_tag *tags)
+			struct blk_queue_tag *tags, int alloc_policy)
 {
 	int rc;
 
 	BUG_ON(tags && q->queue_tags && tags != q->queue_tags);
 
 	if (!tags && !q->queue_tags) {
-		tags = __blk_queue_init_tags(q, depth);
+		tags = __blk_queue_init_tags(q, depth, alloc_policy);
 
 		if (!tags)
 			return -ENOMEM;
@@ -344,9 +348,21 @@ int blk_queue_start_tag(struct request_queue *q, struct request *rq)
 	}
 
 	do {
-		tag = find_first_zero_bit(bqt->tag_map, max_depth);
-		if (tag >= max_depth)
-			return 1;
+		if (bqt->alloc_policy == BLK_TAG_ALLOC_FIFO) {
+			tag = find_first_zero_bit(bqt->tag_map, max_depth);
+			if (tag >= max_depth)
+				return 1;
+		} else {
+			int start = bqt->next_tag;
+			int size = min_t(int, bqt->max_depth, max_depth + start);
+			tag = find_next_zero_bit(bqt->tag_map, size, start);
+			if (tag >= size && start + size > bqt->max_depth) {
+				size = start + size - bqt->max_depth;
+				tag = find_first_zero_bit(bqt->tag_map, size);
+			}
+			if (tag >= size)
+				return 1;
+		}
 
 	} while (test_and_set_bit_lock(tag, bqt->tag_map));
 	/*
@@ -354,6 +370,7 @@ int blk_queue_start_tag(struct request_queue *q, struct request *rq)
 	 * See blk_queue_end_tag for details.
 	 */
 
+	bqt->next_tag = (tag + 1) % bqt->max_depth;
 	rq->cmd_flags |= REQ_QUEUED;
 	rq->tag = tag;
 	bqt->tag_index[tag] = rq;
diff --git a/drivers/block/osdblk.c b/drivers/block/osdblk.c
index 79aa179..e229425 100644
--- a/drivers/block/osdblk.c
+++ b/drivers/block/osdblk.c
@@ -423,7 +423,7 @@ static int osdblk_init_disk(struct osdblk_device *osdev)
 	}
 
 	/* switch queue to TCQ mode; allocate tag map */
-	rc = blk_queue_init_tags(q, OSDBLK_MAX_REQ, NULL);
+	rc = blk_queue_init_tags(q, OSDBLK_MAX_REQ, NULL, BLK_TAG_ALLOC_FIFO);
 	if (rc) {
 		blk_cleanup_queue(q);
 		put_disk(disk);
diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index 983aed1..921a8c8 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -290,7 +290,8 @@ static struct scsi_device *scsi_alloc_sdev(struct scsi_target *starget,
 	if (!shost_use_blk_mq(sdev->host) &&
 	    (shost->bqt || shost->hostt->use_blk_tags)) {
 		blk_queue_init_tags(sdev->request_queue,
-				    sdev->host->cmd_per_lun, shost->bqt);
+				    sdev->host->cmd_per_lun, shost->bqt,
+				    shost->hostt->tag_alloc_policy);
 	}
 	scsi_change_queue_depth(sdev, sdev->host->cmd_per_lun);
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 92f4b4b..38b095d 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -272,7 +272,11 @@ struct blk_queue_tag {
 	int max_depth;			/* what we will send to device */
 	int real_max_depth;		/* what the array can hold */
 	atomic_t refcnt;		/* map can be shared */
+	int alloc_policy;		/* tag allocation policy */
+	int next_tag;			/* next tag */
 };
+#define BLK_TAG_ALLOC_FIFO 0 /* allocate starting from 0 */
+#define BLK_TAG_ALLOC_RR 1 /* allocate starting from last allocated tag */
 
 #define BLK_SCSI_MAX_CMDS	(256)
 #define BLK_SCSI_CMD_PER_LONG	(BLK_SCSI_MAX_CMDS / (sizeof(long) * 8))
@@ -1139,11 +1143,11 @@ static inline bool blk_needs_flush_plug(struct task_struct *tsk)
 extern int blk_queue_start_tag(struct request_queue *, struct request *);
 extern struct request *blk_queue_find_tag(struct request_queue *, int);
 extern void blk_queue_end_tag(struct request_queue *, struct request *);
-extern int blk_queue_init_tags(struct request_queue *, int, struct blk_queue_tag *);
+extern int blk_queue_init_tags(struct request_queue *, int, struct blk_queue_tag *, int);
 extern void blk_queue_free_tags(struct request_queue *);
 extern int blk_queue_resize_tags(struct request_queue *, int);
 extern void blk_queue_invalidate_tags(struct request_queue *);
-extern struct blk_queue_tag *blk_init_tags(int);
+extern struct blk_queue_tag *blk_init_tags(int, int);
 extern void blk_free_tags(struct blk_queue_tag *);
 
 static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt,
diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
index e939d2b..642884e 100644
--- a/include/scsi/scsi_host.h
+++ b/include/scsi/scsi_host.h
@@ -415,6 +415,9 @@ struct scsi_host_template {
 	 */
 	unsigned char present;
 
+	/* If use block layer to manage tags, this is tag allocation policy */
+	int tag_alloc_policy;
+
 	/*
 	 * Let the block layer assigns tags to all commands.
 	 */
diff --git a/include/scsi/scsi_tcq.h b/include/scsi/scsi_tcq.h
index fe4a702..80ae8f8 100644
--- a/include/scsi/scsi_tcq.h
+++ b/include/scsi/scsi_tcq.h
@@ -102,7 +102,8 @@ static inline int scsi_init_shared_tag_map(struct Scsi_Host *shost, int depth)
 	 * devices on the shared host (for libata)
 	 */
 	if (!shost->bqt) {
-		shost->bqt = blk_init_tags(depth);
+		shost->bqt = blk_init_tags(depth,
+			shost->hostt->tag_alloc_policy);
 		if (!shost->bqt)
 			return -ENOMEM;
 	}
-- 
1.8.1


^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH v2 2/3] blk-mq: add tag allocation policy
  2014-12-18 18:46 [PATCH v2 1/3] block: support different tag allocation policy Shaohua Li
@ 2014-12-18 18:46 ` Shaohua Li
  2014-12-22 23:25   ` Jens Axboe
  2014-12-18 18:46 ` [PATCH v2 3/3] libata: use blk taging Shaohua Li
  1 sibling, 1 reply; 23+ messages in thread
From: Shaohua Li @ 2014-12-18 18:46 UTC (permalink / raw)
  To: linux-ide; +Cc: Kernel-team, Jens Axboe, Tejun Heo, Christoph Hellwig

This is the blk-mq part to support tag allocation policy. The default
allocation policy isn't changed (though it's not a strict FIFO). The new
policy is round-robin for libata. But it's a try-best implementation. If
multiple tasks are competing, the tags returned will be mixed (which is
unavoidable even with !mq, as requests from different tasks can be
mixed in queue)

Cc: Jens Axboe <axboe@fb.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Shaohua Li <shli@fb.com>
---
 block/blk-mq-tag.c      | 35 +++++++++++++++++++++++++----------
 block/blk-mq-tag.h      |  4 +++-
 block/blk-mq.c          |  3 ++-
 drivers/scsi/scsi_lib.c |  2 ++
 include/linux/blk-mq.h  |  8 ++++++++
 5 files changed, 40 insertions(+), 12 deletions(-)

diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 32e8dbb..d4d7fa1 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -134,10 +134,13 @@ static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
 	return atomic_read(&hctx->nr_active) < depth;
 }
 
-static int __bt_get_word(struct blk_align_bitmap *bm, unsigned int last_tag)
+#define BT_ALLOC_RR(bt) (atomic_read(&bt->rr_next) != -1)
+
+static int __bt_get_word(struct blk_align_bitmap *bm, unsigned int last_tag,
+			 bool nowrap)
 {
 	int tag, org_last_tag, end;
-	bool wrap = last_tag != 0;
+	bool wrap = last_tag != 0 && !nowrap;
 
 	org_last_tag = last_tag;
 	end = bm->depth;
@@ -183,11 +186,15 @@ static int __bt_get(struct blk_mq_hw_ctx *hctx, struct blk_mq_bitmap_tags *bt,
 	if (!hctx_may_queue(hctx, bt))
 		return -1;
 
-	last_tag = org_last_tag = *tag_cache;
+	if (unlikely(BT_ALLOC_RR(bt)))
+		last_tag = org_last_tag = atomic_read(&bt->rr_next);
+	else
+		last_tag = org_last_tag = *tag_cache;
 	index = TAG_TO_INDEX(bt, last_tag);
 
 	for (i = 0; i < bt->map_nr; i++) {
-		tag = __bt_get_word(&bt->map[index], TAG_TO_BIT(bt, last_tag));
+		tag = __bt_get_word(&bt->map[index], TAG_TO_BIT(bt, last_tag),
+				    BT_ALLOC_RR(bt));
 		if (tag != -1) {
 			tag += (index << bt->bits_per_word);
 			goto done;
@@ -206,6 +213,8 @@ static int __bt_get(struct blk_mq_hw_ctx *hctx, struct blk_mq_bitmap_tags *bt,
 	 * up using the specific cached tag.
 	 */
 done:
+	if (unlikely(BT_ALLOC_RR(bt)))
+		atomic_set(&bt->rr_next, (tag + 1) % bt->depth);
 	if (tag == org_last_tag) {
 		last_tag = tag + 1;
 		if (last_tag >= bt->depth - 1)
@@ -463,7 +472,7 @@ static void bt_update_count(struct blk_mq_bitmap_tags *bt,
 }
 
 static int bt_alloc(struct blk_mq_bitmap_tags *bt, unsigned int depth,
-			int node, bool reserved)
+			int node, bool reserved, int alloc_policy)
 {
 	int i;
 
@@ -513,6 +522,10 @@ static int bt_alloc(struct blk_mq_bitmap_tags *bt, unsigned int depth,
 		atomic_set(&bt->bs[i].wait_cnt, bt->wake_cnt);
 	}
 
+	if (alloc_policy == BLK_TAG_ALLOC_RR)
+		atomic_set(&bt->rr_next, 0);
+	else
+		atomic_set(&bt->rr_next, -1);
 	return 0;
 }
 
@@ -523,13 +536,14 @@ static void bt_free(struct blk_mq_bitmap_tags *bt)
 }
 
 static struct blk_mq_tags *blk_mq_init_bitmap_tags(struct blk_mq_tags *tags,
-						   int node)
+						   int node, int alloc_policy)
 {
 	unsigned int depth = tags->nr_tags - tags->nr_reserved_tags;
 
-	if (bt_alloc(&tags->bitmap_tags, depth, node, false))
+	if (bt_alloc(&tags->bitmap_tags, depth, node, false, alloc_policy))
 		goto enomem;
-	if (bt_alloc(&tags->breserved_tags, tags->nr_reserved_tags, node, true))
+	if (bt_alloc(&tags->breserved_tags, tags->nr_reserved_tags, node,
+		     true, alloc_policy))
 		goto enomem;
 
 	return tags;
@@ -540,7 +554,8 @@ static struct blk_mq_tags *blk_mq_init_bitmap_tags(struct blk_mq_tags *tags,
 }
 
 struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags,
-				     unsigned int reserved_tags, int node)
+				     unsigned int reserved_tags,
+				     int node, int alloc_policy)
 {
 	struct blk_mq_tags *tags;
 
@@ -556,7 +571,7 @@ struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags,
 	tags->nr_tags = total_tags;
 	tags->nr_reserved_tags = reserved_tags;
 
-	return blk_mq_init_bitmap_tags(tags, node);
+	return blk_mq_init_bitmap_tags(tags, node, alloc_policy);
 }
 
 void blk_mq_free_tags(struct blk_mq_tags *tags)
diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h
index 6206ed1..3ffa336 100644
--- a/block/blk-mq-tag.h
+++ b/block/blk-mq-tag.h
@@ -26,6 +26,8 @@ struct blk_mq_bitmap_tags {
 
 	atomic_t wake_index;
 	struct bt_wait_state *bs;
+
+	atomic_t rr_next;
 };
 
 /*
@@ -45,7 +47,7 @@ struct blk_mq_tags {
 };
 
 
-extern struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags, unsigned int reserved_tags, int node);
+extern struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags, unsigned int reserved_tags, int node, int alloc_policy);
 extern void blk_mq_free_tags(struct blk_mq_tags *tags);
 
 extern unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index da1ab56..7bf3656 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1359,7 +1359,8 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set,
 	size_t rq_size, left;
 
 	tags = blk_mq_init_tags(set->queue_depth, set->reserved_tags,
-				set->numa_node);
+				set->numa_node,
+				BLK_MQ_FLAG_TO_ALLOC_POLICY(set->flags));
 	if (!tags)
 		return NULL;
 
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 43318d5..7fd8d17 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -2186,6 +2186,8 @@ int scsi_mq_setup_tags(struct Scsi_Host *shost)
 	shost->tag_set.cmd_size = cmd_size;
 	shost->tag_set.numa_node = NUMA_NO_NODE;
 	shost->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE;
+	shost->tag_set.flags |=
+		BLK_ALLOC_POLICY_TO_MQ_FLAG(shost->hostt->tag_alloc_policy);
 	shost->tag_set.driver_data = shost;
 
 	return blk_mq_alloc_tag_set(&shost->tag_set);
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 8aded9a..b4bf93a 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -147,6 +147,8 @@ enum {
 	BLK_MQ_F_SG_MERGE	= 1 << 2,
 	BLK_MQ_F_SYSFS_UP	= 1 << 3,
 	BLK_MQ_F_DEFER_ISSUE	= 1 << 4,
+	BLK_MQ_F_ALLOC_POLICY_START_BIT = 8,
+	BLK_MQ_F_ALLOC_POLICY_BITS = 1,
 
 	BLK_MQ_S_STOPPED	= 0,
 	BLK_MQ_S_TAG_ACTIVE	= 1,
@@ -155,6 +157,12 @@ enum {
 
 	BLK_MQ_CPU_WORK_BATCH	= 8,
 };
+#define BLK_MQ_FLAG_TO_ALLOC_POLICY(flags) \
+	((flags >> BLK_MQ_F_ALLOC_POLICY_START_BIT) & \
+		((1 << BLK_MQ_F_ALLOC_POLICY_BITS) - 1))
+#define BLK_ALLOC_POLICY_TO_MQ_FLAG(policy) \
+	((policy & ((1 << BLK_MQ_F_ALLOC_POLICY_BITS) - 1)) \
+		<< BLK_MQ_F_ALLOC_POLICY_START_BIT)
 
 struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *);
 void blk_mq_finish_init(struct request_queue *q);
-- 
1.8.1


^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH v2 3/3] libata: use blk taging
  2014-12-18 18:46 [PATCH v2 1/3] block: support different tag allocation policy Shaohua Li
  2014-12-18 18:46 ` [PATCH v2 2/3] blk-mq: add " Shaohua Li
@ 2014-12-18 18:46 ` Shaohua Li
  2015-01-09 18:15   ` Shaohua Li
  1 sibling, 1 reply; 23+ messages in thread
From: Shaohua Li @ 2014-12-18 18:46 UTC (permalink / raw)
  To: linux-ide; +Cc: Kernel-team, Jens Axboe, Tejun Heo, Christoph Hellwig

libata uses its own tag management which is duplication and the
implementation is poor. And if we switch to blk-mq, tag is build-in.
It's time to switch to generic taging.

The SAS driver has its own tag management, and looks we can't directly
map the host controler tag to SATA tag. So I just bypassed the SAS case.

Cc: Jens Axboe <axboe@fb.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Shaohua Li <shli@fb.com>
---
 drivers/ata/libata-core.c | 20 ++++++++++++++------
 drivers/ata/libata-scsi.c |  4 +++-
 drivers/ata/libata.h      |  2 +-
 include/linux/libata.h    |  1 +
 4 files changed, 19 insertions(+), 8 deletions(-)

diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 5c84fb5..a27d00f 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -1585,8 +1585,9 @@ unsigned ata_exec_internal_sg(struct ata_device *dev,
 	else
 		tag = 0;
 
-	if (test_and_set_bit(tag, &ap->qc_allocated))
-		BUG();
+	BUG_ON((!ap->scsi_host && test_and_set_bit(tag, &ap->qc_allocated)) ||
+		(ap->scsi_host && dev->sdev &&
+			blk_queue_find_tag(dev->sdev->request_queue, tag)));
 	qc = __ata_qc_from_tag(ap, tag);
 
 	qc->tag = tag;
@@ -4737,7 +4738,7 @@ void swap_buf_le16(u16 *buf, unsigned int buf_words)
  *	None.
  */
 
-static struct ata_queued_cmd *ata_qc_new(struct ata_port *ap)
+static struct ata_queued_cmd *ata_qc_new(struct ata_port *ap, int blktag)
 {
 	struct ata_queued_cmd *qc = NULL;
 	unsigned int max_queue = ap->host->n_tags;
@@ -4747,6 +4748,12 @@ static struct ata_queued_cmd *ata_qc_new(struct ata_port *ap)
 	if (unlikely(ap->pflags & ATA_PFLAG_FROZEN))
 		return NULL;
 
+	if (ap->scsi_host) {
+		qc = __ata_qc_from_tag(ap, blktag);
+		qc->tag = blktag;
+		return qc;
+	}
+
 	for (i = 0, tag = ap->last_tag + 1; i < max_queue; i++, tag++) {
 		tag = tag < max_queue ? tag : 0;
 
@@ -4773,12 +4780,12 @@ static struct ata_queued_cmd *ata_qc_new(struct ata_port *ap)
  *	None.
  */
 
-struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev)
+struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev, int blktag)
 {
 	struct ata_port *ap = dev->link->ap;
 	struct ata_queued_cmd *qc;
 
-	qc = ata_qc_new(ap);
+	qc = ata_qc_new(ap, blktag);
 	if (qc) {
 		qc->scsicmd = NULL;
 		qc->ap = ap;
@@ -4812,7 +4819,8 @@ void ata_qc_free(struct ata_queued_cmd *qc)
 	tag = qc->tag;
 	if (likely(ata_tag_valid(tag))) {
 		qc->tag = ATA_TAG_POISON;
-		clear_bit(tag, &ap->qc_allocated);
+		if (!ap->scsi_host)
+			clear_bit(tag, &ap->qc_allocated);
 	}
 }
 
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index e364e86..94339c2 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -756,7 +756,7 @@ static struct ata_queued_cmd *ata_scsi_qc_new(struct ata_device *dev,
 {
 	struct ata_queued_cmd *qc;
 
-	qc = ata_qc_new_init(dev);
+	qc = ata_qc_new_init(dev, cmd->request->tag);
 	if (qc) {
 		qc->scsicmd = cmd;
 		qc->scsidone = cmd->scsi_done;
@@ -3666,6 +3666,8 @@ int ata_scsi_add_hosts(struct ata_host *host, struct scsi_host_template *sht)
 		 */
 		shost->max_host_blocked = 1;
 
+		scsi_init_shared_tag_map(shost, host->n_tags);
+
 		rc = scsi_add_host_with_dma(ap->scsi_host,
 						&ap->tdev, ap->host->dev);
 		if (rc)
diff --git a/drivers/ata/libata.h b/drivers/ata/libata.h
index 5f4e0cc..4040513 100644
--- a/drivers/ata/libata.h
+++ b/drivers/ata/libata.h
@@ -63,7 +63,7 @@ extern struct ata_link *ata_dev_phys_link(struct ata_device *dev);
 extern void ata_force_cbl(struct ata_port *ap);
 extern u64 ata_tf_to_lba(const struct ata_taskfile *tf);
 extern u64 ata_tf_to_lba48(const struct ata_taskfile *tf);
-extern struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev);
+extern struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev, int tag);
 extern int ata_build_rw_tf(struct ata_taskfile *tf, struct ata_device *dev,
 			   u64 block, u32 n_block, unsigned int tf_flags,
 			   unsigned int tag);
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 2d18241..5f1c5606 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -1344,6 +1344,7 @@ extern struct device_attribute *ata_common_sdev_attrs[];
 	.ioctl			= ata_scsi_ioctl,		\
 	.queuecommand		= ata_scsi_queuecmd,		\
 	.can_queue		= ATA_DEF_QUEUE,		\
+	.tag_alloc_policy	= BLK_TAG_ALLOC_RR,		\
 	.this_id		= ATA_SHT_THIS_ID,		\
 	.cmd_per_lun		= ATA_SHT_CMD_PER_LUN,		\
 	.emulated		= ATA_SHT_EMULATED,		\
-- 
1.8.1


^ permalink raw reply related	[flat|nested] 23+ messages in thread

* Re: [PATCH v2 2/3] blk-mq: add tag allocation policy
  2014-12-18 18:46 ` [PATCH v2 2/3] blk-mq: add " Shaohua Li
@ 2014-12-22 23:25   ` Jens Axboe
  2015-01-02 19:43     ` Shaohua Li
  0 siblings, 1 reply; 23+ messages in thread
From: Jens Axboe @ 2014-12-22 23:25 UTC (permalink / raw)
  To: Shaohua Li, linux-ide; +Cc: Kernel-team, Tejun Heo, Christoph Hellwig

On 12/18/2014 11:46 AM, Shaohua Li wrote:
> This is the blk-mq part to support tag allocation policy. The default
> allocation policy isn't changed (though it's not a strict FIFO). The new
> policy is round-robin for libata. But it's a try-best implementation. If
> multiple tasks are competing, the tags returned will be mixed (which is
> unavoidable even with !mq, as requests from different tasks can be
> mixed in queue)

Can we get rid of the atomic_read() on non-rr? It's never set outside of 
initialization there.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v2 2/3] blk-mq: add tag allocation policy
  2014-12-22 23:25   ` Jens Axboe
@ 2015-01-02 19:43     ` Shaohua Li
  0 siblings, 0 replies; 23+ messages in thread
From: Shaohua Li @ 2015-01-02 19:43 UTC (permalink / raw)
  To: Jens Axboe; +Cc: linux-ide, Kernel-team, Tejun Heo, Christoph Hellwig

On Mon, Dec 22, 2014 at 04:25:19PM -0700, Jens Axboe wrote:
> On 12/18/2014 11:46 AM, Shaohua Li wrote:
> >This is the blk-mq part to support tag allocation policy. The default
> >allocation policy isn't changed (though it's not a strict FIFO). The new
> >policy is round-robin for libata. But it's a try-best implementation. If
> >multiple tasks are competing, the tags returned will be mixed (which is
> >unavoidable even with !mq, as requests from different tasks can be
> >mixed in queue)
> 
> Can we get rid of the atomic_read() on non-rr? It's never set
> outside of initialization there.

How about this updated one? I'm not sure if a function pointer call is
faster than the condition statement I use in the patch (for different
policy), but if you prefer a function pointer, I can change it.


>From 3f722ee2cea2f8694157b92b6d1f068c15c36961 Mon Sep 17 00:00:00 2001
Message-Id: <3f722ee2cea2f8694157b92b6d1f068c15c36961.1420227455.git.shli@fb.com>
In-Reply-To: <01887310712e5ceb9ce7392f240ed311021fb779.1420227455.git.shli@fb.com>
References: <01887310712e5ceb9ce7392f240ed311021fb779.1420227455.git.shli@fb.com>
From: Shaohua Li <shli@fb.com>
Date: Tue, 16 Dec 2014 13:02:56 -0800
Subject: [PATCH 2/3] blk-mq: add tag allocation policy

This is the blk-mq part to support tag allocation policy. The default
allocation policy isn't changed (though it's not a strict FIFO). The new
policy is round-robin for libata. But it's a try-best implementation. If
multiple tasks are competing, the tags returned will be mixed (which is
unavoidable even with !mq, as requests from different tasks can be
mixed in queue)

Cc: Jens Axboe <axboe@fb.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Shaohua Li <shli@fb.com>
---
 block/blk-mq-tag.c      | 39 ++++++++++++++++++++++++---------------
 block/blk-mq-tag.h      |  4 +++-
 block/blk-mq.c          |  3 ++-
 drivers/scsi/scsi_lib.c |  2 ++
 include/linux/blk-mq.h  |  8 ++++++++
 5 files changed, 39 insertions(+), 17 deletions(-)

diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 32e8dbb..f9e751b 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -134,10 +134,11 @@ static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
 	return atomic_read(&hctx->nr_active) < depth;
 }
 
-static int __bt_get_word(struct blk_align_bitmap *bm, unsigned int last_tag)
+static int __bt_get_word(struct blk_align_bitmap *bm, unsigned int last_tag,
+			 bool nowrap)
 {
 	int tag, org_last_tag, end;
-	bool wrap = last_tag != 0;
+	bool wrap = last_tag != 0 && !nowrap;
 
 	org_last_tag = last_tag;
 	end = bm->depth;
@@ -163,6 +164,8 @@ static int __bt_get_word(struct blk_align_bitmap *bm, unsigned int last_tag)
 	return tag;
 }
 
+#define BT_ALLOC_RR(tags) (tags->alloc_policy == BLK_TAG_ALLOC_RR)
+
 /*
  * Straight forward bitmap tag implementation, where each bit is a tag
  * (cleared == free, and set == busy). The small twist is using per-cpu
@@ -175,7 +178,7 @@ static int __bt_get_word(struct blk_align_bitmap *bm, unsigned int last_tag)
  * until the map is exhausted.
  */
 static int __bt_get(struct blk_mq_hw_ctx *hctx, struct blk_mq_bitmap_tags *bt,
-		    unsigned int *tag_cache)
+		    unsigned int *tag_cache, struct blk_mq_tags *tags)
 {
 	unsigned int last_tag, org_last_tag;
 	int index, i, tag;
@@ -187,7 +190,8 @@ static int __bt_get(struct blk_mq_hw_ctx *hctx, struct blk_mq_bitmap_tags *bt,
 	index = TAG_TO_INDEX(bt, last_tag);
 
 	for (i = 0; i < bt->map_nr; i++) {
-		tag = __bt_get_word(&bt->map[index], TAG_TO_BIT(bt, last_tag));
+		tag = __bt_get_word(&bt->map[index], TAG_TO_BIT(bt, last_tag),
+				    BT_ALLOC_RR(tags));
 		if (tag != -1) {
 			tag += (index << bt->bits_per_word);
 			goto done;
@@ -206,7 +210,7 @@ static int __bt_get(struct blk_mq_hw_ctx *hctx, struct blk_mq_bitmap_tags *bt,
 	 * up using the specific cached tag.
 	 */
 done:
-	if (tag == org_last_tag) {
+	if (tag == org_last_tag || unlikely(BT_ALLOC_RR(tags))) {
 		last_tag = tag + 1;
 		if (last_tag >= bt->depth - 1)
 			last_tag = 0;
@@ -235,13 +239,13 @@ static struct bt_wait_state *bt_wait_ptr(struct blk_mq_bitmap_tags *bt,
 static int bt_get(struct blk_mq_alloc_data *data,
 		struct blk_mq_bitmap_tags *bt,
 		struct blk_mq_hw_ctx *hctx,
-		unsigned int *last_tag)
+		unsigned int *last_tag, struct blk_mq_tags *tags)
 {
 	struct bt_wait_state *bs;
 	DEFINE_WAIT(wait);
 	int tag;
 
-	tag = __bt_get(hctx, bt, last_tag);
+	tag = __bt_get(hctx, bt, last_tag, tags);
 	if (tag != -1)
 		return tag;
 
@@ -252,7 +256,7 @@ static int bt_get(struct blk_mq_alloc_data *data,
 	do {
 		prepare_to_wait(&bs->wait, &wait, TASK_UNINTERRUPTIBLE);
 
-		tag = __bt_get(hctx, bt, last_tag);
+		tag = __bt_get(hctx, bt, last_tag, tags);
 		if (tag != -1)
 			break;
 
@@ -267,7 +271,7 @@ static int bt_get(struct blk_mq_alloc_data *data,
 		 * Retry tag allocation after running the hardware queue,
 		 * as running the queue may also have found completions.
 		 */
-		tag = __bt_get(hctx, bt, last_tag);
+		tag = __bt_get(hctx, bt, last_tag, tags);
 		if (tag != -1)
 			break;
 
@@ -298,7 +302,7 @@ static unsigned int __blk_mq_get_tag(struct blk_mq_alloc_data *data)
 	int tag;
 
 	tag = bt_get(data, &data->hctx->tags->bitmap_tags, data->hctx,
-			&data->ctx->last_tag);
+			&data->ctx->last_tag, data->hctx->tags);
 	if (tag >= 0)
 		return tag + data->hctx->tags->nr_reserved_tags;
 
@@ -314,7 +318,8 @@ static unsigned int __blk_mq_get_reserved_tag(struct blk_mq_alloc_data *data)
 		return BLK_MQ_TAG_FAIL;
 	}
 
-	tag = bt_get(data, &data->hctx->tags->breserved_tags, NULL, &zero);
+	tag = bt_get(data, &data->hctx->tags->breserved_tags, NULL, &zero,
+		data->hctx->tags);
 	if (tag < 0)
 		return BLK_MQ_TAG_FAIL;
 
@@ -386,7 +391,8 @@ void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, unsigned int tag,
 
 		BUG_ON(real_tag >= tags->nr_tags);
 		bt_clear_tag(&tags->bitmap_tags, real_tag);
-		*last_tag = real_tag;
+		if (likely(tags->alloc_policy == BLK_TAG_ALLOC_FIFO))
+			*last_tag = real_tag;
 	} else {
 		BUG_ON(tag >= tags->nr_reserved_tags);
 		bt_clear_tag(&tags->breserved_tags, tag);
@@ -523,10 +529,12 @@ static void bt_free(struct blk_mq_bitmap_tags *bt)
 }
 
 static struct blk_mq_tags *blk_mq_init_bitmap_tags(struct blk_mq_tags *tags,
-						   int node)
+						   int node, int alloc_policy)
 {
 	unsigned int depth = tags->nr_tags - tags->nr_reserved_tags;
 
+	tags->alloc_policy = alloc_policy;
+
 	if (bt_alloc(&tags->bitmap_tags, depth, node, false))
 		goto enomem;
 	if (bt_alloc(&tags->breserved_tags, tags->nr_reserved_tags, node, true))
@@ -540,7 +548,8 @@ static struct blk_mq_tags *blk_mq_init_bitmap_tags(struct blk_mq_tags *tags,
 }
 
 struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags,
-				     unsigned int reserved_tags, int node)
+				     unsigned int reserved_tags,
+				     int node, int alloc_policy)
 {
 	struct blk_mq_tags *tags;
 
@@ -556,7 +565,7 @@ struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags,
 	tags->nr_tags = total_tags;
 	tags->nr_reserved_tags = reserved_tags;
 
-	return blk_mq_init_bitmap_tags(tags, node);
+	return blk_mq_init_bitmap_tags(tags, node, alloc_policy);
 }
 
 void blk_mq_free_tags(struct blk_mq_tags *tags)
diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h
index 6206ed1..235e4c3 100644
--- a/block/blk-mq-tag.h
+++ b/block/blk-mq-tag.h
@@ -42,10 +42,12 @@ struct blk_mq_tags {
 
 	struct request **rqs;
 	struct list_head page_list;
+
+	int alloc_policy;
 };
 
 
-extern struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags, unsigned int reserved_tags, int node);
+extern struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags, unsigned int reserved_tags, int node, int alloc_policy);
 extern void blk_mq_free_tags(struct blk_mq_tags *tags);
 
 extern unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index da1ab56..7bf3656 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1359,7 +1359,8 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set,
 	size_t rq_size, left;
 
 	tags = blk_mq_init_tags(set->queue_depth, set->reserved_tags,
-				set->numa_node);
+				set->numa_node,
+				BLK_MQ_FLAG_TO_ALLOC_POLICY(set->flags));
 	if (!tags)
 		return NULL;
 
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 9ea95dd..49ab115 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -2188,6 +2188,8 @@ int scsi_mq_setup_tags(struct Scsi_Host *shost)
 	shost->tag_set.cmd_size = cmd_size;
 	shost->tag_set.numa_node = NUMA_NO_NODE;
 	shost->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE;
+	shost->tag_set.flags |=
+		BLK_ALLOC_POLICY_TO_MQ_FLAG(shost->hostt->tag_alloc_policy);
 	shost->tag_set.driver_data = shost;
 
 	return blk_mq_alloc_tag_set(&shost->tag_set);
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 8aded9a..b4bf93a 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -147,6 +147,8 @@ enum {
 	BLK_MQ_F_SG_MERGE	= 1 << 2,
 	BLK_MQ_F_SYSFS_UP	= 1 << 3,
 	BLK_MQ_F_DEFER_ISSUE	= 1 << 4,
+	BLK_MQ_F_ALLOC_POLICY_START_BIT = 8,
+	BLK_MQ_F_ALLOC_POLICY_BITS = 1,
 
 	BLK_MQ_S_STOPPED	= 0,
 	BLK_MQ_S_TAG_ACTIVE	= 1,
@@ -155,6 +157,12 @@ enum {
 
 	BLK_MQ_CPU_WORK_BATCH	= 8,
 };
+#define BLK_MQ_FLAG_TO_ALLOC_POLICY(flags) \
+	((flags >> BLK_MQ_F_ALLOC_POLICY_START_BIT) & \
+		((1 << BLK_MQ_F_ALLOC_POLICY_BITS) - 1))
+#define BLK_ALLOC_POLICY_TO_MQ_FLAG(policy) \
+	((policy & ((1 << BLK_MQ_F_ALLOC_POLICY_BITS) - 1)) \
+		<< BLK_MQ_F_ALLOC_POLICY_START_BIT)
 
 struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *);
 void blk_mq_finish_init(struct request_queue *q);
-- 
1.8.1


^ permalink raw reply related	[flat|nested] 23+ messages in thread

* Re: [PATCH v2 3/3] libata: use blk taging
  2014-12-18 18:46 ` [PATCH v2 3/3] libata: use blk taging Shaohua Li
@ 2015-01-09 18:15   ` Shaohua Li
  2015-01-09 21:43     ` Tejun Heo
  0 siblings, 1 reply; 23+ messages in thread
From: Shaohua Li @ 2015-01-09 18:15 UTC (permalink / raw)
  To: linux-ide; +Cc: Kernel-team, Jens Axboe, Tejun Heo, Christoph Hellwig

Ping!

On Thu, Dec 18, 2014 at 10:46:22AM -0800, Shaohua Li wrote:
> libata uses its own tag management which is duplication and the
> implementation is poor. And if we switch to blk-mq, tag is build-in.
> It's time to switch to generic taging.
> 
> The SAS driver has its own tag management, and looks we can't directly
> map the host controler tag to SATA tag. So I just bypassed the SAS case.
> 
> Cc: Jens Axboe <axboe@fb.com>
> Cc: Tejun Heo <tj@kernel.org>
> Cc: Christoph Hellwig <hch@infradead.org>
> Signed-off-by: Shaohua Li <shli@fb.com>
> ---
>  drivers/ata/libata-core.c | 20 ++++++++++++++------
>  drivers/ata/libata-scsi.c |  4 +++-
>  drivers/ata/libata.h      |  2 +-
>  include/linux/libata.h    |  1 +
>  4 files changed, 19 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
> index 5c84fb5..a27d00f 100644
> --- a/drivers/ata/libata-core.c
> +++ b/drivers/ata/libata-core.c
> @@ -1585,8 +1585,9 @@ unsigned ata_exec_internal_sg(struct ata_device *dev,
>  	else
>  		tag = 0;
>  
> -	if (test_and_set_bit(tag, &ap->qc_allocated))
> -		BUG();
> +	BUG_ON((!ap->scsi_host && test_and_set_bit(tag, &ap->qc_allocated)) ||
> +		(ap->scsi_host && dev->sdev &&
> +			blk_queue_find_tag(dev->sdev->request_queue, tag)));
>  	qc = __ata_qc_from_tag(ap, tag);
>  
>  	qc->tag = tag;
> @@ -4737,7 +4738,7 @@ void swap_buf_le16(u16 *buf, unsigned int buf_words)
>   *	None.
>   */
>  
> -static struct ata_queued_cmd *ata_qc_new(struct ata_port *ap)
> +static struct ata_queued_cmd *ata_qc_new(struct ata_port *ap, int blktag)
>  {
>  	struct ata_queued_cmd *qc = NULL;
>  	unsigned int max_queue = ap->host->n_tags;
> @@ -4747,6 +4748,12 @@ static struct ata_queued_cmd *ata_qc_new(struct ata_port *ap)
>  	if (unlikely(ap->pflags & ATA_PFLAG_FROZEN))
>  		return NULL;
>  
> +	if (ap->scsi_host) {
> +		qc = __ata_qc_from_tag(ap, blktag);
> +		qc->tag = blktag;
> +		return qc;
> +	}
> +
>  	for (i = 0, tag = ap->last_tag + 1; i < max_queue; i++, tag++) {
>  		tag = tag < max_queue ? tag : 0;
>  
> @@ -4773,12 +4780,12 @@ static struct ata_queued_cmd *ata_qc_new(struct ata_port *ap)
>   *	None.
>   */
>  
> -struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev)
> +struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev, int blktag)
>  {
>  	struct ata_port *ap = dev->link->ap;
>  	struct ata_queued_cmd *qc;
>  
> -	qc = ata_qc_new(ap);
> +	qc = ata_qc_new(ap, blktag);
>  	if (qc) {
>  		qc->scsicmd = NULL;
>  		qc->ap = ap;
> @@ -4812,7 +4819,8 @@ void ata_qc_free(struct ata_queued_cmd *qc)
>  	tag = qc->tag;
>  	if (likely(ata_tag_valid(tag))) {
>  		qc->tag = ATA_TAG_POISON;
> -		clear_bit(tag, &ap->qc_allocated);
> +		if (!ap->scsi_host)
> +			clear_bit(tag, &ap->qc_allocated);
>  	}
>  }
>  
> diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
> index e364e86..94339c2 100644
> --- a/drivers/ata/libata-scsi.c
> +++ b/drivers/ata/libata-scsi.c
> @@ -756,7 +756,7 @@ static struct ata_queued_cmd *ata_scsi_qc_new(struct ata_device *dev,
>  {
>  	struct ata_queued_cmd *qc;
>  
> -	qc = ata_qc_new_init(dev);
> +	qc = ata_qc_new_init(dev, cmd->request->tag);
>  	if (qc) {
>  		qc->scsicmd = cmd;
>  		qc->scsidone = cmd->scsi_done;
> @@ -3666,6 +3666,8 @@ int ata_scsi_add_hosts(struct ata_host *host, struct scsi_host_template *sht)
>  		 */
>  		shost->max_host_blocked = 1;
>  
> +		scsi_init_shared_tag_map(shost, host->n_tags);
> +
>  		rc = scsi_add_host_with_dma(ap->scsi_host,
>  						&ap->tdev, ap->host->dev);
>  		if (rc)
> diff --git a/drivers/ata/libata.h b/drivers/ata/libata.h
> index 5f4e0cc..4040513 100644
> --- a/drivers/ata/libata.h
> +++ b/drivers/ata/libata.h
> @@ -63,7 +63,7 @@ extern struct ata_link *ata_dev_phys_link(struct ata_device *dev);
>  extern void ata_force_cbl(struct ata_port *ap);
>  extern u64 ata_tf_to_lba(const struct ata_taskfile *tf);
>  extern u64 ata_tf_to_lba48(const struct ata_taskfile *tf);
> -extern struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev);
> +extern struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev, int tag);
>  extern int ata_build_rw_tf(struct ata_taskfile *tf, struct ata_device *dev,
>  			   u64 block, u32 n_block, unsigned int tf_flags,
>  			   unsigned int tag);
> diff --git a/include/linux/libata.h b/include/linux/libata.h
> index 2d18241..5f1c5606 100644
> --- a/include/linux/libata.h
> +++ b/include/linux/libata.h
> @@ -1344,6 +1344,7 @@ extern struct device_attribute *ata_common_sdev_attrs[];
>  	.ioctl			= ata_scsi_ioctl,		\
>  	.queuecommand		= ata_scsi_queuecmd,		\
>  	.can_queue		= ATA_DEF_QUEUE,		\
> +	.tag_alloc_policy	= BLK_TAG_ALLOC_RR,		\
>  	.this_id		= ATA_SHT_THIS_ID,		\
>  	.cmd_per_lun		= ATA_SHT_CMD_PER_LUN,		\
>  	.emulated		= ATA_SHT_EMULATED,		\
> -- 
> 1.8.1
> 

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v2 3/3] libata: use blk taging
  2015-01-09 18:15   ` Shaohua Li
@ 2015-01-09 21:43     ` Tejun Heo
  2015-01-09 21:59       ` Shaohua Li
  0 siblings, 1 reply; 23+ messages in thread
From: Tejun Heo @ 2015-01-09 21:43 UTC (permalink / raw)
  To: Shaohua Li; +Cc: linux-ide, Kernel-team, Jens Axboe, Christoph Hellwig

On Fri, Jan 09, 2015 at 10:15:29AM -0800, Shaohua Li wrote:
> Ping!

I like the idea but it bothers me that we end up with two separate
ways of allocating command tags.  Any chance the old one can be
removed?

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v2 3/3] libata: use blk taging
  2015-01-09 21:43     ` Tejun Heo
@ 2015-01-09 21:59       ` Shaohua Li
  2015-01-09 22:12         ` Shaohua Li
  0 siblings, 1 reply; 23+ messages in thread
From: Shaohua Li @ 2015-01-09 21:59 UTC (permalink / raw)
  To: Tejun Heo, dan.j.williams
  Cc: linux-ide, Kernel-team, Jens Axboe, Christoph Hellwig

On Fri, Jan 09, 2015 at 04:43:07PM -0500, Tejun Heo wrote:
> On Fri, Jan 09, 2015 at 10:15:29AM -0800, Shaohua Li wrote:
> > Ping!
> 
> I like the idea but it bothers me that we end up with two separate
> ways of allocating command tags.  Any chance the old one can be
> removed?
> 
> Thanks.
> 
> -- 
> tejun

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v2 3/3] libata: use blk taging
  2015-01-09 21:59       ` Shaohua Li
@ 2015-01-09 22:12         ` Shaohua Li
  2015-01-14  8:08           ` Dan Williams
  0 siblings, 1 reply; 23+ messages in thread
From: Shaohua Li @ 2015-01-09 22:12 UTC (permalink / raw)
  To: Tejun Heo, dan.j.williams
  Cc: linux-ide, Kernel-team, Jens Axboe, Christoph Hellwig

On Fri, Jan 09, 2015 at 01:59:19PM -0800, Shaohua Li wrote:
> On Fri, Jan 09, 2015 at 04:43:07PM -0500, Tejun Heo wrote:
> > On Fri, Jan 09, 2015 at 10:15:29AM -0800, Shaohua Li wrote:
> > > Ping!
> > 
> > I like the idea but it bothers me that we end up with two separate
> > ways of allocating command tags.  Any chance the old one can be
> > removed?

Oh, sorry, my reply truncated.

So I checked with IPR driver guys, the ipr doesn't use ncq, so we can
always use sata tag 0, but not sure for libsas.

Maybe Dan can answer if there is a way we can map SCSI tag to SATA tag.

Thanks,
Shaohua

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v2 3/3] libata: use blk taging
  2015-01-09 22:12         ` Shaohua Li
@ 2015-01-14  8:08           ` Dan Williams
  2015-01-14 16:30             ` Shaohua Li
  0 siblings, 1 reply; 23+ messages in thread
From: Dan Williams @ 2015-01-14  8:08 UTC (permalink / raw)
  To: Shaohua Li
  Cc: Tejun Heo, IDE/ATA development list, Kernel-team, Jens Axboe,
	Christoph Hellwig

On Fri, Jan 9, 2015 at 2:12 PM, Shaohua Li <shli@fb.com> wrote:
> On Fri, Jan 09, 2015 at 01:59:19PM -0800, Shaohua Li wrote:
>> On Fri, Jan 09, 2015 at 04:43:07PM -0500, Tejun Heo wrote:
>> > On Fri, Jan 09, 2015 at 10:15:29AM -0800, Shaohua Li wrote:
>> > > Ping!
>> >
>> > I like the idea but it bothers me that we end up with two separate
>> > ways of allocating command tags.  Any chance the old one can be
>> > removed?
>
> Oh, sorry, my reply truncated.
>
> So I checked with IPR driver guys, the ipr doesn't use ncq, so we can
> always use sata tag 0, but not sure for libsas.
>
> Maybe Dan can answer if there is a way we can map SCSI tag to SATA tag.
>

For libsas or for ata drivers?  For libsas, iirc, the internal libata
tag is ignored and we use the scsi tag for the sas task  For ata some
drivers want round robin, but others appear to care about using the
lowest available: https://bugzilla.kernel.org/show_bug.cgi?id=87101.

Is ata using it's own legacy tag ordering scheme getting in the way of
other improvements?  I'd just as soon recommend letting legacy dogs
lie.  In other words what do we gain by switching?

I need to follow up on bz87101, seems I never reworked the patch as
Tejun asked.  However, I'm glad a fix like the one proposed in that
report can be self-contained to libata and need not worry about
supporting ata specific quirks in the block layer tag ordering scheme.

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v2 3/3] libata: use blk taging
  2015-01-14  8:08           ` Dan Williams
@ 2015-01-14 16:30             ` Shaohua Li
  2015-01-14 17:09               ` Dan Williams
       [not found]               ` <CAPcyv4hMwpGzeaovnXtqyYFM46wodRamZd7CNVoR43JJYh0Tjg@mail.gmail.com>
  0 siblings, 2 replies; 23+ messages in thread
From: Shaohua Li @ 2015-01-14 16:30 UTC (permalink / raw)
  To: Dan Williams
  Cc: Tejun Heo, IDE/ATA development list, Kernel-team, Jens Axboe,
	Christoph Hellwig

On Wed, Jan 14, 2015 at 12:08:26AM -0800, Dan Williams wrote:
> On Fri, Jan 9, 2015 at 2:12 PM, Shaohua Li <shli@fb.com> wrote:
> > On Fri, Jan 09, 2015 at 01:59:19PM -0800, Shaohua Li wrote:
> >> On Fri, Jan 09, 2015 at 04:43:07PM -0500, Tejun Heo wrote:
> >> > On Fri, Jan 09, 2015 at 10:15:29AM -0800, Shaohua Li wrote:
> >> > > Ping!
> >> >
> >> > I like the idea but it bothers me that we end up with two separate
> >> > ways of allocating command tags.  Any chance the old one can be
> >> > removed?
> >
> > Oh, sorry, my reply truncated.
> >
> > So I checked with IPR driver guys, the ipr doesn't use ncq, so we can
> > always use sata tag 0, but not sure for libsas.
> >
> > Maybe Dan can answer if there is a way we can map SCSI tag to SATA tag.
> >
> 
> For libsas or for ata drivers?  For libsas, iirc, the internal libata
> tag is ignored and we use the scsi tag for the sas task  For ata some
> drivers want round robin, but others appear to care about using the
> lowest available: https://urldefense.proofpoint.com/v1/url?u=https://bugzilla.kernel.org/show_bug.cgi?id%3D87101&k=ZVNjlDMF0FElm4dQtryO4A%3D%3D%0A&r=3RFlL12A7nwmLRXunVJq2g%3D%3D%0A&m=i5Fz0FbsOgTMAtXW2WeXro9juvX%2BRmkKOK%2Fnl5ZZWMw%3D%0A&s=b8747b79ff51a364a841dd8e2ff51814536fcd63eedb7ad8f1de68e6691e5e98.
> 
> Is ata using it's own legacy tag ordering scheme getting in the way of
> other improvements?  I'd just as soon recommend letting legacy dogs
> lie.  In other words what do we gain by switching?
> 
> I need to follow up on bz87101, seems I never reworked the patch as
> Tejun asked.  However, I'm glad a fix like the one proposed in that
> report can be self-contained to libata and need not worry about
> supporting ata specific quirks in the block layer tag ordering scheme.

Basically I'd like using block tag instead of an open coded tag
implementation in libata. The block tag implenmentation is more
sophisticated, and with blk-mq tag is built-in, so it's great to remove
tag implementation in libata. For sata, it's easy to do it. The problem
is sas, which implements its own scsi driver/tag. My question is if we
can map SCSI tag (of libsas) to ATA tag. So for example, the ipr sas
driver doesn't use NCQ, so we can always use ata tag 0 in libata for
ipr. I'm wondering if we can do some mapping in libsas too, so we can
completely delete the tag code in libata.

Thanks,
Shaohua

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v2 3/3] libata: use blk taging
  2015-01-14 16:30             ` Shaohua Li
@ 2015-01-14 17:09               ` Dan Williams
       [not found]               ` <CAPcyv4hMwpGzeaovnXtqyYFM46wodRamZd7CNVoR43JJYh0Tjg@mail.gmail.com>
  1 sibling, 0 replies; 23+ messages in thread
From: Dan Williams @ 2015-01-14 17:09 UTC (permalink / raw)
  To: Shaohua Li
  Cc: Tejun Heo, IDE/ATA development list, Kernel-team, Jens Axboe,
	Christoph Hellwig

[ excuse the re-send ]

On Wed, Jan 14, 2015 at 8:30 AM, Shaohua Li <shli@fb.com> wrote:
> On Wed, Jan 14, 2015 at 12:08:26AM -0800, Dan Williams wrote:
>> On Fri, Jan 9, 2015 at 2:12 PM, Shaohua Li <shli@fb.com> wrote:
>> > On Fri, Jan 09, 2015 at 01:59:19PM -0800, Shaohua Li wrote:
>> >> On Fri, Jan 09, 2015 at 04:43:07PM -0500, Tejun Heo wrote:
>> >> > On Fri, Jan 09, 2015 at 10:15:29AM -0800, Shaohua Li wrote:
>> >> > > Ping!
>> >> >
>> >> > I like the idea but it bothers me that we end up with two separate
>> >> > ways of allocating command tags.  Any chance the old one can be
>> >> > removed?
>> >
>> > Oh, sorry, my reply truncated.
>> >
>> > So I checked with IPR driver guys, the ipr doesn't use ncq, so we can
>> > always use sata tag 0, but not sure for libsas.
>> >
>> > Maybe Dan can answer if there is a way we can map SCSI tag to SATA tag.
>> >
>>
>> For libsas or for ata drivers?  For libsas, iirc, the internal libata
>> tag is ignored and we use the scsi tag for the sas task  For ata some
>> drivers want round robin, but others appear to care about using the
>> lowest available: https://urldefense.proofpoint.com/v1/url?u=https://bugzilla.kernel.org/show_bug.cgi?id%3D87101&k=ZVNjlDMF0FElm4dQtryO4A%3D%3D%0A&r=3RFlL12A7nwmLRXunVJq2g%3D%3D%0A&m=i5Fz0FbsOgTMAtXW2WeXro9juvX%2BRmkKOK%2Fnl5ZZWMw%3D%0A&s=b8747b79ff51a364a841dd8e2ff51814536fcd63eedb7ad8f1de68e6691e5e98.
>>
>> Is ata using it's own legacy tag ordering scheme getting in the way of
>> other improvements?  I'd just as soon recommend letting legacy dogs
>> lie.  In other words what do we gain by switching?
>>
>> I need to follow up on bz87101, seems I never reworked the patch as
>> Tejun asked.  However, I'm glad a fix like the one proposed in that
>> report can be self-contained to libata and need not worry about
>> supporting ata specific quirks in the block layer tag ordering scheme.
>
> Basically I'd like using block tag instead of an open coded tag
> implementation in libata. The block tag implenmentation is more
> sophisticated, and with blk-mq tag is built-in, so it's great to remove
> tag implementation in libata.

Is it great to remove?  It's not clear what the cost of maintaining it
is.  How would you handle the case where some ata controllers want the
lowest available tag vs others that want round robin.

> For sata, it's easy to do it. The problem
> is sas, which implements its own scsi driver/tag. My question is if we
> can map SCSI tag (of libsas) to ATA tag. So for example, the ipr sas
> driver doesn't use NCQ, so we can always use ata tag 0 in libata for
> ipr. I'm wondering if we can do some mapping in libsas too, so we can
> completely delete the tag code in libata.

The problem with using the scsi tag for libsas is that the scsi_host
may support 256 commands, but only 32 per ata device.   Seems like a
problem blk-mq has already solved (controller + per-endpoint tag
queues), but I have not looked...

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v2 3/3] libata: use blk taging
       [not found]               ` <CAPcyv4hMwpGzeaovnXtqyYFM46wodRamZd7CNVoR43JJYh0Tjg@mail.gmail.com>
@ 2015-01-14 17:13                 ` Shaohua Li
  2015-01-14 17:37                   ` Dan Williams
  0 siblings, 1 reply; 23+ messages in thread
From: Shaohua Li @ 2015-01-14 17:13 UTC (permalink / raw)
  To: Dan Williams
  Cc: Tejun Heo, IDE/ATA development list, Kernel-team, Jens Axboe,
	Christoph Hellwig

On Wed, Jan 14, 2015 at 09:04:24AM -0800, Dan Williams wrote:
> On Wed, Jan 14, 2015 at 8:30 AM, Shaohua Li <shli@fb.com> wrote:
> 
> > On Wed, Jan 14, 2015 at 12:08:26AM -0800, Dan Williams wrote:
> > > On Fri, Jan 9, 2015 at 2:12 PM, Shaohua Li <shli@fb.com> wrote:
> > > > On Fri, Jan 09, 2015 at 01:59:19PM -0800, Shaohua Li wrote:
> > > >> On Fri, Jan 09, 2015 at 04:43:07PM -0500, Tejun Heo wrote:
> > > >> > On Fri, Jan 09, 2015 at 10:15:29AM -0800, Shaohua Li wrote:
> > > >> > > Ping!
> > > >> >
> > > >> > I like the idea but it bothers me that we end up with two separate
> > > >> > ways of allocating command tags.  Any chance the old one can be
> > > >> > removed?
> > > >
> > > > Oh, sorry, my reply truncated.
> > > >
> > > > So I checked with IPR driver guys, the ipr doesn't use ncq, so we can
> > > > always use sata tag 0, but not sure for libsas.
> > > >
> > > > Maybe Dan can answer if there is a way we can map SCSI tag to SATA tag.
> > > >
> > >
> > > For libsas or for ata drivers?  For libsas, iirc, the internal libata
> > > tag is ignored and we use the scsi tag for the sas task  For ata some
> > > drivers want round robin, but others appear to care about using the
> > > lowest available:
> > https://urldefense.proofpoint.com/v1/url?u=https://bugzilla.kernel.org/show_bug.cgi?id%3D87101&k=ZVNjlDMF0FElm4dQtryO4A%3D%3D%0A&r=3RFlL12A7nwmLRXunVJq2g%3D%3D%0A&m=i5Fz0FbsOgTMAtXW2WeXro9juvX%2BRmkKOK%2Fnl5ZZWMw%3D%0A&s=b8747b79ff51a364a841dd8e2ff51814536fcd63eedb7ad8f1de68e6691e5e98
> > .
> > >
> > > Is ata using it's own legacy tag ordering scheme getting in the way of
> > > other improvements?  I'd just as soon recommend letting legacy dogs
> > > lie.  In other words what do we gain by switching?
> > >
> > > I need to follow up on bz87101, seems I never reworked the patch as
> > > Tejun asked.  However, I'm glad a fix like the one proposed in that
> > > report can be self-contained to libata and need not worry about
> > > supporting ata specific quirks in the block layer tag ordering scheme.
> >
> > Basically I'd like using block tag instead of an open coded tag
> > implementation in libata. The block tag implenmentation is more
> > sophisticated, and with blk-mq tag is built-in, so it's great to remove
> > tag implementation in libata.
> 
> 
> Is it great to remove?  It's not clear what the cost of maintaining it is.
> How would you handle the case where some ata controllers want the lowest
> available tag vs others that want round robin.

I added a flag to blk-tag/blk-mq tag to implement different tag
allocation strategy, for example, round robin and fifo.

> > For sata, it's easy to do it. The problem
> > is sas, which implements its own scsi driver/tag. My question is if we
> > can map SCSI tag (of libsas) to ATA tag. So for example, the ipr sas
> > driver doesn't use NCQ, so we can always use ata tag 0 in libata for
> > ipr. I'm wondering if we can do some mapping in libsas too, so we can
> > completely delete the tag code in libata.
> >
> 
> The problem with using the scsi tag for libsas is that the scsi_host may
> support 256 commands, but only 32 per ata device.   Seems like a problem
> blk-mq has already solved (controller + per-endpoint tag queues), but I
> have not looked...

This is exactly the issue I'm talking about. Is there a way to map the
256 (scsi tag) to ata tag for libsas? Like I said, for ipr sas, I'll do
scsi tag -> ata tag 0, since it doesn't use NCQ for SATA. Is there
something similar or whatever for libsas? From search, ipr and libsas
are the only scsi drivers using libata.

Thanks,
Shaohua

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v2 3/3] libata: use blk taging
  2015-01-14 17:13                 ` Shaohua Li
@ 2015-01-14 17:37                   ` Dan Williams
  2015-01-15  9:28                     ` Christoph Hellwig
  0 siblings, 1 reply; 23+ messages in thread
From: Dan Williams @ 2015-01-14 17:37 UTC (permalink / raw)
  To: Shaohua Li
  Cc: Tejun Heo, IDE/ATA development list, Kernel-team, Jens Axboe,
	Christoph Hellwig

On Wed, Jan 14, 2015 at 9:13 AM, Shaohua Li <shli@fb.com> wrote:
> On Wed, Jan 14, 2015 at 09:04:24AM -0800, Dan Williams wrote:
>> On Wed, Jan 14, 2015 at 8:30 AM, Shaohua Li <shli@fb.com> wrote:
>>
>> > On Wed, Jan 14, 2015 at 12:08:26AM -0800, Dan Williams wrote:
>> > > On Fri, Jan 9, 2015 at 2:12 PM, Shaohua Li <shli@fb.com> wrote:
>> > > > On Fri, Jan 09, 2015 at 01:59:19PM -0800, Shaohua Li wrote:
>> > > >> On Fri, Jan 09, 2015 at 04:43:07PM -0500, Tejun Heo wrote:
>> > > >> > On Fri, Jan 09, 2015 at 10:15:29AM -0800, Shaohua Li wrote:
>> > > >> > > Ping!
>> > > >> >
>> > > >> > I like the idea but it bothers me that we end up with two separate
>> > > >> > ways of allocating command tags.  Any chance the old one can be
>> > > >> > removed?
>> > > >
>> > > > Oh, sorry, my reply truncated.
>> > > >
>> > > > So I checked with IPR driver guys, the ipr doesn't use ncq, so we can
>> > > > always use sata tag 0, but not sure for libsas.
>> > > >
>> > > > Maybe Dan can answer if there is a way we can map SCSI tag to SATA tag.
>> > > >
>> > >
>> > > For libsas or for ata drivers?  For libsas, iirc, the internal libata
>> > > tag is ignored and we use the scsi tag for the sas task  For ata some
>> > > drivers want round robin, but others appear to care about using the
>> > > lowest available:
>> > https://urldefense.proofpoint.com/v1/url?u=https://bugzilla.kernel.org/show_bug.cgi?id%3D87101&k=ZVNjlDMF0FElm4dQtryO4A%3D%3D%0A&r=3RFlL12A7nwmLRXunVJq2g%3D%3D%0A&m=i5Fz0FbsOgTMAtXW2WeXro9juvX%2BRmkKOK%2Fnl5ZZWMw%3D%0A&s=b8747b79ff51a364a841dd8e2ff51814536fcd63eedb7ad8f1de68e6691e5e98
>> > .
>> > >
>> > > Is ata using it's own legacy tag ordering scheme getting in the way of
>> > > other improvements?  I'd just as soon recommend letting legacy dogs
>> > > lie.  In other words what do we gain by switching?
>> > >
>> > > I need to follow up on bz87101, seems I never reworked the patch as
>> > > Tejun asked.  However, I'm glad a fix like the one proposed in that
>> > > report can be self-contained to libata and need not worry about
>> > > supporting ata specific quirks in the block layer tag ordering scheme.
>> >
>> > Basically I'd like using block tag instead of an open coded tag
>> > implementation in libata. The block tag implenmentation is more
>> > sophisticated, and with blk-mq tag is built-in, so it's great to remove
>> > tag implementation in libata.
>>
>>
>> Is it great to remove?  It's not clear what the cost of maintaining it is.
>> How would you handle the case where some ata controllers want the lowest
>> available tag vs others that want round robin.
>
> I added a flag to blk-tag/blk-mq tag to implement different tag
> allocation strategy, for example, round robin and fifo.

Right, but you seem to do it at the scsi_host level, if I'm not
mistaken.  Isn't that too high for ata quirks?

>> > For sata, it's easy to do it. The problem
>> > is sas, which implements its own scsi driver/tag. My question is if we
>> > can map SCSI tag (of libsas) to ATA tag. So for example, the ipr sas
>> > driver doesn't use NCQ, so we can always use ata tag 0 in libata for
>> > ipr. I'm wondering if we can do some mapping in libsas too, so we can
>> > completely delete the tag code in libata.
>> >
>>
>> The problem with using the scsi tag for libsas is that the scsi_host may
>> support 256 commands, but only 32 per ata device.   Seems like a problem
>> blk-mq has already solved (controller + per-endpoint tag queues), but I
>> have not looked...
>
> This is exactly the issue I'm talking about. Is there a way to map the
> 256 (scsi tag) to ata tag for libsas? Like I said, for ipr sas, I'll do
> scsi tag -> ata tag 0, since it doesn't use NCQ for SATA. Is there
> something similar or whatever for libsas? From search, ipr and libsas
> are the only scsi drivers using libata.

libsas uses the libata allocated tag for NCQ.  libsas first asks
libata to allocate a tag/qc for ncq, then it creates a sas_task and
assigns the scsi tag.  To use a generic allocator I assume you would
need to flip that ordering to allocate the sas_task tag first and then
use that plus another key to allocate a sub-tag for ata to use for
NCQ.

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v2 3/3] libata: use blk taging
  2015-01-14 17:37                   ` Dan Williams
@ 2015-01-15  9:28                     ` Christoph Hellwig
  2015-01-15 15:02                       ` Tejun Heo
  2015-01-15 18:40                       ` Shaohua Li
  0 siblings, 2 replies; 23+ messages in thread
From: Christoph Hellwig @ 2015-01-15  9:28 UTC (permalink / raw)
  To: Dan Williams
  Cc: Shaohua Li, Tejun Heo, IDE/ATA development list, Kernel-team,
	Jens Axboe, Christoph Hellwig

On Wed, Jan 14, 2015 at 09:37:30AM -0800, Dan Williams wrote:
> libsas uses the libata allocated tag for NCQ.  libsas first asks
> libata to allocate a tag/qc for ncq, then it creates a sas_task and
> assigns the scsi tag.  To use a generic allocator I assume you would
> need to flip that ordering to allocate the sas_task tag first and then
> use that plus another key to allocate a sub-tag for ata to use for
> NCQ.

As far as I can tell all libsas drivers as well as ipr use block
layer tagging, so they always get requests/scsi_cmnds with a tag
already assigned. For libsas drivers currently use per-LUN tags,
although with scsi-mq we will get per-host tags.

libata on the other hand at the moment assigns it's own tags,
which are only stored in struct ata_queued_cmd, but never in the
request. libata assigns tags per ata_host.  Each ata_host
might have multiple ports, which each get their own Scsi_Host
allocated.

Given that ATA NCQ has a fairly low queue depth I assume we want
to stick to assigning ATA tags per-device, but due to the host
per device scheme libata that even works using scsi-mq. (Q: how do tags
work when port mulipliers are involved?)

So for SAS driver using libata we will need a separate tag allocator,
but that one might as well be in libsas instead of keeping it in libata.

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v2 3/3] libata: use blk taging
  2015-01-15  9:28                     ` Christoph Hellwig
@ 2015-01-15 15:02                       ` Tejun Heo
  2015-01-15 18:40                       ` Shaohua Li
  1 sibling, 0 replies; 23+ messages in thread
From: Tejun Heo @ 2015-01-15 15:02 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Dan Williams, Shaohua Li, IDE/ATA development list, Kernel-team,
	Jens Axboe

Hello,

On Thu, Jan 15, 2015 at 01:28:36AM -0800, Christoph Hellwig wrote:
> libata on the other hand at the moment assigns it's own tags,
> which are only stored in struct ata_queued_cmd, but never in the
> request. libata assigns tags per ata_host.  Each ata_host

Per ata_port.

> might have multiple ports, which each get their own Scsi_Host
> allocated.
>
> Given that ATA NCQ has a fairly low queue depth I assume we want
> to stick to assigning ATA tags per-device, but due to the host
> per device scheme libata that even works using scsi-mq. (Q: how do tags
> work when port mulipliers are involved?)

It's still per-port.  The same number of tags are shared among all
devices attached via PMs.

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v2 3/3] libata: use blk taging
  2015-01-15  9:28                     ` Christoph Hellwig
  2015-01-15 15:02                       ` Tejun Heo
@ 2015-01-15 18:40                       ` Shaohua Li
  2015-01-15 18:57                         ` Tejun Heo
  2015-01-15 18:59                         ` Dan Williams
  1 sibling, 2 replies; 23+ messages in thread
From: Shaohua Li @ 2015-01-15 18:40 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Dan Williams, Tejun Heo, IDE/ATA development list, Kernel-team,
	Jens Axboe

On Thu, Jan 15, 2015 at 01:28:36AM -0800, Christoph Hellwig wrote:
> On Wed, Jan 14, 2015 at 09:37:30AM -0800, Dan Williams wrote:
> > libsas uses the libata allocated tag for NCQ.  libsas first asks
> > libata to allocate a tag/qc for ncq, then it creates a sas_task and
> > assigns the scsi tag.  To use a generic allocator I assume you would
> > need to flip that ordering to allocate the sas_task tag first and then
> > use that plus another key to allocate a sub-tag for ata to use for
> > NCQ.
> 
> As far as I can tell all libsas drivers as well as ipr use block
> layer tagging, so they always get requests/scsi_cmnds with a tag
> already assigned. For libsas drivers currently use per-LUN tags,
> although with scsi-mq we will get per-host tags.
> 
> libata on the other hand at the moment assigns it's own tags,
> which are only stored in struct ata_queued_cmd, but never in the
> request. libata assigns tags per ata_host.  Each ata_host
> might have multiple ports, which each get their own Scsi_Host
> allocated.
> 
> Given that ATA NCQ has a fairly low queue depth I assume we want
> to stick to assigning ATA tags per-device, but due to the host
> per device scheme libata that even works using scsi-mq. (Q: how do tags
> work when port mulipliers are involved?)
> 
> So for SAS driver using libata we will need a separate tag allocator,
> but that one might as well be in libsas instead of keeping it in libata.

Yep, the ATA NCQ should work well with block tag as each port is a
scsi host. So looks it's not easy to map scsi tag to ATA tag for libsas
and we eventually will have another tag allocator for libsas.

Tejun,
I'm afraid I can't work out a patch to delete the old tag allocator for
libata. Can we take this patch series first and later we can delete the
old tag allocator till libsas has its own tag allocator. This isn't
optimal, but I really know nothing about libsas.

Thanks,
Shaohua

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v2 3/3] libata: use blk taging
  2015-01-15 18:40                       ` Shaohua Li
@ 2015-01-15 18:57                         ` Tejun Heo
  2015-01-15 18:59                         ` Dan Williams
  1 sibling, 0 replies; 23+ messages in thread
From: Tejun Heo @ 2015-01-15 18:57 UTC (permalink / raw)
  To: Shaohua Li
  Cc: Christoph Hellwig, Dan Williams, IDE/ATA development list,
	Kernel-team, Jens Axboe

Hello, Shaohua.

On Thu, Jan 15, 2015 at 10:40:08AM -0800, Shaohua Li wrote:
> I'm afraid I can't work out a patch to delete the old tag allocator for
> libata. Can we take this patch series first and later we can delete the
> old tag allocator till libsas has its own tag allocator. This isn't
> optimal, but I really know nothing about libsas.

As long as it's clear that this is libsas-only and the rest of code
doesn't have to care about it, I guess it'll have to do.  But please
do isolate it clearly.

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v2 3/3] libata: use blk taging
  2015-01-15 18:40                       ` Shaohua Li
  2015-01-15 18:57                         ` Tejun Heo
@ 2015-01-15 18:59                         ` Dan Williams
  2015-01-15 19:03                           ` Tejun Heo
  2015-01-15 19:15                           ` Jens Axboe
  1 sibling, 2 replies; 23+ messages in thread
From: Dan Williams @ 2015-01-15 18:59 UTC (permalink / raw)
  To: Shaohua Li
  Cc: Christoph Hellwig, Tejun Heo, IDE/ATA development list,
	Kernel-team, Jens Axboe

On Thu, Jan 15, 2015 at 10:40 AM, Shaohua Li <shli@fb.com> wrote:
> On Thu, Jan 15, 2015 at 01:28:36AM -0800, Christoph Hellwig wrote:
>> On Wed, Jan 14, 2015 at 09:37:30AM -0800, Dan Williams wrote:
>> > libsas uses the libata allocated tag for NCQ.  libsas first asks
>> > libata to allocate a tag/qc for ncq, then it creates a sas_task and
>> > assigns the scsi tag.  To use a generic allocator I assume you would
>> > need to flip that ordering to allocate the sas_task tag first and then
>> > use that plus another key to allocate a sub-tag for ata to use for
>> > NCQ.
>>
>> As far as I can tell all libsas drivers as well as ipr use block
>> layer tagging, so they always get requests/scsi_cmnds with a tag
>> already assigned. For libsas drivers currently use per-LUN tags,
>> although with scsi-mq we will get per-host tags.
>>
>> libata on the other hand at the moment assigns it's own tags,
>> which are only stored in struct ata_queued_cmd, but never in the
>> request. libata assigns tags per ata_host.  Each ata_host
>> might have multiple ports, which each get their own Scsi_Host
>> allocated.
>>
>> Given that ATA NCQ has a fairly low queue depth I assume we want
>> to stick to assigning ATA tags per-device, but due to the host
>> per device scheme libata that even works using scsi-mq. (Q: how do tags
>> work when port mulipliers are involved?)
>>
>> So for SAS driver using libata we will need a separate tag allocator,
>> but that one might as well be in libsas instead of keeping it in libata.
>
> Yep, the ATA NCQ should work well with block tag as each port is a
> scsi host. So looks it's not easy to map scsi tag to ATA tag for libsas
> and we eventually will have another tag allocator for libsas.
>
> Tejun,
> I'm afraid I can't work out a patch to delete the old tag allocator for
> libata. Can we take this patch series first and later we can delete the
> old tag allocator till libsas has its own tag allocator. This isn't
> optimal, but I really know nothing about libsas.

I still don't understand what we get by adding this new allocator
besides complexity, am I missing something?

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v2 3/3] libata: use blk taging
  2015-01-15 18:59                         ` Dan Williams
@ 2015-01-15 19:03                           ` Tejun Heo
  2015-01-15 19:15                           ` Jens Axboe
  1 sibling, 0 replies; 23+ messages in thread
From: Tejun Heo @ 2015-01-15 19:03 UTC (permalink / raw)
  To: Dan Williams
  Cc: Shaohua Li, Christoph Hellwig, IDE/ATA development list,
	Kernel-team, Jens Axboe

On Thu, Jan 15, 2015 at 10:59:36AM -0800, Dan Williams wrote:
> I still don't understand what we get by adding this new allocator
> besides complexity, am I missing something?

Hmmm... one problem with the existing tag allocator in ata is that
it's not very efficient which actually shows up in profile when libata
is used with a very zippy SSD.  Given that ata needs a different
allocation policies anyway maybe the right thing to do is making the
existing allocator suck less.

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v2 3/3] libata: use blk taging
  2015-01-15 18:59                         ` Dan Williams
  2015-01-15 19:03                           ` Tejun Heo
@ 2015-01-15 19:15                           ` Jens Axboe
  2015-01-15 19:51                             ` Dan Williams
  1 sibling, 1 reply; 23+ messages in thread
From: Jens Axboe @ 2015-01-15 19:15 UTC (permalink / raw)
  To: Dan Williams, Shaohua Li
  Cc: Christoph Hellwig, Tejun Heo, IDE/ATA development list, Kernel-team

On 01/15/2015 11:59 AM, Dan Williams wrote:
> On Thu, Jan 15, 2015 at 10:40 AM, Shaohua Li <shli@fb.com> wrote:
>> On Thu, Jan 15, 2015 at 01:28:36AM -0800, Christoph Hellwig wrote:
>>> On Wed, Jan 14, 2015 at 09:37:30AM -0800, Dan Williams wrote:
>>>> libsas uses the libata allocated tag for NCQ.  libsas first asks
>>>> libata to allocate a tag/qc for ncq, then it creates a sas_task and
>>>> assigns the scsi tag.  To use a generic allocator I assume you would
>>>> need to flip that ordering to allocate the sas_task tag first and then
>>>> use that plus another key to allocate a sub-tag for ata to use for
>>>> NCQ.
>>>
>>> As far as I can tell all libsas drivers as well as ipr use block
>>> layer tagging, so they always get requests/scsi_cmnds with a tag
>>> already assigned. For libsas drivers currently use per-LUN tags,
>>> although with scsi-mq we will get per-host tags.
>>>
>>> libata on the other hand at the moment assigns it's own tags,
>>> which are only stored in struct ata_queued_cmd, but never in the
>>> request. libata assigns tags per ata_host.  Each ata_host
>>> might have multiple ports, which each get their own Scsi_Host
>>> allocated.
>>>
>>> Given that ATA NCQ has a fairly low queue depth I assume we want
>>> to stick to assigning ATA tags per-device, but due to the host
>>> per device scheme libata that even works using scsi-mq. (Q: how do tags
>>> work when port mulipliers are involved?)
>>>
>>> So for SAS driver using libata we will need a separate tag allocator,
>>> but that one might as well be in libsas instead of keeping it in libata.
>>
>> Yep, the ATA NCQ should work well with block tag as each port is a
>> scsi host. So looks it's not easy to map scsi tag to ATA tag for libsas
>> and we eventually will have another tag allocator for libsas.
>>
>> Tejun,
>> I'm afraid I can't work out a patch to delete the old tag allocator for
>> libata. Can we take this patch series first and later we can delete the
>> old tag allocator till libsas has its own tag allocator. This isn't
>> optimal, but I really know nothing about libsas.
>
> I still don't understand what we get by adding this new allocator
> besides complexity, am I missing something?

Two things:

- libata tag allocator sucks. Like seriously sucks, it's almost a worst 
case implementation.
- Much better to have a single unified allocator to tweak and tune, than 
having separate version.

#2 is still lacking a bit, but I don't think it'd be impossible to unify 
it all.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v2 3/3] libata: use blk taging
  2015-01-15 19:15                           ` Jens Axboe
@ 2015-01-15 19:51                             ` Dan Williams
  2015-01-15 22:28                               ` Jens Axboe
  0 siblings, 1 reply; 23+ messages in thread
From: Dan Williams @ 2015-01-15 19:51 UTC (permalink / raw)
  To: Jens Axboe
  Cc: Shaohua Li, Christoph Hellwig, Tejun Heo,
	IDE/ATA development list, Kernel-team

On Thu, Jan 15, 2015 at 11:15 AM, Jens Axboe <axboe@fb.com> wrote:
> On 01/15/2015 11:59 AM, Dan Williams wrote:
>> I still don't understand what we get by adding this new allocator
>> besides complexity, am I missing something?
>
>
> Two things:
>
> - libata tag allocator sucks. Like seriously sucks, it's almost a worst case
> implementation.

Not questioning its suckiness, but I thought the SATA suckiness made
it moot.  Apparently not in all cases...

> - Much better to have a single unified allocator to tweak and tune, than
> having separate version.
>
> #2 is still lacking a bit, but I don't think it'd be impossible to unify it
> all.

https://bugzilla.kernel.org/show_bug.cgi?id=87101 has gone silent, I
need to ping it.  That's my primary concern with the current proposal,
supporting controllers that have weird/unnatural relationships  with
the value of the tag.

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v2 3/3] libata: use blk taging
  2015-01-15 19:51                             ` Dan Williams
@ 2015-01-15 22:28                               ` Jens Axboe
  0 siblings, 0 replies; 23+ messages in thread
From: Jens Axboe @ 2015-01-15 22:28 UTC (permalink / raw)
  To: Dan Williams
  Cc: Shaohua Li, Christoph Hellwig, Tejun Heo,
	IDE/ATA development list, Kernel-team

On 01/15/2015 12:51 PM, Dan Williams wrote:
> On Thu, Jan 15, 2015 at 11:15 AM, Jens Axboe <axboe@fb.com> wrote:
>> On 01/15/2015 11:59 AM, Dan Williams wrote:
>>> I still don't understand what we get by adding this new allocator
>>> besides complexity, am I missing something?
>>
>>
>> Two things:
>>
>> - libata tag allocator sucks. Like seriously sucks, it's almost a worst case
>> implementation.
>
> Not questioning its suckiness, but I thought the SATA suckiness made
> it moot.  Apparently not in all cases...

The laptop I'm typing this from does 145K 4k random read IOPS, it's 
definitely into the area of it mattering.

>> - Much better to have a single unified allocator to tweak and tune, than
>> having separate version.
>>
>> #2 is still lacking a bit, but I don't think it'd be impossible to unify it
>> all.
>
> https://bugzilla.kernel.org/show_bug.cgi?id=87101 has gone silent, I
> need to ping it.  That's my primary concern with the current proposal,
> supporting controllers that have weird/unnatural relationships  with
> the value of the tag.

Unfortunately parts of SATA is as crappy as USB when it comes to things 
like that. I can understand why some controllers would like to see a 
natural ordering of the tags (even if it is stupid to require, but AHCI 
doesn't help there), but it makes very little sense why it would break 
others. Looks like this particular case was likely a different bug, the 
ordering just made it show up more easily.

And speaking of strict ordering, the blk-mq tagging should actually 
improve ordering. The libata implementation orders globally, but that'll 
equally break down on multiple processes accessing the device. For that 
case, you end up interleaving, and if the drive does strict by-tag 
ordering of what IO to do, it'll go random pretty quickly. The blk-mq 
implementation preserves ordering between threads in that case, due to 
how the last tag is cached. So I would expect to see an improvement in 
behavior with that for use cases that offload IO to thread pools (like 
posix aio, or private implementations in programs).

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 23+ messages in thread

end of thread, other threads:[~2015-01-15 22:28 UTC | newest]

Thread overview: 23+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-12-18 18:46 [PATCH v2 1/3] block: support different tag allocation policy Shaohua Li
2014-12-18 18:46 ` [PATCH v2 2/3] blk-mq: add " Shaohua Li
2014-12-22 23:25   ` Jens Axboe
2015-01-02 19:43     ` Shaohua Li
2014-12-18 18:46 ` [PATCH v2 3/3] libata: use blk taging Shaohua Li
2015-01-09 18:15   ` Shaohua Li
2015-01-09 21:43     ` Tejun Heo
2015-01-09 21:59       ` Shaohua Li
2015-01-09 22:12         ` Shaohua Li
2015-01-14  8:08           ` Dan Williams
2015-01-14 16:30             ` Shaohua Li
2015-01-14 17:09               ` Dan Williams
     [not found]               ` <CAPcyv4hMwpGzeaovnXtqyYFM46wodRamZd7CNVoR43JJYh0Tjg@mail.gmail.com>
2015-01-14 17:13                 ` Shaohua Li
2015-01-14 17:37                   ` Dan Williams
2015-01-15  9:28                     ` Christoph Hellwig
2015-01-15 15:02                       ` Tejun Heo
2015-01-15 18:40                       ` Shaohua Li
2015-01-15 18:57                         ` Tejun Heo
2015-01-15 18:59                         ` Dan Williams
2015-01-15 19:03                           ` Tejun Heo
2015-01-15 19:15                           ` Jens Axboe
2015-01-15 19:51                             ` Dan Williams
2015-01-15 22:28                               ` Jens Axboe

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.