All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/2] nvme: remove support or stream based temperature hint
@ 2022-03-04 17:55 Christoph Hellwig
  2022-03-04 17:55 ` [PATCH 2/2] block: remove the per-bio/request write hint Christoph Hellwig
                   ` (2 more replies)
  0 siblings, 3 replies; 43+ messages in thread
From: Christoph Hellwig @ 2022-03-04 17:55 UTC (permalink / raw)
  To: axboe
  Cc: sagi, kbusch, song, linux-block, linux-raid, linux-nvme, linux-fsdevel

This support was added for RocksDB, but RocksDB ended up not using it.
At the same time drives on the open marked (vs those build for OEMs
for non-Linux support) that actually support streams are extremly
rare.  Don't bloat the nvme driver for it.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Jens Axboe <axboe@kernel.dk>
Reviewed-by: Keith Busch <kbusch@kernel.org>
---
 drivers/nvme/host/core.c | 140 ---------------------------------------
 1 file changed, 140 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 5e0bfda04bd7b..ca1504c178162 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -77,10 +77,6 @@ module_param(apst_secondary_latency_tol_us, ulong, 0644);
 MODULE_PARM_DESC(apst_secondary_latency_tol_us,
 	"secondary APST latency tolerance in us");
 
-static bool streams;
-module_param(streams, bool, 0644);
-MODULE_PARM_DESC(streams, "turn on support for Streams write directives");
-
 /*
  * nvme_wq - hosts nvme related works that are not reset or delete
  * nvme_reset_wq - hosts nvme reset works
@@ -714,105 +710,6 @@ bool __nvme_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
 }
 EXPORT_SYMBOL_GPL(__nvme_check_ready);
 
-static int nvme_toggle_streams(struct nvme_ctrl *ctrl, bool enable)
-{
-	struct nvme_command c = { };
-
-	c.directive.opcode = nvme_admin_directive_send;
-	c.directive.nsid = cpu_to_le32(NVME_NSID_ALL);
-	c.directive.doper = NVME_DIR_SND_ID_OP_ENABLE;
-	c.directive.dtype = NVME_DIR_IDENTIFY;
-	c.directive.tdtype = NVME_DIR_STREAMS;
-	c.directive.endir = enable ? NVME_DIR_ENDIR : 0;
-
-	return nvme_submit_sync_cmd(ctrl->admin_q, &c, NULL, 0);
-}
-
-static int nvme_disable_streams(struct nvme_ctrl *ctrl)
-{
-	return nvme_toggle_streams(ctrl, false);
-}
-
-static int nvme_enable_streams(struct nvme_ctrl *ctrl)
-{
-	return nvme_toggle_streams(ctrl, true);
-}
-
-static int nvme_get_stream_params(struct nvme_ctrl *ctrl,
-				  struct streams_directive_params *s, u32 nsid)
-{
-	struct nvme_command c = { };
-
-	memset(s, 0, sizeof(*s));
-
-	c.directive.opcode = nvme_admin_directive_recv;
-	c.directive.nsid = cpu_to_le32(nsid);
-	c.directive.numd = cpu_to_le32(nvme_bytes_to_numd(sizeof(*s)));
-	c.directive.doper = NVME_DIR_RCV_ST_OP_PARAM;
-	c.directive.dtype = NVME_DIR_STREAMS;
-
-	return nvme_submit_sync_cmd(ctrl->admin_q, &c, s, sizeof(*s));
-}
-
-static int nvme_configure_directives(struct nvme_ctrl *ctrl)
-{
-	struct streams_directive_params s;
-	int ret;
-
-	if (!(ctrl->oacs & NVME_CTRL_OACS_DIRECTIVES))
-		return 0;
-	if (!streams)
-		return 0;
-
-	ret = nvme_enable_streams(ctrl);
-	if (ret)
-		return ret;
-
-	ret = nvme_get_stream_params(ctrl, &s, NVME_NSID_ALL);
-	if (ret)
-		goto out_disable_stream;
-
-	ctrl->nssa = le16_to_cpu(s.nssa);
-	if (ctrl->nssa < BLK_MAX_WRITE_HINTS - 1) {
-		dev_info(ctrl->device, "too few streams (%u) available\n",
-					ctrl->nssa);
-		goto out_disable_stream;
-	}
-
-	ctrl->nr_streams = min_t(u16, ctrl->nssa, BLK_MAX_WRITE_HINTS - 1);
-	dev_info(ctrl->device, "Using %u streams\n", ctrl->nr_streams);
-	return 0;
-
-out_disable_stream:
-	nvme_disable_streams(ctrl);
-	return ret;
-}
-
-/*
- * Check if 'req' has a write hint associated with it. If it does, assign
- * a valid namespace stream to the write.
- */
-static void nvme_assign_write_stream(struct nvme_ctrl *ctrl,
-				     struct request *req, u16 *control,
-				     u32 *dsmgmt)
-{
-	enum rw_hint streamid = req->write_hint;
-
-	if (streamid == WRITE_LIFE_NOT_SET || streamid == WRITE_LIFE_NONE)
-		streamid = 0;
-	else {
-		streamid--;
-		if (WARN_ON_ONCE(streamid > ctrl->nr_streams))
-			return;
-
-		*control |= NVME_RW_DTYPE_STREAMS;
-		*dsmgmt |= streamid << 16;
-	}
-
-	if (streamid < ARRAY_SIZE(req->q->write_hints))
-		req->q->write_hints[streamid] += blk_rq_bytes(req) >> 9;
-}
-
 static inline void nvme_setup_flush(struct nvme_ns *ns,
 		struct nvme_command *cmnd)
 {
@@ -916,7 +813,6 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
 		struct request *req, struct nvme_command *cmnd,
 		enum nvme_opcode op)
 {
-	struct nvme_ctrl *ctrl = ns->ctrl;
 	u16 control = 0;
 	u32 dsmgmt = 0;
 
@@ -939,9 +835,6 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
 	cmnd->rw.apptag = 0;
 	cmnd->rw.appmask = 0;
 
-	if (req_op(req) == REQ_OP_WRITE && ctrl->nr_streams)
-		nvme_assign_write_stream(ctrl, req, &control, &dsmgmt);
-
 	if (ns->ms) {
 		/*
 		 * If formated with metadata, the block layer always provides a
@@ -1662,9 +1555,6 @@ static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns)
 		return;
 	}
 
-	if (ctrl->nr_streams && ns->sws && ns->sgs)
-		size *= ns->sws * ns->sgs;
-
 	BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) <
 			NVME_DSM_MAX_RANGES);
 
@@ -1697,31 +1587,6 @@ static bool nvme_ns_ids_equal(struct nvme_ns_ids *a, struct nvme_ns_ids *b)
 		a->csi == b->csi;
 }
 
-static int nvme_setup_streams_ns(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
-				 u32 *phys_bs, u32 *io_opt)
-{
-	struct streams_directive_params s;
-	int ret;
-
-	if (!ctrl->nr_streams)
-		return 0;
-
-	ret = nvme_get_stream_params(ctrl, &s, ns->head->ns_id);
-	if (ret)
-		return ret;
-
-	ns->sws = le32_to_cpu(s.sws);
-	ns->sgs = le16_to_cpu(s.sgs);
-
-	if (ns->sws) {
-		*phys_bs = ns->sws * (1 << ns->lba_shift);
-		if (ns->sgs)
-			*io_opt = *phys_bs * ns->sgs;
-	}
-
-	return 0;
-}
-
 static int nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id)
 {
 	struct nvme_ctrl *ctrl = ns->ctrl;
@@ -1814,7 +1679,6 @@ static void nvme_update_disk_info(struct gendisk *disk,
 	blk_integrity_unregister(disk);
 
 	atomic_bs = phys_bs = bs;
-	nvme_setup_streams_ns(ns->ctrl, ns, &phys_bs, &io_opt);
 	if (id->nabo == 0) {
 		/*
 		 * Bit 1 indicates whether NAWUPF is defined for this namespace
@@ -3103,10 +2967,6 @@ int nvme_init_ctrl_finish(struct nvme_ctrl *ctrl)
 	if (ret < 0)
 		return ret;
 
-	ret = nvme_configure_directives(ctrl);
-	if (ret < 0)
-		return ret;
-
 	ret = nvme_configure_acre(ctrl);
 	if (ret < 0)
 		return ret;
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 43+ messages in thread

* [PATCH 2/2] block: remove the per-bio/request write hint
  2022-03-04 17:55 [PATCH 1/2] nvme: remove support or stream based temperature hint Christoph Hellwig
@ 2022-03-04 17:55 ` Christoph Hellwig
  2022-03-04 19:24   ` Jens Axboe
  2022-03-04 22:12   ` Dave Chinner
  2022-03-04 19:34 ` [PATCH 1/2] nvme: remove support or stream based temperature hint Keith Busch
  2022-03-04 20:20 ` Jens Axboe
  2 siblings, 2 replies; 43+ messages in thread
From: Christoph Hellwig @ 2022-03-04 17:55 UTC (permalink / raw)
  To: axboe
  Cc: sagi, kbusch, song, linux-block, linux-raid, linux-nvme, linux-fsdevel

With the NVMe support for this gone, there are no consumers of these hints
left, so remove them.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 block/bio.c                 |  2 --
 block/blk-crypto-fallback.c |  1 -
 block/blk-merge.c           | 14 --------------
 block/blk-mq-debugfs.c      | 24 ------------------------
 block/blk-mq.c              |  1 -
 block/bounce.c              |  1 -
 block/fops.c                |  3 ---
 drivers/md/raid1.c          |  2 --
 drivers/md/raid5-ppl.c      | 28 +++-------------------------
 drivers/md/raid5.c          |  6 ------
 fs/btrfs/extent_io.c        |  1 -
 fs/buffer.c                 | 13 +++++--------
 fs/direct-io.c              |  3 ---
 fs/ext4/page-io.c           |  5 +----
 fs/f2fs/data.c              |  2 --
 fs/gfs2/lops.c              |  1 -
 fs/iomap/buffered-io.c      |  2 --
 fs/iomap/direct-io.c        |  1 -
 fs/mpage.c                  |  1 -
 fs/zonefs/super.c           |  1 -
 include/linux/blk_types.h   |  1 -
 include/linux/blkdev.h      |  3 ---
 22 files changed, 9 insertions(+), 107 deletions(-)

diff --git a/block/bio.c b/block/bio.c
index b15f5466ce084..88e63bb187482 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -257,7 +257,6 @@ void bio_init(struct bio *bio, struct block_device *bdev, struct bio_vec *table,
 	bio->bi_opf = opf;
 	bio->bi_flags = 0;
 	bio->bi_ioprio = 0;
-	bio->bi_write_hint = 0;
 	bio->bi_status = 0;
 	bio->bi_iter.bi_sector = 0;
 	bio->bi_iter.bi_size = 0;
@@ -737,7 +736,6 @@ static int __bio_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp)
 	    bio_flagged(bio_src, BIO_REMAPPED))
 		bio_set_flag(bio, BIO_REMAPPED);
 	bio->bi_ioprio = bio_src->bi_ioprio;
-	bio->bi_write_hint = bio_src->bi_write_hint;
 	bio->bi_iter = bio_src->bi_iter;
 
 	bio_clone_blkg_association(bio, bio_src);
diff --git a/block/blk-crypto-fallback.c b/block/blk-crypto-fallback.c
index 18c8eafe20b94..7c854584b52b5 100644
--- a/block/blk-crypto-fallback.c
+++ b/block/blk-crypto-fallback.c
@@ -170,7 +170,6 @@ static struct bio *blk_crypto_fallback_clone_bio(struct bio *bio_src)
 		bio_set_flag(bio, BIO_REMAPPED);
 	bio->bi_opf		= bio_src->bi_opf;
 	bio->bi_ioprio		= bio_src->bi_ioprio;
-	bio->bi_write_hint	= bio_src->bi_write_hint;
 	bio->bi_iter.bi_sector	= bio_src->bi_iter.bi_sector;
 	bio->bi_iter.bi_size	= bio_src->bi_iter.bi_size;
 
diff --git a/block/blk-merge.c b/block/blk-merge.c
index f5255991b773c..0e871d4e7cb8d 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -782,13 +782,6 @@ static struct request *attempt_merge(struct request_queue *q,
 	    !blk_write_same_mergeable(req->bio, next->bio))
 		return NULL;
 
-	/*
-	 * Don't allow merge of different write hints, or for a hint with
-	 * non-hint IO.
-	 */
-	if (req->write_hint != next->write_hint)
-		return NULL;
-
 	if (req->ioprio != next->ioprio)
 		return NULL;
 
@@ -915,13 +908,6 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
 	    !blk_write_same_mergeable(rq->bio, bio))
 		return false;
 
-	/*
-	 * Don't allow merge of different write hints, or for a hint with
-	 * non-hint IO.
-	 */
-	if (rq->write_hint != bio->bi_write_hint)
-		return false;
-
 	if (rq->ioprio != bio_prio(bio))
 		return false;
 
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 3a790eb4995c6..c2904c75c160f 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -183,35 +183,11 @@ static ssize_t queue_state_write(void *data, const char __user *buf,
 	return count;
 }
 
-static int queue_write_hint_show(void *data, struct seq_file *m)
-{
-	struct request_queue *q = data;
-	int i;
-
-	for (i = 0; i < BLK_MAX_WRITE_HINTS; i++)
-		seq_printf(m, "hint%d: %llu\n", i, q->write_hints[i]);
-
-	return 0;
-}
-
-static ssize_t queue_write_hint_store(void *data, const char __user *buf,
-				      size_t count, loff_t *ppos)
-{
-	struct request_queue *q = data;
-	int i;
-
-	for (i = 0; i < BLK_MAX_WRITE_HINTS; i++)
-		q->write_hints[i] = 0;
-
-	return count;
-}
-
 static const struct blk_mq_debugfs_attr blk_mq_debugfs_queue_attrs[] = {
 	{ "poll_stat", 0400, queue_poll_stat_show },
 	{ "requeue_list", 0400, .seq_ops = &queue_requeue_list_seq_ops },
 	{ "pm_only", 0600, queue_pm_only_show, NULL },
 	{ "state", 0600, queue_state_show, queue_state_write },
-	{ "write_hints", 0600, queue_write_hint_show, queue_write_hint_store },
 	{ "zone_wlock", 0400, queue_zone_wlock_show, NULL },
 	{ },
 };
diff --git a/block/blk-mq.c b/block/blk-mq.c
index a05ce77250316..f28023b0a87eb 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2402,7 +2402,6 @@ static void blk_mq_bio_to_request(struct request *rq, struct bio *bio,
 		rq->cmd_flags |= REQ_FAILFAST_MASK;
 
 	rq->__sector = bio->bi_iter.bi_sector;
-	rq->write_hint = bio->bi_write_hint;
 	blk_rq_bio_prep(rq, bio, nr_segs);
 
 	/* This can't fail, since GFP_NOIO includes __GFP_DIRECT_RECLAIM. */
diff --git a/block/bounce.c b/block/bounce.c
index 3d50d19cde72a..9db1256d57d55 100644
--- a/block/bounce.c
+++ b/block/bounce.c
@@ -169,7 +169,6 @@ static struct bio *bounce_clone_bio(struct bio *bio_src)
 	if (bio_flagged(bio_src, BIO_REMAPPED))
 		bio_set_flag(bio, BIO_REMAPPED);
 	bio->bi_ioprio		= bio_src->bi_ioprio;
-	bio->bi_write_hint	= bio_src->bi_write_hint;
 	bio->bi_iter.bi_sector	= bio_src->bi_iter.bi_sector;
 	bio->bi_iter.bi_size	= bio_src->bi_iter.bi_size;
 
diff --git a/block/fops.c b/block/fops.c
index 3696665e586a8..037f12294c863 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -83,7 +83,6 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
 		bio_init(&bio, bdev, vecs, nr_pages, dio_bio_write_op(iocb));
 	}
 	bio.bi_iter.bi_sector = pos >> SECTOR_SHIFT;
-	bio.bi_write_hint = iocb->ki_hint;
 	bio.bi_private = current;
 	bio.bi_end_io = blkdev_bio_end_io_simple;
 	bio.bi_ioprio = iocb->ki_ioprio;
@@ -225,7 +224,6 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
 
 	for (;;) {
 		bio->bi_iter.bi_sector = pos >> SECTOR_SHIFT;
-		bio->bi_write_hint = iocb->ki_hint;
 		bio->bi_private = dio;
 		bio->bi_end_io = blkdev_bio_end_io;
 		bio->bi_ioprio = iocb->ki_ioprio;
@@ -325,7 +323,6 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
 	dio->flags = 0;
 	dio->iocb = iocb;
 	bio->bi_iter.bi_sector = pos >> SECTOR_SHIFT;
-	bio->bi_write_hint = iocb->ki_hint;
 	bio->bi_end_io = blkdev_bio_end_io_async;
 	bio->bi_ioprio = iocb->ki_ioprio;
 
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index c3288d46948de..c3ce6afd60a71 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1137,8 +1137,6 @@ static void alloc_behind_master_bio(struct r1bio *r1_bio,
 		goto skip_copy;
 	}
 
-	behind_bio->bi_write_hint = bio->bi_write_hint;
-
 	while (i < vcnt && size) {
 		struct page *page;
 		int len = min_t(int, PAGE_SIZE, size);
diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c
index 3446797fa0aca..282ce16ee4e1f 100644
--- a/drivers/md/raid5-ppl.c
+++ b/drivers/md/raid5-ppl.c
@@ -469,7 +469,6 @@ static void ppl_submit_iounit(struct ppl_io_unit *io)
 	bio_set_dev(bio, log->rdev->bdev);
 	bio->bi_iter.bi_sector = log->next_io_sector;
 	bio_add_page(bio, io->header_page, PAGE_SIZE, 0);
-	bio->bi_write_hint = ppl_conf->write_hint;
 
 	pr_debug("%s: log->current_io_sector: %llu\n", __func__,
 	    (unsigned long long)log->next_io_sector);
@@ -499,7 +498,6 @@ static void ppl_submit_iounit(struct ppl_io_unit *io)
 			bio = bio_alloc_bioset(prev->bi_bdev, BIO_MAX_VECS,
 					       prev->bi_opf, GFP_NOIO,
 					       &ppl_conf->bs);
-			bio->bi_write_hint = prev->bi_write_hint;
 			bio->bi_iter.bi_sector = bio_end_sector(prev);
 			bio_add_page(bio, sh->ppl_page, PAGE_SIZE, 0);
 
@@ -1402,7 +1400,6 @@ int ppl_init_log(struct r5conf *conf)
 	atomic64_set(&ppl_conf->seq, 0);
 	INIT_LIST_HEAD(&ppl_conf->no_mem_stripes);
 	spin_lock_init(&ppl_conf->no_mem_stripes_lock);
-	ppl_conf->write_hint = RWH_WRITE_LIFE_NOT_SET;
 
 	if (!mddev->external) {
 		ppl_conf->signature = ~crc32c_le(~0, mddev->uuid, sizeof(mddev->uuid));
@@ -1501,25 +1498,13 @@ int ppl_modify_log(struct r5conf *conf, struct md_rdev *rdev, bool add)
 static ssize_t
 ppl_write_hint_show(struct mddev *mddev, char *buf)
 {
-	size_t ret = 0;
-	struct r5conf *conf;
-	struct ppl_conf *ppl_conf = NULL;
-
-	spin_lock(&mddev->lock);
-	conf = mddev->private;
-	if (conf && raid5_has_ppl(conf))
-		ppl_conf = conf->log_private;
-	ret = sprintf(buf, "%d\n", ppl_conf ? ppl_conf->write_hint : 0);
-	spin_unlock(&mddev->lock);
-
-	return ret;
+	return sprintf(buf, "%d\n", 0);
 }
 
 static ssize_t
 ppl_write_hint_store(struct mddev *mddev, const char *page, size_t len)
 {
 	struct r5conf *conf;
-	struct ppl_conf *ppl_conf;
 	int err = 0;
 	unsigned short new;
 
@@ -1533,17 +1518,10 @@ ppl_write_hint_store(struct mddev *mddev, const char *page, size_t len)
 		return err;
 
 	conf = mddev->private;
-	if (!conf) {
+	if (!conf)
 		err = -ENODEV;
-	} else if (raid5_has_ppl(conf)) {
-		ppl_conf = conf->log_private;
-		if (!ppl_conf)
-			err = -EINVAL;
-		else
-			ppl_conf->write_hint = new;
-	} else {
+	else if (!raid5_has_ppl(conf) || !conf->log_private)
 		err = -EINVAL;
-	}
 
 	mddev_unlock(mddev);
 
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 8891aaba65964..78503db55ca4e 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -1210,9 +1210,6 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
 			bi->bi_io_vec[0].bv_len = RAID5_STRIPE_SIZE(conf);
 			bi->bi_io_vec[0].bv_offset = sh->dev[i].offset;
 			bi->bi_iter.bi_size = RAID5_STRIPE_SIZE(conf);
-			bi->bi_write_hint = sh->dev[i].write_hint;
-			if (!rrdev)
-				sh->dev[i].write_hint = RWH_WRITE_LIFE_NOT_SET;
 			/*
 			 * If this is discard request, set bi_vcnt 0. We don't
 			 * want to confuse SCSI because SCSI will replace payload
@@ -1264,8 +1261,6 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
 			rbi->bi_io_vec[0].bv_len = RAID5_STRIPE_SIZE(conf);
 			rbi->bi_io_vec[0].bv_offset = sh->dev[i].offset;
 			rbi->bi_iter.bi_size = RAID5_STRIPE_SIZE(conf);
-			rbi->bi_write_hint = sh->dev[i].write_hint;
-			sh->dev[i].write_hint = RWH_WRITE_LIFE_NOT_SET;
 			/*
 			 * If this is discard request, set bi_vcnt 0. We don't
 			 * want to confuse SCSI because SCSI will replace payload
@@ -3416,7 +3411,6 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx,
 		(unsigned long long)sh->sector);
 
 	spin_lock_irq(&sh->stripe_lock);
-	sh->dev[dd_idx].write_hint = bi->bi_write_hint;
 	/* Don't allow new IO added to stripes in batch list */
 	if (sh->batch_head)
 		goto overlap;
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index dee86911a4bef..1412f09537c73 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3321,7 +3321,6 @@ static int alloc_new_bio(struct btrfs_inode *inode,
 	bio_ctrl->bio_flags = bio_flags;
 	bio->bi_end_io = end_io_func;
 	bio->bi_private = &inode->io_tree;
-	bio->bi_write_hint = inode->vfs_inode.i_write_hint;
 	bio->bi_opf = opf;
 	ret = calc_bio_boundaries(bio_ctrl, inode, file_offset);
 	if (ret < 0)
diff --git a/fs/buffer.c b/fs/buffer.c
index a17c386a142c7..29c6c60660f61 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -53,7 +53,7 @@
 
 static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
 static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
-			 enum rw_hint hint, struct writeback_control *wbc);
+			 struct writeback_control *wbc);
 
 #define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers)
 
@@ -1806,8 +1806,7 @@ int __block_write_full_page(struct inode *inode, struct page *page,
 	do {
 		struct buffer_head *next = bh->b_this_page;
 		if (buffer_async_write(bh)) {
-			submit_bh_wbc(REQ_OP_WRITE, write_flags, bh,
-					inode->i_write_hint, wbc);
+			submit_bh_wbc(REQ_OP_WRITE, write_flags, bh, wbc);
 			nr_underway++;
 		}
 		bh = next;
@@ -1861,8 +1860,7 @@ int __block_write_full_page(struct inode *inode, struct page *page,
 		struct buffer_head *next = bh->b_this_page;
 		if (buffer_async_write(bh)) {
 			clear_buffer_dirty(bh);
-			submit_bh_wbc(REQ_OP_WRITE, write_flags, bh,
-					inode->i_write_hint, wbc);
+			submit_bh_wbc(REQ_OP_WRITE, write_flags, bh, wbc);
 			nr_underway++;
 		}
 		bh = next;
@@ -3008,7 +3006,7 @@ static void end_bio_bh_io_sync(struct bio *bio)
 }
 
 static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
-			 enum rw_hint write_hint, struct writeback_control *wbc)
+			 struct writeback_control *wbc)
 {
 	struct bio *bio;
 
@@ -3034,7 +3032,6 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
 	fscrypt_set_bio_crypt_ctx_bh(bio, bh, GFP_NOIO);
 
 	bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
-	bio->bi_write_hint = write_hint;
 
 	bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
 	BUG_ON(bio->bi_iter.bi_size != bh->b_size);
@@ -3056,7 +3053,7 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
 
 int submit_bh(int op, int op_flags, struct buffer_head *bh)
 {
-	return submit_bh_wbc(op, op_flags, bh, 0, NULL);
+	return submit_bh_wbc(op, op_flags, bh, NULL);
 }
 EXPORT_SYMBOL(submit_bh);
 
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 38bca4980a1ca..aef06e607b405 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -402,9 +402,6 @@ dio_bio_alloc(struct dio *dio, struct dio_submit *sdio,
 		bio->bi_end_io = dio_bio_end_aio;
 	else
 		bio->bi_end_io = dio_bio_end_io;
-
-	bio->bi_write_hint = dio->iocb->ki_hint;
-
 	sdio->bio = bio;
 	sdio->logical_offset_in_bio = sdio->cur_page_fs_offset;
 }
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 1253982268730..0c31b61c7628a 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -374,7 +374,6 @@ void ext4_io_submit(struct ext4_io_submit *io)
 	if (bio) {
 		int io_op_flags = io->io_wbc->sync_mode == WB_SYNC_ALL ?
 				  REQ_SYNC : 0;
-		io->io_bio->bi_write_hint = io->io_end->inode->i_write_hint;
 		bio_set_op_attrs(io->io_bio, REQ_OP_WRITE, io_op_flags);
 		submit_bio(io->io_bio);
 	}
@@ -420,10 +419,8 @@ static void io_submit_add_bh(struct ext4_io_submit *io,
 submit_and_retry:
 		ext4_io_submit(io);
 	}
-	if (io->io_bio == NULL) {
+	if (io->io_bio == NULL)
 		io_submit_init_bio(io, bh);
-		io->io_bio->bi_write_hint = inode->i_write_hint;
-	}
 	ret = bio_add_page(io->io_bio, page, bh->b_size, bh_offset(bh));
 	if (ret != bh->b_size)
 		goto submit_and_retry;
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index e71dde8de0db0..20d65aa6243a1 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -403,8 +403,6 @@ static struct bio *__bio_alloc(struct f2fs_io_info *fio, int npages)
 	} else {
 		bio->bi_end_io = f2fs_write_end_io;
 		bio->bi_private = sbi;
-		bio->bi_write_hint = f2fs_io_type_to_rw_hint(sbi,
-						fio->type, fio->temp);
 	}
 	iostat_alloc_and_bind_ctx(sbi, bio, NULL);
 
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 4ae1eefae616d..6ba51cbb94cf2 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -491,7 +491,6 @@ static struct bio *gfs2_chain_bio(struct bio *prev, unsigned int nr_iovecs)
 	new = bio_alloc(prev->bi_bdev, nr_iovecs, prev->bi_opf, GFP_NOIO);
 	bio_clone_blkg_association(new, prev);
 	new->bi_iter.bi_sector = bio_end_sector(prev);
-	new->bi_write_hint = prev->bi_write_hint;
 	bio_chain(new, prev);
 	submit_bio(prev);
 	return new;
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 491534e908615..fedca4de5f6b9 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -1199,7 +1199,6 @@ iomap_alloc_ioend(struct inode *inode, struct iomap_writepage_ctx *wpc,
 			       REQ_OP_WRITE | wbc_to_write_flags(wbc),
 			       GFP_NOFS, &iomap_ioend_bioset);
 	bio->bi_iter.bi_sector = sector;
-	bio->bi_write_hint = inode->i_write_hint;
 	wbc_init_bio(wbc, bio);
 
 	ioend = container_of(bio, struct iomap_ioend, io_inline_bio);
@@ -1228,7 +1227,6 @@ iomap_chain_bio(struct bio *prev)
 	new = bio_alloc(prev->bi_bdev, BIO_MAX_VECS, prev->bi_opf, GFP_NOFS);
 	bio_clone_blkg_association(new, prev);
 	new->bi_iter.bi_sector = bio_end_sector(prev);
-	new->bi_write_hint = prev->bi_write_hint;
 
 	bio_chain(prev, new);
 	bio_get(prev);		/* for iomap_finish_ioend */
diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index e2ba13645ef28..a434b1829545d 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -309,7 +309,6 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
 
 		bio = bio_alloc(iomap->bdev, nr_pages, bio_opf, GFP_KERNEL);
 		bio->bi_iter.bi_sector = iomap_sector(iomap, pos);
-		bio->bi_write_hint = dio->iocb->ki_hint;
 		bio->bi_ioprio = dio->iocb->ki_ioprio;
 		bio->bi_private = dio;
 		bio->bi_end_io = iomap_dio_bio_end_io;
diff --git a/fs/mpage.c b/fs/mpage.c
index dbfc02e23d97f..89eeef275a235 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -590,7 +590,6 @@ static int __mpage_writepage(struct page *page, struct writeback_control *wbc,
 		bio->bi_iter.bi_sector = blocks[0] << (blkbits - 9);
 
 		wbc_init_bio(wbc, bio);
-		bio->bi_write_hint = inode->i_write_hint;
 	}
 
 	/*
diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c
index d331b52592a0a..b71a23dd12556 100644
--- a/fs/zonefs/super.c
+++ b/fs/zonefs/super.c
@@ -695,7 +695,6 @@ static ssize_t zonefs_file_dio_append(struct kiocb *iocb, struct iov_iter *from)
 	bio = bio_alloc(bdev, nr_pages,
 			REQ_OP_ZONE_APPEND | REQ_SYNC | REQ_IDLE, GFP_NOFS);
 	bio->bi_iter.bi_sector = zi->i_zsector;
-	bio->bi_write_hint = iocb->ki_hint;
 	bio->bi_ioprio = iocb->ki_ioprio;
 	if (iocb->ki_flags & IOCB_DSYNC)
 		bio->bi_opf |= REQ_FUA;
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 5561e58d158ac..ba8cfa57255f3 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -250,7 +250,6 @@ struct bio {
 						 */
 	unsigned short		bi_flags;	/* BIO_* below */
 	unsigned short		bi_ioprio;
-	unsigned short		bi_write_hint;
 	blk_status_t		bi_status;
 	atomic_t		__bi_remaining;
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index e19947d84f128..bb6d5ee02c070 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -518,9 +518,6 @@ struct request_queue {
 
 	bool			mq_sysfs_init_done;
 
-#define BLK_MAX_WRITE_HINTS	5
-	u64			write_hints[BLK_MAX_WRITE_HINTS];
-
 	/*
 	 * Independent sector access ranges. This is always NULL for
 	 * devices that do not have multiple independent access ranges.
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 43+ messages in thread

* Re: [PATCH 2/2] block: remove the per-bio/request write hint
  2022-03-04 17:55 ` [PATCH 2/2] block: remove the per-bio/request write hint Christoph Hellwig
@ 2022-03-04 19:24   ` Jens Axboe
  2022-03-04 22:12   ` Dave Chinner
  1 sibling, 0 replies; 43+ messages in thread
From: Jens Axboe @ 2022-03-04 19:24 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: sagi, kbusch, song, linux-block, linux-raid, linux-nvme, linux-fsdevel

On 3/4/22 10:55 AM, Christoph Hellwig wrote:
> With the NVMe support for this gone, there are no consumers of these hints
> left, so remove them.

Looks good to me, didn't find any missed spots.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH 1/2] nvme: remove support or stream based temperature hint
  2022-03-04 17:55 [PATCH 1/2] nvme: remove support or stream based temperature hint Christoph Hellwig
  2022-03-04 17:55 ` [PATCH 2/2] block: remove the per-bio/request write hint Christoph Hellwig
@ 2022-03-04 19:34 ` Keith Busch
  2022-03-04 19:36   ` Christoph Hellwig
  2022-03-04 20:20 ` Jens Axboe
  2 siblings, 1 reply; 43+ messages in thread
From: Keith Busch @ 2022-03-04 19:34 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: axboe, sagi, song, linux-block, linux-raid, linux-nvme, linux-fsdevel

On Fri, Mar 04, 2022 at 06:55:55PM +0100, Christoph Hellwig wrote:
> -	ctrl->nssa = le16_to_cpu(s.nssa);
> -	if (ctrl->nssa < BLK_MAX_WRITE_HINTS - 1) {
> -		dev_info(ctrl->device, "too few streams (%u) available\n",
> -					ctrl->nssa);
> -		goto out_disable_stream;
> -	}

Just fyi, looks like the patch was built against an older version of the
driver, so it doesn't apply cleanly to nvme-5.18 at the above part.

Also please consider folding the following in this patch since it removes all
nr_streams use:

---
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 587d92df118b..1bed663322ee 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -280,7 +280,6 @@ struct nvme_ctrl {
 	u16 crdt[3];
 	u16 oncs;
 	u16 oacs;
-	u16 nr_streams;
 	u16 sqsize;
 	u32 max_namespaces;
 	atomic_t abort_limit;
--

^ permalink raw reply related	[flat|nested] 43+ messages in thread

* Re: [PATCH 1/2] nvme: remove support or stream based temperature hint
  2022-03-04 19:34 ` [PATCH 1/2] nvme: remove support or stream based temperature hint Keith Busch
@ 2022-03-04 19:36   ` Christoph Hellwig
  2022-03-04 19:38     ` Jens Axboe
  0 siblings, 1 reply; 43+ messages in thread
From: Christoph Hellwig @ 2022-03-04 19:36 UTC (permalink / raw)
  To: Keith Busch
  Cc: Christoph Hellwig, axboe, sagi, song, linux-block, linux-raid,
	linux-nvme, linux-fsdevel

On Fri, Mar 04, 2022 at 11:34:39AM -0800, Keith Busch wrote:
> On Fri, Mar 04, 2022 at 06:55:55PM +0100, Christoph Hellwig wrote:
> > -	ctrl->nssa = le16_to_cpu(s.nssa);
> > -	if (ctrl->nssa < BLK_MAX_WRITE_HINTS - 1) {
> > -		dev_info(ctrl->device, "too few streams (%u) available\n",
> > -					ctrl->nssa);
> > -		goto out_disable_stream;
> > -	}
> 
> Just fyi, looks like the patch was built against an older version of the
> driver, so it doesn't apply cleanly to nvme-5.18 at the above part.
> 
> Also please consider folding the following in this patch since it removes all
> nr_streams use:

This was against Jens' for-5.18/block tree.  I'm a bit lost what tree
to best send it against as there will always be some conflicts.

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH 1/2] nvme: remove support or stream based temperature hint
  2022-03-04 19:36   ` Christoph Hellwig
@ 2022-03-04 19:38     ` Jens Axboe
  0 siblings, 0 replies; 43+ messages in thread
From: Jens Axboe @ 2022-03-04 19:38 UTC (permalink / raw)
  To: Christoph Hellwig, Keith Busch
  Cc: sagi, song, linux-block, linux-raid, linux-nvme, linux-fsdevel

On 3/4/22 12:36 PM, Christoph Hellwig wrote:
> On Fri, Mar 04, 2022 at 11:34:39AM -0800, Keith Busch wrote:
>> On Fri, Mar 04, 2022 at 06:55:55PM +0100, Christoph Hellwig wrote:
>>> -	ctrl->nssa = le16_to_cpu(s.nssa);
>>> -	if (ctrl->nssa < BLK_MAX_WRITE_HINTS - 1) {
>>> -		dev_info(ctrl->device, "too few streams (%u) available\n",
>>> -					ctrl->nssa);
>>> -		goto out_disable_stream;
>>> -	}
>>
>> Just fyi, looks like the patch was built against an older version of the
>> driver, so it doesn't apply cleanly to nvme-5.18 at the above part.
>>
>> Also please consider folding the following in this patch since it removes all
>> nr_streams use:
> 
> This was against Jens' for-5.18/block tree.  I'm a bit lost what tree
> to best send it against as there will always be some conflicts.

It's always a bit of a problem when patches touch both drivers and core.
I actually put this one in a special branch just because of that. I'll
fold in Keith's incremental.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH 1/2] nvme: remove support or stream based temperature hint
  2022-03-04 17:55 [PATCH 1/2] nvme: remove support or stream based temperature hint Christoph Hellwig
  2022-03-04 17:55 ` [PATCH 2/2] block: remove the per-bio/request write hint Christoph Hellwig
  2022-03-04 19:34 ` [PATCH 1/2] nvme: remove support or stream based temperature hint Keith Busch
@ 2022-03-04 20:20 ` Jens Axboe
  2 siblings, 0 replies; 43+ messages in thread
From: Jens Axboe @ 2022-03-04 20:20 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: linux-block, linux-fsdevel, linux-nvme, sagi, kbusch, linux-raid, song

On Fri, 4 Mar 2022 18:55:55 +0100, Christoph Hellwig wrote:
> This support was added for RocksDB, but RocksDB ended up not using it.
> At the same time drives on the open marked (vs those build for OEMs
> for non-Linux support) that actually support streams are extremly
> rare.  Don't bloat the nvme driver for it.
> 
> 

Applied, thanks!

[1/2] nvme: remove support or stream based temperature hint
      commit: 86cc47f6c199a71fdb28fe781174d9974ee14304
[2/2] block: remove the per-bio/request write hint
      commit: 6928b8f7eafaec1f0ea318fec71b537a165e552b

Best regards,
-- 
Jens Axboe



^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH 2/2] block: remove the per-bio/request write hint
  2022-03-04 17:55 ` [PATCH 2/2] block: remove the per-bio/request write hint Christoph Hellwig
  2022-03-04 19:24   ` Jens Axboe
@ 2022-03-04 22:12   ` Dave Chinner
  2022-03-05  5:19     ` Christoph Hellwig
  1 sibling, 1 reply; 43+ messages in thread
From: Dave Chinner @ 2022-03-04 22:12 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: axboe, sagi, kbusch, song, linux-block, linux-raid, linux-nvme,
	linux-fsdevel

On Fri, Mar 04, 2022 at 06:55:56PM +0100, Christoph Hellwig wrote:
> With the NVMe support for this gone, there are no consumers of these hints
> left, so remove them.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  block/bio.c                 |  2 --
>  block/blk-crypto-fallback.c |  1 -
>  block/blk-merge.c           | 14 --------------
>  block/blk-mq-debugfs.c      | 24 ------------------------
>  block/blk-mq.c              |  1 -
>  block/bounce.c              |  1 -
>  block/fops.c                |  3 ---
>  drivers/md/raid1.c          |  2 --
>  drivers/md/raid5-ppl.c      | 28 +++-------------------------
>  drivers/md/raid5.c          |  6 ------
>  fs/btrfs/extent_io.c        |  1 -
>  fs/buffer.c                 | 13 +++++--------
>  fs/direct-io.c              |  3 ---
>  fs/ext4/page-io.c           |  5 +----
>  fs/f2fs/data.c              |  2 --
>  fs/gfs2/lops.c              |  1 -
>  fs/iomap/buffered-io.c      |  2 --
>  fs/iomap/direct-io.c        |  1 -
>  fs/mpage.c                  |  1 -
>  fs/zonefs/super.c           |  1 -
>  include/linux/blk_types.h   |  1 -
>  include/linux/blkdev.h      |  3 ---
>  22 files changed, 9 insertions(+), 107 deletions(-)

AFAICT, all the filesystem/IO path passthrough plumbing for hints is
now gone, and no hardware will ever receive hints.  Doesn't this
mean that file_write_hint(), file->f_write_hint and iocb->ki_hint
are now completely unused, too?  That also means the fcntl()s for
F_{G,S}ET_FILE_RW_HINT now have zero effect on IO, so should they be
neutered, too?

AFAICT, this patch leaves just the f2fs allocator usage of
inode->i_rw_hint to select a segment to allocate from as the
remaining consumer of this entire plumbing and user API. Is that
used by applications anywhere, or can that be removed and so the
rest of the infrastructure get removed and the fcntl()s no-op'd or
-EOPNOTSUPP?

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH 2/2] block: remove the per-bio/request write hint
  2022-03-04 22:12   ` Dave Chinner
@ 2022-03-05  5:19     ` Christoph Hellwig
  2022-03-05  6:09       ` Number of parity disks Kengo.M
  2022-03-05 21:40       ` [PATCH 2/2] block: remove the per-bio/request write hint Dave Chinner
  0 siblings, 2 replies; 43+ messages in thread
From: Christoph Hellwig @ 2022-03-05  5:19 UTC (permalink / raw)
  To: Dave Chinner
  Cc: Christoph Hellwig, axboe, sagi, kbusch, song, linux-block,
	linux-raid, linux-nvme, linux-fsdevel

On Sat, Mar 05, 2022 at 09:12:55AM +1100, Dave Chinner wrote:
> AFAICT, all the filesystem/IO path passthrough plumbing for hints is
> now gone, and no hardware will ever receive hints.  Doesn't this
> mean that file_write_hint(), file->f_write_hint and iocb->ki_hint
> are now completely unused, too?

No, for the reason tha you state below.  f2fs still uses it.

> AFAICT, this patch leaves just the f2fs allocator usage of
> inode->i_rw_hint to select a segment to allocate from as the
> remaining consumer of this entire plumbing and user API. Is that
> used by applications anywhere, or can that be removed and so the
> rest of the infrastructure get removed and the fcntl()s no-op'd or
> -EOPNOTSUPP?

I was told it is used quite heavily in android.

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Number of parity disks
  2022-03-05  5:19     ` Christoph Hellwig
@ 2022-03-05  6:09       ` Kengo.M
  2022-03-05  8:56         ` Piergiorgio Sartor
  2022-03-05 21:40       ` [PATCH 2/2] block: remove the per-bio/request write hint Dave Chinner
  1 sibling, 1 reply; 43+ messages in thread
From: Kengo.M @ 2022-03-05  6:09 UTC (permalink / raw)
  To: linux-raid

Hi Folks

I use mdadm conveniently.

Let me ask a very very basic question.

One parity disk can be used in RAID 5 and 2 parity disks can be used in RAID 6.
ZFS RAIDZ-3 (raidz3) can use 3 parity disks.

Is it difficult to increase the number of parity disks to 4, 5 or more.
If so, is the reason for this because of the time it takes to 
generate the parity bits?

Kengo Miyahara

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: Number of parity disks
  2022-03-05  6:09       ` Number of parity disks Kengo.M
@ 2022-03-05  8:56         ` Piergiorgio Sartor
  0 siblings, 0 replies; 43+ messages in thread
From: Piergiorgio Sartor @ 2022-03-05  8:56 UTC (permalink / raw)
  To: Kengo.M; +Cc: linux-raid

On Sat, Mar 05, 2022 at 03:09:55PM +0900, Kengo.M wrote:
> Hi Folks
> 
> I use mdadm conveniently.
> 
> Let me ask a very very basic question.
> 
> One parity disk can be used in RAID 5 and 2 parity disks can be used in RAID 6.
> ZFS RAIDZ-3 (raidz3) can use 3 parity disks.
> 
> Is it difficult to increase the number of parity disks to 4, 5 or more.
> If so, is the reason for this because of the time it takes to generate the
> parity bits?

There was a project, some times ago, with
the aim to create a library, initially for
btrfs, capable of adding "n" parities.

This should have been usable by md as well.

The problem is that the Vandermonde matrix
might not invertible with more than 2 parities.
So, they've to use a Cauchy matrix.

It might be that performances are degrading
quite a lot.

I'm not sure about the status of such library,
maybe you can ask in the btrfs mailing list.

On the other hand, 3 parities (or more) case
might be a bit an overkill.
For md could be better to have things like
RAID-61 or similar.

bye,

-- 

piergiorgio

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH 2/2] block: remove the per-bio/request write hint
  2022-03-05  5:19     ` Christoph Hellwig
  2022-03-05  6:09       ` Number of parity disks Kengo.M
@ 2022-03-05 21:40       ` Dave Chinner
  2022-03-05 23:55         ` Bart Van Assche
  2022-03-06 17:11         ` Jens Axboe
  1 sibling, 2 replies; 43+ messages in thread
From: Dave Chinner @ 2022-03-05 21:40 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: axboe, sagi, kbusch, song, linux-block, linux-raid, linux-nvme,
	linux-fsdevel

On Sat, Mar 05, 2022 at 06:19:29AM +0100, Christoph Hellwig wrote:
> On Sat, Mar 05, 2022 at 09:12:55AM +1100, Dave Chinner wrote:
> > AFAICT, all the filesystem/IO path passthrough plumbing for hints is
> > now gone, and no hardware will ever receive hints.  Doesn't this
> > mean that file_write_hint(), file->f_write_hint and iocb->ki_hint
> > are now completely unused, too?
> 
> No, for the reason tha you state below.  f2fs still uses it.

My point is that f2fs uses i_write_hint, not f_write_hint or
ki_hint. IOWs, nothing in the IO path use the iocb or file write
hints anymore because they only ever got used to set the hint for
bios. It's now unused information.

According to the io_uring ppl, setup of unnecessary fields in the
iocb has a measurable cost and they've done work to minimise it in
the past. So if these fields are not actually used by anyone in the
IO path, why should we still pay the cost calling
ki_hint_validate(file_write_hint(file)) when setting up an iocb?

> > AFAICT, this patch leaves just the f2fs allocator usage of
> > inode->i_rw_hint to select a segment to allocate from as the
> > remaining consumer of this entire plumbing and user API. Is that
> > used by applications anywhere, or can that be removed and so the
> > rest of the infrastructure get removed and the fcntl()s no-op'd or
> > -EOPNOTSUPP?
> 
> I was told it is used quite heavily in android.

So it's primarily used by out of tree code? And that after this
patch, there's really no way to test that this API does anything
useful at all?

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH 2/2] block: remove the per-bio/request write hint
  2022-03-05 21:40       ` [PATCH 2/2] block: remove the per-bio/request write hint Dave Chinner
@ 2022-03-05 23:55         ` Bart Van Assche
  2022-03-06 17:11         ` Jens Axboe
  1 sibling, 0 replies; 43+ messages in thread
From: Bart Van Assche @ 2022-03-05 23:55 UTC (permalink / raw)
  To: Dave Chinner, Christoph Hellwig
  Cc: axboe, sagi, kbusch, song, linux-block, linux-raid, linux-nvme,
	linux-fsdevel, Jaegeuk Kim

On 3/5/22 13:40, Dave Chinner wrote:
> On Sat, Mar 05, 2022 at 06:19:29AM +0100, Christoph Hellwig wrote:
>> On Sat, Mar 05, 2022 at 09:12:55AM +1100, Dave Chinner wrote:
>>> AFAICT, this patch leaves just the f2fs allocator usage of
>>> inode->i_rw_hint to select a segment to allocate from as the
>>> remaining consumer of this entire plumbing and user API. Is that
>>> used by applications anywhere, or can that be removed and so the
>>> rest of the infrastructure get removed and the fcntl()s no-op'd or
>>> -EOPNOTSUPP?
>>
>> I was told it is used quite heavily in android.
> 
> So it's primarily used by out of tree code? And that after this
> patch, there's really no way to test that this API does anything
> useful at all?

Hi Dave,

Android kernel developers follow the "upstream first" policy for core kernel 
code (this means all kernel code other than kernel drivers that implement 
support for the phone SoC). As a result, the Android 13 F2FS implementation is 
very close to the upstream F2FS code. So the statement above about "out of tree 
code" is not correct.

Bart.

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH 2/2] block: remove the per-bio/request write hint
  2022-03-05 21:40       ` [PATCH 2/2] block: remove the per-bio/request write hint Dave Chinner
  2022-03-05 23:55         ` Bart Van Assche
@ 2022-03-06 17:11         ` Jens Axboe
  2022-03-06 18:01           ` Christoph Hellwig
  1 sibling, 1 reply; 43+ messages in thread
From: Jens Axboe @ 2022-03-06 17:11 UTC (permalink / raw)
  To: Dave Chinner, Christoph Hellwig
  Cc: sagi, kbusch, song, linux-block, linux-raid, linux-nvme, linux-fsdevel

On 3/5/22 2:40 PM, Dave Chinner wrote:
> On Sat, Mar 05, 2022 at 06:19:29AM +0100, Christoph Hellwig wrote:
>> On Sat, Mar 05, 2022 at 09:12:55AM +1100, Dave Chinner wrote:
>>> AFAICT, all the filesystem/IO path passthrough plumbing for hints is
>>> now gone, and no hardware will ever receive hints.  Doesn't this
>>> mean that file_write_hint(), file->f_write_hint and iocb->ki_hint
>>> are now completely unused, too?
>>
>> No, for the reason tha you state below.  f2fs still uses it.
> 
> My point is that f2fs uses i_write_hint, not f_write_hint or
> ki_hint. IOWs, nothing in the IO path use the iocb or file write
> hints anymore because they only ever got used to set the hint for
> bios. It's now unused information.
> 
> According to the io_uring ppl, setup of unnecessary fields in the
> iocb has a measurable cost and they've done work to minimise it in
> the past. So if these fields are not actually used by anyone in the
> IO path, why should we still pay the cost calling
> ki_hint_validate(file_write_hint(file)) when setting up an iocb?

Yes, I think we should kill it. If we retain the inode hint, the f2fs
doesn't need a any changes. And it should be safe to make the per-file
fcntl hints return EINVAL, which they would on older kernels anyway.
Untested, but something like the below.


diff --git a/fs/aio.c b/fs/aio.c
index 4ceba13a7db0..eb0948bb74f1 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1478,7 +1478,6 @@ static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb)
 	req->ki_flags = iocb_flags(req->ki_filp);
 	if (iocb->aio_flags & IOCB_FLAG_RESFD)
 		req->ki_flags |= IOCB_EVENTFD;
-	req->ki_hint = ki_hint_validate(file_write_hint(req->ki_filp));
 	if (iocb->aio_flags & IOCB_FLAG_IOPRIO) {
 		/*
 		 * If the IOCB_FLAG_IOPRIO flag of aio_flags is set, then
diff --git a/fs/cachefiles/io.c b/fs/cachefiles/io.c
index 753986ea1583..bc7c7a7d9260 100644
--- a/fs/cachefiles/io.c
+++ b/fs/cachefiles/io.c
@@ -138,7 +138,6 @@ static int cachefiles_read(struct netfs_cache_resources *cres,
 	ki->iocb.ki_filp	= file;
 	ki->iocb.ki_pos		= start_pos + skipped;
 	ki->iocb.ki_flags	= IOCB_DIRECT;
-	ki->iocb.ki_hint	= ki_hint_validate(file_write_hint(file));
 	ki->iocb.ki_ioprio	= get_current_ioprio();
 	ki->skipped		= skipped;
 	ki->object		= object;
@@ -313,7 +312,6 @@ static int cachefiles_write(struct netfs_cache_resources *cres,
 	ki->iocb.ki_filp	= file;
 	ki->iocb.ki_pos		= start_pos;
 	ki->iocb.ki_flags	= IOCB_DIRECT | IOCB_WRITE;
-	ki->iocb.ki_hint	= ki_hint_validate(file_write_hint(file));
 	ki->iocb.ki_ioprio	= get_current_ioprio();
 	ki->object		= object;
 	ki->inval_counter	= cres->inval_counter;
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 3c98ef6af97d..45076c01a2ba 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -4479,10 +4479,8 @@ static ssize_t f2fs_dio_write_iter(struct kiocb *iocb, struct iov_iter *from,
 	struct f2fs_inode_info *fi = F2FS_I(inode);
 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 	const bool do_opu = f2fs_lfs_mode(sbi);
-	const int whint_mode = F2FS_OPTION(sbi).whint_mode;
 	const loff_t pos = iocb->ki_pos;
 	const ssize_t count = iov_iter_count(from);
-	const enum rw_hint hint = iocb->ki_hint;
 	unsigned int dio_flags;
 	struct iomap_dio *dio;
 	ssize_t ret;
@@ -4515,8 +4513,6 @@ static ssize_t f2fs_dio_write_iter(struct kiocb *iocb, struct iov_iter *from,
 		if (do_opu)
 			down_read(&fi->i_gc_rwsem[READ]);
 	}
-	if (whint_mode == WHINT_MODE_OFF)
-		iocb->ki_hint = WRITE_LIFE_NOT_SET;
 
 	/*
 	 * We have to use __iomap_dio_rw() and iomap_dio_complete() instead of
@@ -4539,8 +4535,6 @@ static ssize_t f2fs_dio_write_iter(struct kiocb *iocb, struct iov_iter *from,
 		ret = iomap_dio_complete(dio);
 	}
 
-	if (whint_mode == WHINT_MODE_OFF)
-		iocb->ki_hint = hint;
 	if (do_opu)
 		up_read(&fi->i_gc_rwsem[READ]);
 	up_read(&fi->i_gc_rwsem[WRITE]);
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 9c6c6a3e2de5..e26444e977e1 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -292,21 +292,8 @@ static long fcntl_rw_hint(struct file *file, unsigned int cmd,
 
 	switch (cmd) {
 	case F_GET_FILE_RW_HINT:
-		h = file_write_hint(file);
-		if (copy_to_user(argp, &h, sizeof(*argp)))
-			return -EFAULT;
-		return 0;
 	case F_SET_FILE_RW_HINT:
-		if (copy_from_user(&h, argp, sizeof(h)))
-			return -EFAULT;
-		hint = (enum rw_hint) h;
-		if (!rw_hint_valid(hint))
-			return -EINVAL;
-
-		spin_lock(&file->f_lock);
-		file->f_write_hint = hint;
-		spin_unlock(&file->f_lock);
-		return 0;
+		return -EINVAL;
 	case F_GET_RW_HINT:
 		h = inode->i_write_hint;
 		if (copy_to_user(argp, &h, sizeof(*argp)))
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 23e7f93d3956..02400fd00501 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -3790,7 +3790,6 @@ static int io_write_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
 	if (unlikely(!(req->file->f_mode & FMODE_WRITE)))
 		return -EBADF;
-	req->rw.kiocb.ki_hint = ki_hint_validate(file_write_hint(req->file));
 	return io_prep_rw(req, sqe);
 }
 
diff --git a/fs/open.c b/fs/open.c
index 9ff2f621b760..1315253e0247 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -835,7 +835,6 @@ static int do_dentry_open(struct file *f,
 	     likely(f->f_op->write || f->f_op->write_iter))
 		f->f_mode |= FMODE_CAN_WRITE;
 
-	f->f_write_hint = WRITE_LIFE_NOT_SET;
 	f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
 
 	file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e2d892b201b0..a1fc3b41cd82 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -327,7 +327,6 @@ struct kiocb {
 	void (*ki_complete)(struct kiocb *iocb, long ret);
 	void			*private;
 	int			ki_flags;
-	u16			ki_hint;
 	u16			ki_ioprio; /* See linux/ioprio.h */
 	struct wait_page_queue	*ki_waitq; /* for async buffered IO */
 	randomized_struct_fields_end
@@ -967,7 +966,6 @@ struct file {
 	 * Must not be taken from IRQ context.
 	 */
 	spinlock_t		f_lock;
-	enum rw_hint		f_write_hint;
 	atomic_long_t		f_count;
 	unsigned int 		f_flags;
 	fmode_t			f_mode;
@@ -2215,31 +2213,13 @@ static inline bool HAS_UNMAPPED_ID(struct user_namespace *mnt_userns,
 	       !gid_valid(i_gid_into_mnt(mnt_userns, inode));
 }
 
-static inline enum rw_hint file_write_hint(struct file *file)
-{
-	if (file->f_write_hint != WRITE_LIFE_NOT_SET)
-		return file->f_write_hint;
-
-	return file_inode(file)->i_write_hint;
-}
-
 static inline int iocb_flags(struct file *file);
 
-static inline u16 ki_hint_validate(enum rw_hint hint)
-{
-	typeof(((struct kiocb *)0)->ki_hint) max_hint = -1;
-
-	if (hint <= max_hint)
-		return hint;
-	return 0;
-}
-
 static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp)
 {
 	*kiocb = (struct kiocb) {
 		.ki_filp = filp,
 		.ki_flags = iocb_flags(filp),
-		.ki_hint = ki_hint_validate(file_write_hint(filp)),
 		.ki_ioprio = get_current_ioprio(),
 	};
 }
@@ -2250,7 +2230,6 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src,
 	*kiocb = (struct kiocb) {
 		.ki_filp = filp,
 		.ki_flags = kiocb_src->ki_flags,
-		.ki_hint = kiocb_src->ki_hint,
 		.ki_ioprio = kiocb_src->ki_ioprio,
 		.ki_pos = kiocb_src->ki_pos,
 	};
diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
index f701bb23f83c..1779e133cea0 100644
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h
@@ -956,12 +956,11 @@ TRACE_EVENT(f2fs_direct_IO_enter,
 		__entry->rw	= rw;
 	),
 
-	TP_printk("dev = (%d,%d), ino = %lu pos = %lld len = %lu ki_flags = %x ki_hint = %x ki_ioprio = %x rw = %d",
+	TP_printk("dev = (%d,%d), ino = %lu pos = %lld len = %lu ki_flags = %x ki_ioprio = %x rw = %d",
 		show_dev_ino(__entry),
 		__entry->iocb->ki_pos,
 		__entry->len,
 		__entry->iocb->ki_flags,
-		__entry->iocb->ki_hint,
 		__entry->iocb->ki_ioprio,
 		__entry->rw)
 );

-- 
Jens Axboe


^ permalink raw reply related	[flat|nested] 43+ messages in thread

* Re: [PATCH 2/2] block: remove the per-bio/request write hint
  2022-03-06 17:11         ` Jens Axboe
@ 2022-03-06 18:01           ` Christoph Hellwig
  2022-03-06 18:06             ` Jens Axboe
  0 siblings, 1 reply; 43+ messages in thread
From: Christoph Hellwig @ 2022-03-06 18:01 UTC (permalink / raw)
  To: Jens Axboe
  Cc: Dave Chinner, Christoph Hellwig, sagi, kbusch, song, linux-block,
	linux-raid, linux-nvme, linux-fsdevel

On Sun, Mar 06, 2022 at 10:11:46AM -0700, Jens Axboe wrote:
> Yes, I think we should kill it. If we retain the inode hint, the f2fs
> doesn't need a any changes. And it should be safe to make the per-file
> fcntl hints return EINVAL, which they would on older kernels anyway.
> Untested, but something like the below.

I've sent this off to the testing farm this morning, but EINVAL might
be even better:

http://git.infradead.org/users/hch/block.git/shortlog/refs/heads/more-hint-removal

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH 2/2] block: remove the per-bio/request write hint
  2022-03-06 18:01           ` Christoph Hellwig
@ 2022-03-06 18:06             ` Jens Axboe
  2022-03-06 23:17               ` Dave Chinner
  0 siblings, 1 reply; 43+ messages in thread
From: Jens Axboe @ 2022-03-06 18:06 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Dave Chinner, sagi, kbusch, song, linux-block, linux-raid,
	linux-nvme, linux-fsdevel

On 3/6/22 11:01 AM, Christoph Hellwig wrote:
> On Sun, Mar 06, 2022 at 10:11:46AM -0700, Jens Axboe wrote:
>> Yes, I think we should kill it. If we retain the inode hint, the f2fs
>> doesn't need a any changes. And it should be safe to make the per-file
>> fcntl hints return EINVAL, which they would on older kernels anyway.
>> Untested, but something like the below.
> 
> I've sent this off to the testing farm this morning, but EINVAL might
> be even better:
> 
> http://git.infradead.org/users/hch/block.git/shortlog/refs/heads/more-hint-removal

I do think EINVAL is better, as it just tells the app it's not available
like we would've done before. With just doing zeroes, that might break
applications that set-and-verify. Of course there's also the risk of
that since we retain inode hints (so they work), but fail file hints.
That's a lesser risk though, and we only know of the inode hints being
used.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH 2/2] block: remove the per-bio/request write hint
  2022-03-06 18:06             ` Jens Axboe
@ 2022-03-06 23:17               ` Dave Chinner
       [not found]                 ` <CGME20220309042324epcas1p111312e20f4429dc3a17172458284a923@epcas1p1.samsung.com>
  0 siblings, 1 reply; 43+ messages in thread
From: Dave Chinner @ 2022-03-06 23:17 UTC (permalink / raw)
  To: Jens Axboe
  Cc: Christoph Hellwig, sagi, kbusch, song, linux-block, linux-raid,
	linux-nvme, linux-fsdevel

On Sun, Mar 06, 2022 at 11:06:12AM -0700, Jens Axboe wrote:
> On 3/6/22 11:01 AM, Christoph Hellwig wrote:
> > On Sun, Mar 06, 2022 at 10:11:46AM -0700, Jens Axboe wrote:
> >> Yes, I think we should kill it. If we retain the inode hint, the f2fs
> >> doesn't need a any changes. And it should be safe to make the per-file
> >> fcntl hints return EINVAL, which they would on older kernels anyway.
> >> Untested, but something like the below.
> > 
> > I've sent this off to the testing farm this morning, but EINVAL might
> > be even better:
> > 
> > http://git.infradead.org/users/hch/block.git/shortlog/refs/heads/more-hint-removal

Yup, I like that.

> I do think EINVAL is better, as it just tells the app it's not available
> like we would've done before. With just doing zeroes, that might break
> applications that set-and-verify. Of course there's also the risk of
> that since we retain inode hints (so they work), but fail file hints.
> That's a lesser risk though, and we only know of the inode hints being
> used.

Agreed, I think EINVAL would be better here - jsut make it behave
like it would on a kernel that never supported this functionality in
the first place. Seems simpler to me for user applications if we do
that.

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH 2/2] block: remove the per-bio/request write hint.
  2022-03-09 13:31                   ` Manjong Lee
@ 2022-03-09  8:24                     ` Paul Menzel
  2022-03-10 11:34                     ` [EXT] " Luca Porzio (lporzio)
  1 sibling, 0 replies; 43+ messages in thread
From: Paul Menzel @ 2022-03-09  8:24 UTC (permalink / raw)
  To: Manjong Lee
  Cc: david, axboe, hch, kbusch, linux-block, linux-fsdevel,
	linux-nvme, linux-raid, sagi, song, seunghwan.hyun, sookwan7.kim,
	nanich.lee, woosung2.lee, yt0928.kim, junho89.kim, jisoo2146.oh

Dear Manjong,


Am 09.03.22 um 14:31 schrieb Manjong Lee:

Just a small note, that your message date is from the future. Please 
check your system clock.


Kind regards,

Paul

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH 2/2] block: remove the per-bio/request write hint.
       [not found]                 ` <CGME20220309042324epcas1p111312e20f4429dc3a17172458284a923@epcas1p1.samsung.com>
@ 2022-03-09 13:31                   ` Manjong Lee
  2022-03-09  8:24                     ` Paul Menzel
  2022-03-10 11:34                     ` [EXT] " Luca Porzio (lporzio)
  0 siblings, 2 replies; 43+ messages in thread
From: Manjong Lee @ 2022-03-09 13:31 UTC (permalink / raw)
  To: david
  Cc: axboe, hch, kbusch, linux-block, linux-fsdevel, linux-nvme,
	linux-raid, sagi, song, seunghwan.hyun, sookwan7.kim, nanich.lee,
	woosung2.lee, yt0928.kim, junho89.kim, jisoo2146.oh

>On Sun, ddMar 06, 2022 at 11:06:12AM -0700, Jens Axboe wrote:
>> On 3/6/22 11:01 AM, Christoph Hellwig wrote:
>> > On Sun, Mar 06, 2022 at 10:11:46AM -0700, Jens Axboe wrote:
>> >> Yes, I think we should kill it. If we retain the inode hint, the f2fs
>> >> doesn't need a any changes. And it should be safe to make the per-file
>> >> fcntl hints return EINVAL, which they would on older kernels anyway.
>> >> Untested, but something like the below.
>> > 
>> > I've sent this off to the testing farm this morning, but EINVAL might
>> > be even better:
>> > 
>> > http://git.infradead.org/users/hch/block.git/shortlog/refs/heads/more-hint-removal
>
>Yup, I like that.
>
>> I do think EINVAL is better, as it just tells the app it's not available
>> like we would've done before. With just doing zeroes, that might break
>> applications that set-and-verify. Of course there's also the risk of
>> that since we retain inode hints (so they work), but fail file hints.
>> That's a lesser risk though, and we only know of the inode hints being
>> used.
>
>Agreed, I think EINVAL would be better here - jsut make it behave
>like it would on a kernel that never supported this functionality in
>the first place. Seems simpler to me for user applications if we do
>that.
>
>Cheers,
>
>Dave.
>-- 
>Dave Chinner
>david@fromorbit.com
>

Currently, UFS device also supports hot/cold data separation 
and uses existing write_hint code.

In other words, the function is also being used in storage other than NVMe,
and if it is removed, it is thought that there will be an operation problem.

If the code is removed, I am worried about how other devices
that use the function.

Is there a good alternative?

^ permalink raw reply	[flat|nested] 43+ messages in thread

* RE: [EXT] Re: [PATCH 2/2] block: remove the per-bio/request write hint.
  2022-03-09 13:31                   ` Manjong Lee
  2022-03-09  8:24                     ` Paul Menzel
@ 2022-03-10 11:34                     ` Luca Porzio (lporzio)
  2022-03-10 12:15                       ` Jens Axboe
  2022-03-10 14:21                       ` hch
  1 sibling, 2 replies; 43+ messages in thread
From: Luca Porzio (lporzio) @ 2022-03-10 11:34 UTC (permalink / raw)
  To: Manjong Lee, david
  Cc: axboe, hch, kbusch, linux-block, linux-fsdevel, linux-nvme,
	linux-raid, sagi, song, seunghwan.hyun, sookwan7.kim, nanich.lee,
	woosung2.lee, yt0928.kim, junho89.kim, jisoo2146.oh

> -----Original Message-----
> From: Manjong Lee <mj0123.lee@samsung.com>
> Sent: Wednesday, March 9, 2022 2:31 PM
> To: david@fromorbit.com
> Cc: axboe@kernel.dk; hch@lst.de; kbusch@kernel.org; linux-
> block@vger.kernel.org; linux-fsdevel@vger.kernel.org; linux-
> nvme@lists.infradead.org; linux-raid@vger.kernel.org; sagi@grimberg.me;
> song@kernel.org; seunghwan.hyun@samsung.com;
> sookwan7.kim@samsung.com; nanich.lee@samsung.com;
> woosung2.lee@samsung.com; yt0928.kim@samsung.com;
> junho89.kim@samsung.com; jisoo2146.oh@samsung.com
> Subject: [EXT] Re: [PATCH 2/2] block: remove the per-bio/request write hint.
> 
> CAUTION: EXTERNAL EMAIL. Do not click links or open attachments unless
> you recognize the sender and were expecting this message.
> 
> 
> >On Sun, ddMar 06, 2022 at 11:06:12AM -0700, Jens Axboe wrote:
> >> On 3/6/22 11:01 AM, Christoph Hellwig wrote:
> >> > On Sun, Mar 06, 2022 at 10:11:46AM -0700, Jens Axboe wrote:
> >> >> Yes, I think we should kill it. If we retain the inode hint, the
> >> >> f2fs doesn't need a any changes. And it should be safe to make the
> >> >> per-file fcntl hints return EINVAL, which they would on older kernels
> anyway.
> >> >> Untested, but something like the below.
> >> >
> >> > I've sent this off to the testing farm this morning, but EINVAL
> >> > might be even better:
> >> >
> >> > https://urldefense.com/v3/__http://git.infradead.org/users/hch/bloc
> >> > k.git/shortlog/refs/heads/more-hint-
> removal__;!!KZTdOCjhgt4hgw!qsgy
> >> >
> oejchUYPeorpCL0Ov3jPGvXpXgxa7hpSCViD7XQy7uJDMDLo3U8v_bmoUtg$
> >
> >Yup, I like that.
> >
> >> I do think EINVAL is better, as it just tells the app it's not
> >> available like we would've done before. With just doing zeroes, that
> >> might break applications that set-and-verify. Of course there's also
> >> the risk of that since we retain inode hints (so they work), but fail file
> hints.
> >> That's a lesser risk though, and we only know of the inode hints
> >> being used.
> >
> >Agreed, I think EINVAL would be better here - jsut make it behave like
> >it would on a kernel that never supported this functionality in the
> >first place. Seems simpler to me for user applications if we do that.
> >
> >Cheers,
> >
> >Dave.
> >--
> >Dave Chinner
> >david@fromorbit.com
> >
> 
> Currently, UFS device also supports hot/cold data separation and uses
> existing write_hint code.
> 
> In other words, the function is also being used in storage other than NVMe,
> and if it is removed, it is thought that there will be an operation problem.
> 
> If the code is removed, I am worried about how other devices that use the
> function.
> 
> Is there a good alternative?

Hi all,

I work for Micron UFS team. I confirm and support Manjong message above.
There are UFS customers using custom write_hint in Android and due to the 
"upstream first" policy from Google, if you remove write_hints in block device,
The Android ecosystem will suffer this lack.

Can we revert back this decision? Or think of an alternative solution which 
may work?

Cheers,
   Luca

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [EXT] Re: [PATCH 2/2] block: remove the per-bio/request write hint.
  2022-03-10 11:34                     ` [EXT] " Luca Porzio (lporzio)
@ 2022-03-10 12:15                       ` Jens Axboe
  2022-03-10 18:50                         ` Luca Porzio (lporzio)
  2022-03-10 14:21                       ` hch
  1 sibling, 1 reply; 43+ messages in thread
From: Jens Axboe @ 2022-03-10 12:15 UTC (permalink / raw)
  To: Luca Porzio (lporzio), Manjong Lee, david
  Cc: hch, kbusch, linux-block, linux-fsdevel, linux-nvme, linux-raid,
	sagi, song, seunghwan.hyun, sookwan7.kim, nanich.lee,
	woosung2.lee, yt0928.kim, junho89.kim, jisoo2146.oh

On 3/10/22 4:34 AM, Luca Porzio (lporzio) wrote:
>> -----Original Message-----
>> From: Manjong Lee <mj0123.lee@samsung.com>
>> Sent: Wednesday, March 9, 2022 2:31 PM
>> To: david@fromorbit.com
>> Cc: axboe@kernel.dk; hch@lst.de; kbusch@kernel.org; linux-
>> block@vger.kernel.org; linux-fsdevel@vger.kernel.org; linux-
>> nvme@lists.infradead.org; linux-raid@vger.kernel.org; sagi@grimberg.me;
>> song@kernel.org; seunghwan.hyun@samsung.com;
>> sookwan7.kim@samsung.com; nanich.lee@samsung.com;
>> woosung2.lee@samsung.com; yt0928.kim@samsung.com;
>> junho89.kim@samsung.com; jisoo2146.oh@samsung.com
>> Subject: [EXT] Re: [PATCH 2/2] block: remove the per-bio/request write hint.
>>
>> CAUTION: EXTERNAL EMAIL. Do not click links or open attachments unless
>> you recognize the sender and were expecting this message.
>>
>>
>>> On Sun, ddMar 06, 2022 at 11:06:12AM -0700, Jens Axboe wrote:
>>>> On 3/6/22 11:01 AM, Christoph Hellwig wrote:
>>>>> On Sun, Mar 06, 2022 at 10:11:46AM -0700, Jens Axboe wrote:
>>>>>> Yes, I think we should kill it. If we retain the inode hint, the
>>>>>> f2fs doesn't need a any changes. And it should be safe to make the
>>>>>> per-file fcntl hints return EINVAL, which they would on older kernels
>> anyway.
>>>>>> Untested, but something like the below.
>>>>>
>>>>> I've sent this off to the testing farm this morning, but EINVAL
>>>>> might be even better:
>>>>>
>>>>> https://urldefense.com/v3/__http://git.infradead.org/users/hch/bloc
>>>>> k.git/shortlog/refs/heads/more-hint-
>> removal__;!!KZTdOCjhgt4hgw!qsgy
>>>>>
>> oejchUYPeorpCL0Ov3jPGvXpXgxa7hpSCViD7XQy7uJDMDLo3U8v_bmoUtg$
>>>
>>> Yup, I like that.
>>>
>>>> I do think EINVAL is better, as it just tells the app it's not
>>>> available like we would've done before. With just doing zeroes, that
>>>> might break applications that set-and-verify. Of course there's also
>>>> the risk of that since we retain inode hints (so they work), but fail file
>> hints.
>>>> That's a lesser risk though, and we only know of the inode hints
>>>> being used.
>>>
>>> Agreed, I think EINVAL would be better here - jsut make it behave like
>>> it would on a kernel that never supported this functionality in the
>>> first place. Seems simpler to me for user applications if we do that.
>>>
>>> Cheers,
>>>
>>> Dave.
>>> --
>>> Dave Chinner
>>> david@fromorbit.com
>>>
>>
>> Currently, UFS device also supports hot/cold data separation and uses
>> existing write_hint code.
>>
>> In other words, the function is also being used in storage other than NVMe,
>> and if it is removed, it is thought that there will be an operation problem.
>>
>> If the code is removed, I am worried about how other devices that use the
>> function.
>>
>> Is there a good alternative?
> 
> Hi all,
> 
> I work for Micron UFS team. I confirm and support Manjong message above.
> There are UFS customers using custom write_hint in Android and due to the 
> "upstream first" policy from Google, if you remove write_hints in block device,
> The Android ecosystem will suffer this lack.
> 
> Can we revert back this decision? Or think of an alternative solution which 
> may work?

You do both realize that this is just the file specific hint? Inode
based hints will still work fine for UFS.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [EXT] Re: [PATCH 2/2] block: remove the per-bio/request write hint.
  2022-03-10 11:34                     ` [EXT] " Luca Porzio (lporzio)
  2022-03-10 12:15                       ` Jens Axboe
@ 2022-03-10 14:21                       ` hch
  2022-03-10 18:51                         ` Luca Porzio (lporzio)
  1 sibling, 1 reply; 43+ messages in thread
From: hch @ 2022-03-10 14:21 UTC (permalink / raw)
  To: Luca Porzio (lporzio)
  Cc: Manjong Lee, david, axboe, hch, kbusch, linux-block,
	linux-fsdevel, linux-nvme, linux-raid, sagi, song,
	seunghwan.hyun, sookwan7.kim, nanich.lee, woosung2.lee,
	yt0928.kim, junho89.kim, jisoo2146.oh

On Thu, Mar 10, 2022 at 11:34:40AM +0000, Luca Porzio (lporzio) wrote:
> I work for Micron UFS team. I confirm and support Manjong message above.
> There are UFS customers using custom write_hint in Android and due to the 
> "upstream first" policy from Google, if you remove write_hints in block device,
> The Android ecosystem will suffer this lack.
> 
> Can we revert back this decision? Or think of an alternative solution which 
> may work?

Hell no.  Given that these custmomers never gave a singe fuck to actually
support whatever crap they came up with upstream we're not going to leave
dead code to encurage them to keep doing this.

^ permalink raw reply	[flat|nested] 43+ messages in thread

* RE: [EXT] Re: [PATCH 2/2] block: remove the per-bio/request write hint.
  2022-03-10 12:15                       ` Jens Axboe
@ 2022-03-10 18:50                         ` Luca Porzio (lporzio)
  2022-03-10 19:10                           ` Jens Axboe
  0 siblings, 1 reply; 43+ messages in thread
From: Luca Porzio (lporzio) @ 2022-03-10 18:50 UTC (permalink / raw)
  To: Jens Axboe, Manjong Lee, david
  Cc: hch, kbusch, linux-block, linux-fsdevel, linux-nvme, linux-raid,
	sagi, song, seunghwan.hyun, sookwan7.kim, nanich.lee,
	woosung2.lee, yt0928.kim, junho89.kim, jisoo2146.oh

Micron Confidential

> 
> You do both realize that this is just the file specific hint? Inode based hints
> will still work fine for UFS.
> 
> --
> Jens Axboe

Jens,

Thanks for this reply.

This whole patch series removes support for per-bio write_hint. 
Without bio write_hint, F2FS won't be able to cascade Hot/Warm/Cold 
information to SCSI / UFS driver.

This is my current understanding. I might be wrong but I don't think we 
Are concerned with inode hint (as well as file hints).

Thanks,
  Luca


Micron Confidential

^ permalink raw reply	[flat|nested] 43+ messages in thread

* RE: [EXT] Re: [PATCH 2/2] block: remove the per-bio/request write hint.
  2022-03-10 14:21                       ` hch
@ 2022-03-10 18:51                         ` Luca Porzio (lporzio)
  2022-03-10 19:14                           ` hch
  2022-03-11  5:06                           ` Eric Biggers
  0 siblings, 2 replies; 43+ messages in thread
From: Luca Porzio (lporzio) @ 2022-03-10 18:51 UTC (permalink / raw)
  To: hch
  Cc: Manjong Lee, david, axboe, kbusch, linux-block, linux-fsdevel,
	linux-nvme, linux-raid, sagi, song, seunghwan.hyun, sookwan7.kim,
	nanich.lee, woosung2.lee, yt0928.kim, junho89.kim, jisoo2146.oh

Micron Confidential


> > Can we revert back this decision? Or think of an alternative solution
> > which may work?
> 
> Hell no.  Given that these custmomers never gave a singe fuck to actually
> support whatever crap they came up with upstream we're not going to leave
> dead code to encurage them to keep doing this.

You are starting from the wrong assumption: I'm just saying this is not dead code
but it is used across the (Android) ecosystem.

Micron Confidential

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [EXT] Re: [PATCH 2/2] block: remove the per-bio/request write hint.
  2022-03-10 18:50                         ` Luca Porzio (lporzio)
@ 2022-03-10 19:10                           ` Jens Axboe
  2022-03-10 19:34                             ` Bart Van Assche
  0 siblings, 1 reply; 43+ messages in thread
From: Jens Axboe @ 2022-03-10 19:10 UTC (permalink / raw)
  To: Luca Porzio (lporzio), Manjong Lee, david
  Cc: hch, kbusch, linux-block, linux-fsdevel, linux-nvme, linux-raid,
	sagi, song, seunghwan.hyun, sookwan7.kim, nanich.lee,
	woosung2.lee, yt0928.kim, junho89.kim, jisoo2146.oh

On 3/10/22 11:50 AM, Luca Porzio (lporzio) wrote:
> Micron Confidential
> 
>>
>> You do both realize that this is just the file specific hint? Inode based hints
>> will still work fine for UFS.
>>
>> --
>> Jens Axboe
> 
> Jens,
> 
> Thanks for this reply.
> 
> This whole patch series removes support for per-bio write_hint. 
> Without bio write_hint, F2FS won't be able to cascade Hot/Warm/Cold 
> information to SCSI / UFS driver.
> 
> This is my current understanding. I might be wrong but I don't think we 
> Are concerned with inode hint (as well as file hints).

But ufs/scsi doesn't use it in mainline, as far as I can tell. So how
does that work?

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [EXT] Re: [PATCH 2/2] block: remove the per-bio/request write hint.
  2022-03-10 18:51                         ` Luca Porzio (lporzio)
@ 2022-03-10 19:14                           ` hch
  2022-03-11  5:06                           ` Eric Biggers
  1 sibling, 0 replies; 43+ messages in thread
From: hch @ 2022-03-10 19:14 UTC (permalink / raw)
  To: Luca Porzio (lporzio)
  Cc: hch, Manjong Lee, david, axboe, kbusch, linux-block,
	linux-fsdevel, linux-nvme, linux-raid, sagi, song,
	seunghwan.hyun, sookwan7.kim, nanich.lee, woosung2.lee,
	yt0928.kim, junho89.kim, jisoo2146.oh

On Thu, Mar 10, 2022 at 06:51:39PM +0000, Luca Porzio (lporzio) wrote:
> You are starting from the wrong assumption: I'm just saying this is not dead code
> but it is used across the (Android) ecosystem.

No one gives cares about random forks, so no I'm not starting from the
wrong assumptіon.  You on the other hand seem to have a lot of learning
to do if you thing some random kernel fork matters the slightest about
what is considered dead code.

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [EXT] Re: [PATCH 2/2] block: remove the per-bio/request write hint.
  2022-03-10 19:10                           ` Jens Axboe
@ 2022-03-10 19:34                             ` Bart Van Assche
  2022-03-10 21:52                               ` Bean Huo (beanhuo)
  0 siblings, 1 reply; 43+ messages in thread
From: Bart Van Assche @ 2022-03-10 19:34 UTC (permalink / raw)
  To: Jens Axboe, Luca Porzio (lporzio), Manjong Lee, david
  Cc: hch, kbusch, linux-block, linux-fsdevel, linux-nvme, linux-raid,
	sagi, song, seunghwan.hyun, sookwan7.kim, nanich.lee,
	woosung2.lee, yt0928.kim, junho89.kim, jisoo2146.oh, Jaegeuk Kim

On 3/10/22 11:10, Jens Axboe wrote:
> On 3/10/22 11:50 AM, Luca Porzio (lporzio) wrote:
>> Micron Confidential
>>
>>>
>>> You do both realize that this is just the file specific hint? Inode based hints
>>> will still work fine for UFS.
>>>
>>> --
>>> Jens Axboe
>>
>> Jens,
>>
>> Thanks for this reply.
>>
>> This whole patch series removes support for per-bio write_hint.
>> Without bio write_hint, F2FS won't be able to cascade Hot/Warm/Cold
>> information to SCSI / UFS driver.
>>
>> This is my current understanding. I might be wrong but I don't think we
>> Are concerned with inode hint (as well as file hints).
> 
> But ufs/scsi doesn't use it in mainline, as far as I can tell. So how
> does that work?

Hi Luca,

I'm not aware of any Android branch on which the UFS driver or the SCSI 
core uses bi_write_hint or the struct request write_hint member. Did I 
perhaps overlook something?

Thanks,

Bart.



^ permalink raw reply	[flat|nested] 43+ messages in thread

* RE: [EXT] Re: [PATCH 2/2] block: remove the per-bio/request write hint.
  2022-03-10 19:34                             ` Bart Van Assche
@ 2022-03-10 21:52                               ` Bean Huo (beanhuo)
  2022-03-10 22:10                                 ` Jens Axboe
  2022-03-10 22:18                                 ` Bart Van Assche
  0 siblings, 2 replies; 43+ messages in thread
From: Bean Huo (beanhuo) @ 2022-03-10 21:52 UTC (permalink / raw)
  To: Bart Van Assche, Jens Axboe, Luca Porzio (lporzio), Manjong Lee, david
  Cc: hch, kbusch, linux-block, linux-fsdevel, linux-nvme, linux-raid,
	sagi, song, seunghwan.hyun, sookwan7.kim, nanich.lee,
	woosung2.lee, yt0928.kim, junho89.kim, jisoo2146.oh, Jaegeuk Kim

Micron Confidential

> >>
> >>>
> >>> You do both realize that this is just the file specific hint? Inode
> >>> based hints will still work fine for UFS.
> >>>
> >>> --
> >>> Jens Axboe
> >>
> >> Jens,
> >>
> >> Thanks for this reply.
> >>
> >> This whole patch series removes support for per-bio write_hint.
> >> Without bio write_hint, F2FS won't be able to cascade Hot/Warm/Cold
> >> information to SCSI / UFS driver.
> >>
> >> This is my current understanding. I might be wrong but I don't think
> >> we Are concerned with inode hint (as well as file hints).
> >
> > But ufs/scsi doesn't use it in mainline, as far as I can tell. So how
> > does that work?
> 
> Hi Luca,
> 
> I'm not aware of any Android branch on which the UFS driver or the SCSI core
> uses bi_write_hint or the struct request write_hint member. Did I perhaps
> overlook something?
> 
> Thanks,
> 


Bart,

Yes, in upstream linux and upstream android, there is no such code. But as we know,
mobile customers have used bio->bi_write_hint in their products for years. And the
group ID is set according to bio->bi_write_hint before passing the CDB to UFS.


	lrbp = &hba->lrb[tag];
 
              WARN_ON(lrbp->cmd);
             + if(cmd->cmnd[0] == WRITE_10)
              +{
                +             cmd->cmnd[6] = (0x1f& cmd->request->bio->bi_write_hint);
              +}             
              lrbp->cmd = cmd;
              lrbp->sense_bufflen = UFS_SENSE_SIZE;
              lrbp->sense_buffer = cmd->sense_buffer;

I don't know why they don't push these changes to the community, maybe
it's because changes across the file system and block layers are unacceptable to the
block layer and FS. but for sure we should now warn them to push to the
community as soon as possible. 

Bean



Micron Confidential

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [EXT] Re: [PATCH 2/2] block: remove the per-bio/request write hint.
  2022-03-10 21:52                               ` Bean Huo (beanhuo)
@ 2022-03-10 22:10                                 ` Jens Axboe
  2022-03-11 16:45                                   ` Bart Van Assche
  2022-03-10 22:18                                 ` Bart Van Assche
  1 sibling, 1 reply; 43+ messages in thread
From: Jens Axboe @ 2022-03-10 22:10 UTC (permalink / raw)
  To: Bean Huo (beanhuo), Bart Van Assche, Luca Porzio (lporzio),
	Manjong Lee, david
  Cc: hch, kbusch, linux-block, linux-fsdevel, linux-nvme, linux-raid,
	sagi, song, seunghwan.hyun, sookwan7.kim, nanich.lee,
	woosung2.lee, yt0928.kim, junho89.kim, jisoo2146.oh, Jaegeuk Kim

On 3/10/22 2:52 PM, Bean Huo (beanhuo) wrote:
> Micron Confidential
> 
>>>>
>>>>>
>>>>> You do both realize that this is just the file specific hint? Inode
>>>>> based hints will still work fine for UFS.
>>>>>
>>>>> --
>>>>> Jens Axboe
>>>>
>>>> Jens,
>>>>
>>>> Thanks for this reply.
>>>>
>>>> This whole patch series removes support for per-bio write_hint.
>>>> Without bio write_hint, F2FS won't be able to cascade Hot/Warm/Cold
>>>> information to SCSI / UFS driver.
>>>>
>>>> This is my current understanding. I might be wrong but I don't think
>>>> we Are concerned with inode hint (as well as file hints).
>>>
>>> But ufs/scsi doesn't use it in mainline, as far as I can tell. So how
>>> does that work?
>>
>> Hi Luca,
>>
>> I'm not aware of any Android branch on which the UFS driver or the SCSI core
>> uses bi_write_hint or the struct request write_hint member. Did I perhaps
>> overlook something?
>>
>> Thanks,
>>
> 
> 
> Bart,
> 
> Yes, in upstream linux and upstream android, there is no such code.
> But as we know, mobile customers have used bio->bi_write_hint in their
> products for years. And the group ID is set according to
> bio->bi_write_hint before passing the CDB to UFS.
> 
> 
> 	lrbp = &hba->lrb[tag];
>  
>               WARN_ON(lrbp->cmd);
>              + if(cmd->cmnd[0] == WRITE_10)
>               +{
>                 +             cmd->cmnd[6] = (0x1f& cmd->request->bio->bi_write_hint);
>               +}             
>               lrbp->cmd = cmd;
>               lrbp->sense_bufflen = UFS_SENSE_SIZE;
>               lrbp->sense_buffer = cmd->sense_buffer;
> 
> I don't know why they don't push these changes to the community, maybe
> it's because changes across the file system and block layers are
> unacceptable to the block layer and FS. but for sure we should now
> warn them to push to the community as soon as possible. 

If the code isn't upstream, it's a bit late to start thinking about
that now. This feature has existed for 5 years at this point, and the
only consumer was NVMe. The upstream kernel cares only about what is
in-tree, as that is the only part we can modify and fix. We
change/modify internal kernel APIs all the time, which is how tech debt
is removed and the long term sanity of the project is maintained. This
in turn means that out-of-tree code will break, that's just a natural
side effect and something we can't do anything about.

If at some point there's a desire to actually try and upstream this
support, then we'll be happy to review that patchset. Or you can
continue to stay out-of-tree and just patch in what you need. If you're
already modifying core code, then that shouldn't be a problem.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [EXT] Re: [PATCH 2/2] block: remove the per-bio/request write hint.
  2022-03-10 21:52                               ` Bean Huo (beanhuo)
  2022-03-10 22:10                                 ` Jens Axboe
@ 2022-03-10 22:18                                 ` Bart Van Assche
  2022-03-11  5:31                                     ` [f2fs-dev] " Eric Biggers
  2022-03-11  9:21                                   ` Luca Porzio (lporzio)
  1 sibling, 2 replies; 43+ messages in thread
From: Bart Van Assche @ 2022-03-10 22:18 UTC (permalink / raw)
  To: Bean Huo (beanhuo), Jens Axboe, Luca Porzio (lporzio),
	Manjong Lee, david
  Cc: hch, kbusch, linux-block, linux-fsdevel, linux-nvme, linux-raid,
	sagi, song, seunghwan.hyun, sookwan7.kim, nanich.lee,
	woosung2.lee, yt0928.kim, junho89.kim, jisoo2146.oh, Jaegeuk Kim

On 3/10/22 13:52, Bean Huo (beanhuo) wrote:
> Yes, in upstream linux and upstream android, there is no such code. But as we know,
> mobile customers have used bio->bi_write_hint in their products for years. And the
> group ID is set according to bio->bi_write_hint before passing the CDB to UFS.
> 
> 
> 	lrbp = &hba->lrb[tag];
>   
>                WARN_ON(lrbp->cmd);
>               + if(cmd->cmnd[0] == WRITE_10)
>                +{
>                  +             cmd->cmnd[6] = (0x1f& cmd->request->bio->bi_write_hint);
>                +}
>                lrbp->cmd = cmd;
>                lrbp->sense_bufflen = UFS_SENSE_SIZE;
>                lrbp->sense_buffer = cmd->sense_buffer;
> 
> I don't know why they don't push these changes to the community, maybe
> it's because changes across the file system and block layers are unacceptable to the
> block layer and FS. but for sure we should now warn them to push to the
> community as soon as possible.

Thanks Bean for having shared this information. I think the above code sets the GROUP
NUMBER information in the WRITE(10) command and also that the following text from the
UFS specification applies to that information:
<quote>
GROUP NUMBER: Notifies the Target device that the data linked to a ContextID:
  -----------------------------------------------------------------------------------------
     GROUP NUMBER Value     |  Function
  -----------------------------------------------------------------------------------------
  00000b                    | Default, no Context ID is associated with the read operation.
  00001b to 01111b (0XXXXb) | Context ID. (XXXX I from 0001b to 1111b ‐ Context ID value)
  10000b                    | Data has System Data characteristics
  10001b to 11111b          | Reserved
  -----------------------------------------------------------------------------------------

In case the GROUP NUMBER is set to a reserved value, the operation shall fail and a status
response of CHECK CONDITION will be returned along with the sense key set to ILLEGAL REQUEST.
</quote>

Since there is a desire to remove the write hint information from struct bio, is there
any other information the "system data characteristics" information can be derived from?
How about e.g. deriving that information from request flags like REQ_SYNC, REQ_META and/or
REQ_IDLE?

Thanks,

Bart.

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [EXT] Re: [PATCH 2/2] block: remove the per-bio/request write hint.
  2022-03-10 18:51                         ` Luca Porzio (lporzio)
  2022-03-10 19:14                           ` hch
@ 2022-03-11  5:06                           ` Eric Biggers
  2022-03-11  9:23                             ` Luca Porzio (lporzio)
  1 sibling, 1 reply; 43+ messages in thread
From: Eric Biggers @ 2022-03-11  5:06 UTC (permalink / raw)
  To: Luca Porzio (lporzio)
  Cc: hch, Manjong Lee, david, axboe, kbusch, linux-block,
	linux-fsdevel, linux-nvme, linux-raid, sagi, song,
	seunghwan.hyun, sookwan7.kim, nanich.lee, woosung2.lee,
	yt0928.kim, junho89.kim, jisoo2146.oh

On Thu, Mar 10, 2022 at 06:51:39PM +0000, Luca Porzio (lporzio) wrote:
>
> Micron Confidential

This is a public mailing list, so please do not use this header/footer.

> it is used across the (Android) ecosystem.

So why hasn't it been submitted upstream?

- Eric

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [EXT] Re: [PATCH 2/2] block: remove the per-bio/request write hint.
  2022-03-10 22:18                                 ` Bart Van Assche
@ 2022-03-11  5:31                                     ` Eric Biggers
  2022-03-11  9:21                                   ` Luca Porzio (lporzio)
  1 sibling, 0 replies; 43+ messages in thread
From: Eric Biggers @ 2022-03-11  5:31 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: Bean Huo (beanhuo), Jens Axboe, Luca Porzio (lporzio),
	Manjong Lee, david, hch, kbusch, linux-block, linux-fsdevel,
	linux-nvme, linux-raid, sagi, song, seunghwan.hyun, sookwan7.kim,
	nanich.lee, woosung2.lee, yt0928.kim, junho89.kim, jisoo2146.oh,
	Jaegeuk Kim, linux-f2fs-devel

On Thu, Mar 10, 2022 at 02:18:19PM -0800, Bart Van Assche wrote:
> On 3/10/22 13:52, Bean Huo (beanhuo) wrote:
> > Yes, in upstream linux and upstream android, there is no such code. But as we know,
> > mobile customers have used bio->bi_write_hint in their products for years. And the
> > group ID is set according to bio->bi_write_hint before passing the CDB to UFS.
> > 
> > 
> > 	lrbp = &hba->lrb[tag];
> >                WARN_ON(lrbp->cmd);
> >               + if(cmd->cmnd[0] == WRITE_10)
> >                +{
> >                  +             cmd->cmnd[6] = (0x1f& cmd->request->bio->bi_write_hint);
> >                +}
> >                lrbp->cmd = cmd;
> >                lrbp->sense_bufflen = UFS_SENSE_SIZE;
> >                lrbp->sense_buffer = cmd->sense_buffer;
> > 
> > I don't know why they don't push these changes to the community, maybe
> > it's because changes across the file system and block layers are unacceptable to the
> > block layer and FS. but for sure we should now warn them to push to the
> > community as soon as possible.
> 
> Thanks Bean for having shared this information. I think the above code sets the GROUP
> NUMBER information in the WRITE(10) command and also that the following text from the
> UFS specification applies to that information:
> <quote>
> GROUP NUMBER: Notifies the Target device that the data linked to a ContextID:
>  -----------------------------------------------------------------------------------------
>     GROUP NUMBER Value     |  Function
>  -----------------------------------------------------------------------------------------
>  00000b                    | Default, no Context ID is associated with the read operation.
>  00001b to 01111b (0XXXXb) | Context ID. (XXXX I from 0001b to 1111b ‐ Context ID value)
>  10000b                    | Data has System Data characteristics
>  10001b to 11111b          | Reserved
>  -----------------------------------------------------------------------------------------
> 
> In case the GROUP NUMBER is set to a reserved value, the operation shall fail and a status
> response of CHECK CONDITION will be returned along with the sense key set to ILLEGAL REQUEST.
> </quote>
> 
> Since there is a desire to remove the write hint information from struct bio, is there
> any other information the "system data characteristics" information can be derived from?
> How about e.g. deriving that information from request flags like REQ_SYNC, REQ_META and/or
> REQ_IDLE?
> 

[+Cc linux-f2fs-devel]

I think the f2fs developers will need to chime in here, as it looks like f2fs
uses the write hints for different data categories like hot/cold/warm.  I'm not
sure those can be fully represented by other bio flags.

Either way, the good news is that it sounds like this "GROUP NUMBER" thing is
part of the UFS standard.  So whatever the best way to support it is, it can
just be submitted upstream like any other standard UFS feature.  Why hasn't that
been done?

- Eric

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [f2fs-dev] [EXT] Re: [PATCH 2/2] block: remove the per-bio/request write hint.
@ 2022-03-11  5:31                                     ` Eric Biggers
  0 siblings, 0 replies; 43+ messages in thread
From: Eric Biggers @ 2022-03-11  5:31 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: woosung2.lee, david, linux-nvme, seunghwan.hyun, song, hch,
	Bean Huo (beanhuo),
	sagi, jisoo2146.oh, nanich.lee, yt0928.kim, linux-block,
	linux-fsdevel, sookwan7.kim, kbusch, Jaegeuk Kim, Jens Axboe,
	linux-raid, linux-f2fs-devel, junho89.kim, Manjong Lee

On Thu, Mar 10, 2022 at 02:18:19PM -0800, Bart Van Assche wrote:
> On 3/10/22 13:52, Bean Huo (beanhuo) wrote:
> > Yes, in upstream linux and upstream android, there is no such code. But as we know,
> > mobile customers have used bio->bi_write_hint in their products for years. And the
> > group ID is set according to bio->bi_write_hint before passing the CDB to UFS.
> > 
> > 
> > 	lrbp = &hba->lrb[tag];
> >                WARN_ON(lrbp->cmd);
> >               + if(cmd->cmnd[0] == WRITE_10)
> >                +{
> >                  +             cmd->cmnd[6] = (0x1f& cmd->request->bio->bi_write_hint);
> >                +}
> >                lrbp->cmd = cmd;
> >                lrbp->sense_bufflen = UFS_SENSE_SIZE;
> >                lrbp->sense_buffer = cmd->sense_buffer;
> > 
> > I don't know why they don't push these changes to the community, maybe
> > it's because changes across the file system and block layers are unacceptable to the
> > block layer and FS. but for sure we should now warn them to push to the
> > community as soon as possible.
> 
> Thanks Bean for having shared this information. I think the above code sets the GROUP
> NUMBER information in the WRITE(10) command and also that the following text from the
> UFS specification applies to that information:
> <quote>
> GROUP NUMBER: Notifies the Target device that the data linked to a ContextID:
>  -----------------------------------------------------------------------------------------
>     GROUP NUMBER Value     |  Function
>  -----------------------------------------------------------------------------------------
>  00000b                    | Default, no Context ID is associated with the read operation.
>  00001b to 01111b (0XXXXb) | Context ID. (XXXX I from 0001b to 1111b ‐ Context ID value)
>  10000b                    | Data has System Data characteristics
>  10001b to 11111b          | Reserved
>  -----------------------------------------------------------------------------------------
> 
> In case the GROUP NUMBER is set to a reserved value, the operation shall fail and a status
> response of CHECK CONDITION will be returned along with the sense key set to ILLEGAL REQUEST.
> </quote>
> 
> Since there is a desire to remove the write hint information from struct bio, is there
> any other information the "system data characteristics" information can be derived from?
> How about e.g. deriving that information from request flags like REQ_SYNC, REQ_META and/or
> REQ_IDLE?
> 

[+Cc linux-f2fs-devel]

I think the f2fs developers will need to chime in here, as it looks like f2fs
uses the write hints for different data categories like hot/cold/warm.  I'm not
sure those can be fully represented by other bio flags.

Either way, the good news is that it sounds like this "GROUP NUMBER" thing is
part of the UFS standard.  So whatever the best way to support it is, it can
just be submitted upstream like any other standard UFS feature.  Why hasn't that
been done?

- Eric


_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

^ permalink raw reply	[flat|nested] 43+ messages in thread

* RE: [EXT] Re: [PATCH 2/2] block: remove the per-bio/request write hint.
  2022-03-10 22:18                                 ` Bart Van Assche
  2022-03-11  5:31                                     ` [f2fs-dev] " Eric Biggers
@ 2022-03-11  9:21                                   ` Luca Porzio (lporzio)
  1 sibling, 0 replies; 43+ messages in thread
From: Luca Porzio (lporzio) @ 2022-03-11  9:21 UTC (permalink / raw)
  To: Bart Van Assche, Bean Huo (beanhuo), Jens Axboe, Manjong Lee, david
  Cc: hch, kbusch, linux-block, linux-fsdevel, linux-nvme, linux-raid,
	sagi, song, seunghwan.hyun, sookwan7.kim, nanich.lee,
	woosung2.lee, yt0928.kim, junho89.kim, jisoo2146.oh, Jaegeuk Kim



> Since there is a desire to remove the write hint information from struct bio, is
> there any other information the "system data characteristics" information
> can be derived from?
> How about e.g. deriving that information from request flags like REQ_SYNC,
> REQ_META and/or REQ_IDLE?

Bart,

I agree with all your analysis. The point is these flags (Sync, Meta and Idle) are 
really meant for latency management whereas here the problem is hot/cold separation.

At this moment I am not aware of any methodology we can use to map S/M/Idle flags
Into hot/cold meaning but I am open to talk and discuss :-)

> 
> Thanks,
> 
> Bart.

Cheers,
  Luca

^ permalink raw reply	[flat|nested] 43+ messages in thread

* RE: [EXT] Re: [PATCH 2/2] block: remove the per-bio/request write hint.
  2022-03-11  5:06                           ` Eric Biggers
@ 2022-03-11  9:23                             ` Luca Porzio (lporzio)
  0 siblings, 0 replies; 43+ messages in thread
From: Luca Porzio (lporzio) @ 2022-03-11  9:23 UTC (permalink / raw)
  To: Eric Biggers
  Cc: hch, Manjong Lee, david, axboe, kbusch, linux-block,
	linux-fsdevel, linux-nvme, linux-raid, sagi, song,
	seunghwan.hyun, sookwan7.kim, nanich.lee, woosung2.lee,
	yt0928.kim, junho89.kim, jisoo2146.oh

> 
> On Thu, Mar 10, 2022 at 06:51:39PM +0000, Luca Porzio (lporzio) wrote:
> >
> > Micron Confidential
> 
> This is a public mailing list, so please do not use this header/footer.

Eric,

I am sorry, sometimes it is hard for me to cope with all the company proxies/firewall.
I'll try to make sure this won't happen in future.

> 
> > it is used across the (Android) ecosystem.
> 
> So why hasn't it been submitted upstream?

I'm not sure about this but I am open to fix this for future.

> 
> - Eric

Luca

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [EXT] Re: [PATCH 2/2] block: remove the per-bio/request write hint.
  2022-03-10 22:10                                 ` Jens Axboe
@ 2022-03-11 16:45                                   ` Bart Van Assche
  2022-03-11 16:56                                     ` Jens Axboe
  0 siblings, 1 reply; 43+ messages in thread
From: Bart Van Assche @ 2022-03-11 16:45 UTC (permalink / raw)
  To: Jens Axboe, Bean Huo (beanhuo), Luca Porzio (lporzio),
	Manjong Lee, david
  Cc: hch, kbusch, linux-block, linux-fsdevel, linux-nvme, linux-raid,
	sagi, song, seunghwan.hyun, sookwan7.kim, nanich.lee,
	woosung2.lee, yt0928.kim, junho89.kim, jisoo2146.oh, Jaegeuk Kim

On 3/10/22 14:10, Jens Axboe wrote:
> On 3/10/22 2:52 PM, Bean Huo (beanhuo) wrote:
>> Yes, in upstream linux and upstream android, there is no such code.
>> But as we know, mobile customers have used bio->bi_write_hint in their
>> products for years. And the group ID is set according to
>> bio->bi_write_hint before passing the CDB to UFS.
>>
>>
>> 	lrbp = &hba->lrb[tag];
>>   
>>                WARN_ON(lrbp->cmd);
>>               + if(cmd->cmnd[0] == WRITE_10)
>>                +{
>>                  +             cmd->cmnd[6] = (0x1f& cmd->request->bio->bi_write_hint);
>>                +}
>>                lrbp->cmd = cmd;
>>                lrbp->sense_bufflen = UFS_SENSE_SIZE;
>>                lrbp->sense_buffer = cmd->sense_buffer;
>>
>> I don't know why they don't push these changes to the community, maybe
>> it's because changes across the file system and block layers are
>> unacceptable to the block layer and FS. but for sure we should now
>> warn them to push to the community as soon as possible.
> 
> If the code isn't upstream, it's a bit late to start thinking about
> that now. This feature has existed for 5 years at this point, and the
> only consumer was NVMe. The upstream kernel cares only about what is
> in-tree, as that is the only part we can modify and fix. We
> change/modify internal kernel APIs all the time, which is how tech debt
> is removed and the long term sanity of the project is maintained. This
> in turn means that out-of-tree code will break, that's just a natural
> side effect and something we can't do anything about.
> 
> If at some point there's a desire to actually try and upstream this
> support, then we'll be happy to review that patchset. Or you can
> continue to stay out-of-tree and just patch in what you need. If you're
> already modifying core code, then that shouldn't be a problem.

Hi Jens,

The "upstream first" policy applies to the Android kernel (see also 
https://arstechnica.com/gadgets/2021/09/android-to-take-an-upstream-first-development-model-for-the-linux-kernel/). 
If anyone requests inclusion in the Android kernel tree of a patch that 
is not upstream, that request is rejected unless a very strong reason 
can be provided why it should be included in the Android kernel only 
instead of being sent upstream. It is not clear to me why the patch Bean 
mentioned is not upstream nor in the upstream Android kernel tree.

 From a UFS vendor I received the feedback that the F2FS write hint 
information helps to reduce write amplification significantly. If the 
write hint information is retained in the upstream kernel I can help 
with making sure that the UFS patch mentioned above is integrated in the 
upstream Linux kernel.

Thanks,

Bart.



^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [EXT] Re: [PATCH 2/2] block: remove the per-bio/request write hint.
  2022-03-11 16:45                                   ` Bart Van Assche
@ 2022-03-11 16:56                                     ` Jens Axboe
  2022-03-14  7:40                                       ` Avi Shchislowski
  0 siblings, 1 reply; 43+ messages in thread
From: Jens Axboe @ 2022-03-11 16:56 UTC (permalink / raw)
  To: Bart Van Assche, Bean Huo (beanhuo), Luca Porzio (lporzio),
	Manjong Lee, david
  Cc: hch, kbusch, linux-block, linux-fsdevel, linux-nvme, linux-raid,
	sagi, song, seunghwan.hyun, sookwan7.kim, nanich.lee,
	woosung2.lee, yt0928.kim, junho89.kim, jisoo2146.oh, Jaegeuk Kim

On 3/11/22 9:45 AM, Bart Van Assche wrote:
> On 3/10/22 14:10, Jens Axboe wrote:
>> On 3/10/22 2:52 PM, Bean Huo (beanhuo) wrote:
>>> Yes, in upstream linux and upstream android, there is no such code.
>>> But as we know, mobile customers have used bio->bi_write_hint in their
>>> products for years. And the group ID is set according to
>>> bio->bi_write_hint before passing the CDB to UFS.
>>>
>>>
>>>     lrbp = &hba->lrb[tag];
>>>                  WARN_ON(lrbp->cmd);
>>>               + if(cmd->cmnd[0] == WRITE_10)
>>>                +{
>>>                  +             cmd->cmnd[6] = (0x1f& cmd->request->bio->bi_write_hint);
>>>                +}
>>>                lrbp->cmd = cmd;
>>>                lrbp->sense_bufflen = UFS_SENSE_SIZE;
>>>                lrbp->sense_buffer = cmd->sense_buffer;
>>>
>>> I don't know why they don't push these changes to the community, maybe
>>> it's because changes across the file system and block layers are
>>> unacceptable to the block layer and FS. but for sure we should now
>>> warn them to push to the community as soon as possible.
>>
>> If the code isn't upstream, it's a bit late to start thinking about
>> that now. This feature has existed for 5 years at this point, and the
>> only consumer was NVMe. The upstream kernel cares only about what is
>> in-tree, as that is the only part we can modify and fix. We
>> change/modify internal kernel APIs all the time, which is how tech debt
>> is removed and the long term sanity of the project is maintained. This
>> in turn means that out-of-tree code will break, that's just a natural
>> side effect and something we can't do anything about.
>>
>> If at some point there's a desire to actually try and upstream this
>> support, then we'll be happy to review that patchset. Or you can
>> continue to stay out-of-tree and just patch in what you need. If you're
>> already modifying core code, then that shouldn't be a problem.
> 
> Hi Jens,
> 
> The "upstream first" policy applies to the Android kernel (see also
> https://arstechnica.com/gadgets/2021/09/android-to-take-an-upstream-first-development-model-for-the-linux-kernel/).
> If anyone requests inclusion in the Android kernel tree of a patch
> that is not upstream, that request is rejected unless a very strong
> reason can be provided why it should be included in the Android kernel
> only instead of being sent upstream. It is not clear to me why the
> patch Bean mentioned is not upstream nor in the upstream Android
> kernel tree.
> 
> From a UFS vendor I received the feedback that the F2FS write hint
> information helps to reduce write amplification significantly. If the
> write hint information is retained in the upstream kernel I can help
> with making sure that the UFS patch mentioned above is integrated in
> the upstream Linux kernel.

I'm really not that interested at this point, and I don't want to gate
removal or inclusion of code on some potential future event that may
never happen.

That doesn't mean that the work and process can't continue on the
Android front, the only difference is what the baseline kernel looks
like for the submitted patchset.

Hence I do think we should go ahead as planned, and then we'll just
revisit the topic if/when some actual code comes up.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 43+ messages in thread

* RE: [EXT] Re: [PATCH 2/2] block: remove the per-bio/request write hint.
  2022-03-11 16:56                                     ` Jens Axboe
@ 2022-03-14  7:40                                       ` Avi Shchislowski
  2022-03-14  8:00                                         ` hch
                                                           ` (2 more replies)
  0 siblings, 3 replies; 43+ messages in thread
From: Avi Shchislowski @ 2022-03-14  7:40 UTC (permalink / raw)
  To: Jens Axboe, Bart Van Assche, Bean Huo (beanhuo),
	Luca Porzio (lporzio),
	Manjong Lee, david
  Cc: hch, kbusch, linux-block, linux-fsdevel, linux-nvme, linux-raid,
	sagi, song, seunghwan.hyun, sookwan7.kim, nanich.lee,
	woosung2.lee, yt0928.kim, junho89.kim, jisoo2146.oh, Jaegeuk Kim

> >
> > Hi Jens,
> >
> > The "upstream first" policy applies to the Android kernel (see also
> > https://arstechnica.com/gadgets/2021/09/android-to-take-an-upstream-
> first-development-model-for-the-linux-kernel/).
> > If anyone requests inclusion in the Android kernel tree of a patch
> > that is not upstream, that request is rejected unless a very strong
> > reason can be provided why it should be included in the Android kernel
> > only instead of being sent upstream. It is not clear to me why the
> > patch Bean mentioned is not upstream nor in the upstream Android
> > kernel tree.
> >
> > From a UFS vendor I received the feedback that the F2FS write hint
> > information helps to reduce write amplification significantly. If the
> > write hint information is retained in the upstream kernel I can help
> > with making sure that the UFS patch mentioned above is integrated in
> > the upstream Linux kernel.
> 
> I'm really not that interested at this point, and I don't want to gate removal or
> inclusion of code on some potential future event that may never happen.
> 
> That doesn't mean that the work and process can't continue on the Android
> front, the only difference is what the baseline kernel looks like for the
> submitted patchset.
> 
> Hence I do think we should go ahead as planned, and then we'll just revisit
> the topic if/when some actual code comes up.
> 
We also supports Samsung & Micron approach and sorry to see that this functionality
has been removed.

Cheers,
Avi
> --
> Jens Axboe
> 


^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [EXT] Re: [PATCH 2/2] block: remove the per-bio/request write hint.
  2022-03-14  7:40                                       ` Avi Shchislowski
@ 2022-03-14  8:00                                         ` hch
  2022-03-14 19:50                                         ` Eric Biggers
  2022-03-14 19:58                                         ` [EXT] " Jens Axboe
  2 siblings, 0 replies; 43+ messages in thread
From: hch @ 2022-03-14  8:00 UTC (permalink / raw)
  To: Avi Shchislowski
  Cc: Jens Axboe, Bart Van Assche, Bean Huo (beanhuo),
	Luca Porzio (lporzio),
	Manjong Lee, david, hch, kbusch, linux-block, linux-fsdevel,
	linux-nvme, linux-raid, sagi, song, seunghwan.hyun, sookwan7.kim,
	nanich.lee, woosung2.lee, yt0928.kim, junho89.kim, jisoo2146.oh,
	Jaegeuk Kim

Which part of "the Linux kernel does not keep unused code around" is still
not clear to you after explaining it three times in this thread?

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH 2/2] block: remove the per-bio/request write hint.
  2022-03-14  7:40                                       ` Avi Shchislowski
  2022-03-14  8:00                                         ` hch
@ 2022-03-14 19:50                                         ` Eric Biggers
  2022-03-14 19:58                                         ` [EXT] " Jens Axboe
  2 siblings, 0 replies; 43+ messages in thread
From: Eric Biggers @ 2022-03-14 19:50 UTC (permalink / raw)
  To: Avi Shchislowski
  Cc: Jens Axboe, Bart Van Assche, Bean Huo (beanhuo),
	Luca Porzio (lporzio),
	Manjong Lee, david, hch, kbusch, linux-block, linux-fsdevel,
	linux-nvme, linux-raid, sagi, song, seunghwan.hyun, sookwan7.kim,
	nanich.lee, woosung2.lee, yt0928.kim, junho89.kim, jisoo2146.oh,
	Jaegeuk Kim

On Mon, Mar 14, 2022 at 07:40:42AM +0000, Avi Shchislowski wrote:
>
> We also supports Samsung & Micron approach

Great, so send some patches upstream to have it be supported properly.

- Eric

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [EXT] Re: [PATCH 2/2] block: remove the per-bio/request write hint.
  2022-03-14  7:40                                       ` Avi Shchislowski
  2022-03-14  8:00                                         ` hch
  2022-03-14 19:50                                         ` Eric Biggers
@ 2022-03-14 19:58                                         ` Jens Axboe
  2022-03-15 15:36                                           ` Luca Porzio (lporzio)
  2 siblings, 1 reply; 43+ messages in thread
From: Jens Axboe @ 2022-03-14 19:58 UTC (permalink / raw)
  To: Avi Shchislowski, Bart Van Assche, Bean Huo (beanhuo),
	Luca Porzio (lporzio),
	Manjong Lee, david
  Cc: hch, kbusch, linux-block, linux-fsdevel, linux-nvme, linux-raid,
	sagi, song, seunghwan.hyun, sookwan7.kim, nanich.lee,
	woosung2.lee, yt0928.kim, junho89.kim, jisoo2146.oh, Jaegeuk Kim

On 3/14/22 1:40 AM, Avi Shchislowski wrote:
>>>
>>> Hi Jens,
>>>
>>> The "upstream first" policy applies to the Android kernel (see also
>>> https://arstechnica.com/gadgets/2021/09/android-to-take-an-upstream-
>> first-development-model-for-the-linux-kernel/).
>>> If anyone requests inclusion in the Android kernel tree of a patch
>>> that is not upstream, that request is rejected unless a very strong
>>> reason can be provided why it should be included in the Android kernel
>>> only instead of being sent upstream. It is not clear to me why the
>>> patch Bean mentioned is not upstream nor in the upstream Android
>>> kernel tree.
>>>
>>> From a UFS vendor I received the feedback that the F2FS write hint
>>> information helps to reduce write amplification significantly. If the
>>> write hint information is retained in the upstream kernel I can help
>>> with making sure that the UFS patch mentioned above is integrated in
>>> the upstream Linux kernel.
>>
>> I'm really not that interested at this point, and I don't want to gate removal or
>> inclusion of code on some potential future event that may never happen.
>>
>> That doesn't mean that the work and process can't continue on the Android
>> front, the only difference is what the baseline kernel looks like for the
>> submitted patchset.
>>
>> Hence I do think we should go ahead as planned, and then we'll just revisit
>> the topic if/when some actual code comes up.
>>
> We also supports Samsung & Micron approach and sorry to see that this
> functionality has been removed.

This isn't some setup to solicit votes on who supports what. If the code
isn't upstream, it by definition doesn't exist to the kernel. No amount
of "we're also interested in this" changes that.

What I wrote earlier still applies - whoever is interested in supporting
lifetime hints should submit that code upstream. The existing patchset
to clean this up doesn't change that process AT ALL. As mentioned, the
only difference is what the baseline looks like in terms of what the
patchset is based on.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 43+ messages in thread

* RE: [EXT] Re: [PATCH 2/2] block: remove the per-bio/request write hint.
  2022-03-14 19:58                                         ` [EXT] " Jens Axboe
@ 2022-03-15 15:36                                           ` Luca Porzio (lporzio)
  2022-03-15 15:44                                             ` Jens Axboe
  0 siblings, 1 reply; 43+ messages in thread
From: Luca Porzio (lporzio) @ 2022-03-15 15:36 UTC (permalink / raw)
  To: Jens Axboe, Avi Shchislowski, Bart Van Assche, Bean Huo (beanhuo),
	Manjong Lee, david
  Cc: hch, kbusch, linux-block, linux-fsdevel, linux-nvme, linux-raid,
	sagi, song, seunghwan.hyun, sookwan7.kim, nanich.lee,
	woosung2.lee, yt0928.kim, junho89.kim, jisoo2146.oh, Jaegeuk Kim

> 
> This isn't some setup to solicit votes on who supports what. If the code isn't
> upstream, it by definition doesn't exist to the kernel. No amount of "we're
> also interested in this" changes that.
> 
> What I wrote earlier still applies - whoever is interested in supporting lifetime
> hints should submit that code upstream. The existing patchset to clean this
> up doesn't change that process AT ALL. As mentioned, the only difference is
> what the baseline looks like in terms of what the patchset is based on.
> 

Jens, 

Actually we might work to issue a patch and revert the patch plus add the code 
that Bean and Bart mentioned which is currently Android only.
The reason it has not been done before is because for now it's not production yet
but it may soon be that case.

Would this patch revert be an option and accepted as a closure for this discussion?

Another option (which I actually prefer), if I ask for a MM & Storage BoF discussion 
on storage hints where I can show you the status of temperature management
and my studies on how this is beneficial for storage devices. 

Would this be more beneficial and maybe get some wider consensus 
on the write hints?

After that consensus reverting (or agreeing on a new approach) will be easier.

> --
> Jens Axboe

Cheers,
   Luca


^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [EXT] Re: [PATCH 2/2] block: remove the per-bio/request write hint.
  2022-03-15 15:36                                           ` Luca Porzio (lporzio)
@ 2022-03-15 15:44                                             ` Jens Axboe
  0 siblings, 0 replies; 43+ messages in thread
From: Jens Axboe @ 2022-03-15 15:44 UTC (permalink / raw)
  To: Luca Porzio (lporzio),
	Avi Shchislowski, Bart Van Assche, Bean Huo (beanhuo),
	Manjong Lee, david
  Cc: hch, kbusch, linux-block, linux-fsdevel, linux-nvme, linux-raid,
	sagi, song, seunghwan.hyun, sookwan7.kim, nanich.lee,
	woosung2.lee, yt0928.kim, junho89.kim, jisoo2146.oh, Jaegeuk Kim

On 3/15/22 9:36 AM, Luca Porzio (lporzio) wrote:
>>
>> This isn't some setup to solicit votes on who supports what. If the code isn't
>> upstream, it by definition doesn't exist to the kernel. No amount of "we're
>> also interested in this" changes that.
>>
>> What I wrote earlier still applies - whoever is interested in supporting lifetime
>> hints should submit that code upstream. The existing patchset to clean this
>> up doesn't change that process AT ALL. As mentioned, the only difference is
>> what the baseline looks like in terms of what the patchset is based on.
>>
> 
> Jens, 
> 
> Actually we might work to issue a patch and revert the patch plus add
> the code that Bean and Bart mentioned which is currently Android only.
> The reason it has not been done before is because for now it's not
> production yet but it may soon be that case.
> 
> Would this patch revert be an option and accepted as a closure for
> this discussion?

What patch revert? It's not clear to me which patch you're talking about
here. If you're talking about the "remove the per-bio/request write
hint" patch, then no, that's certainly not being reverted. See previous
replies I made and also below for why, and let's please stop beating
this dead horse.

> Another option (which I actually prefer), if I ask for a MM & Storage
> BoF discussion on storage hints where I can show you the status of
> temperature management and my studies on how this is beneficial for
> storage devices. 

As long as it's accompanied by code that implements it, then that would
be fine.

> Would this be more beneficial and maybe get some wider consensus on
> the write hints?
> 
> After that consensus reverting (or agreeing on a new approach) will be
> easier.

As I've said multiple times, whenever code is available, it'll be
reviewed and discussed. I don't like to discuss hypotheticals as too
many times in the past there's a promise made and expectations built
only for nothing to materialize. As it stands, the only in-kernel user
of the hints is gone, and that means that the support code is being
removed. We NEVER keep code in the kernel that doesn't have a user, as
it can't get tested.

Submit your patches when they are ready, it really has no bearing on the
currently queued up changes to write hints.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 43+ messages in thread

end of thread, other threads:[~2022-03-15 15:44 UTC | newest]

Thread overview: 43+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-03-04 17:55 [PATCH 1/2] nvme: remove support or stream based temperature hint Christoph Hellwig
2022-03-04 17:55 ` [PATCH 2/2] block: remove the per-bio/request write hint Christoph Hellwig
2022-03-04 19:24   ` Jens Axboe
2022-03-04 22:12   ` Dave Chinner
2022-03-05  5:19     ` Christoph Hellwig
2022-03-05  6:09       ` Number of parity disks Kengo.M
2022-03-05  8:56         ` Piergiorgio Sartor
2022-03-05 21:40       ` [PATCH 2/2] block: remove the per-bio/request write hint Dave Chinner
2022-03-05 23:55         ` Bart Van Assche
2022-03-06 17:11         ` Jens Axboe
2022-03-06 18:01           ` Christoph Hellwig
2022-03-06 18:06             ` Jens Axboe
2022-03-06 23:17               ` Dave Chinner
     [not found]                 ` <CGME20220309042324epcas1p111312e20f4429dc3a17172458284a923@epcas1p1.samsung.com>
2022-03-09 13:31                   ` Manjong Lee
2022-03-09  8:24                     ` Paul Menzel
2022-03-10 11:34                     ` [EXT] " Luca Porzio (lporzio)
2022-03-10 12:15                       ` Jens Axboe
2022-03-10 18:50                         ` Luca Porzio (lporzio)
2022-03-10 19:10                           ` Jens Axboe
2022-03-10 19:34                             ` Bart Van Assche
2022-03-10 21:52                               ` Bean Huo (beanhuo)
2022-03-10 22:10                                 ` Jens Axboe
2022-03-11 16:45                                   ` Bart Van Assche
2022-03-11 16:56                                     ` Jens Axboe
2022-03-14  7:40                                       ` Avi Shchislowski
2022-03-14  8:00                                         ` hch
2022-03-14 19:50                                         ` Eric Biggers
2022-03-14 19:58                                         ` [EXT] " Jens Axboe
2022-03-15 15:36                                           ` Luca Porzio (lporzio)
2022-03-15 15:44                                             ` Jens Axboe
2022-03-10 22:18                                 ` Bart Van Assche
2022-03-11  5:31                                   ` Eric Biggers
2022-03-11  5:31                                     ` [f2fs-dev] " Eric Biggers
2022-03-11  9:21                                   ` Luca Porzio (lporzio)
2022-03-10 14:21                       ` hch
2022-03-10 18:51                         ` Luca Porzio (lporzio)
2022-03-10 19:14                           ` hch
2022-03-11  5:06                           ` Eric Biggers
2022-03-11  9:23                             ` Luca Porzio (lporzio)
2022-03-04 19:34 ` [PATCH 1/2] nvme: remove support or stream based temperature hint Keith Busch
2022-03-04 19:36   ` Christoph Hellwig
2022-03-04 19:38     ` Jens Axboe
2022-03-04 20:20 ` Jens Axboe

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.