linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH RFC] Support for write stream IDs
@ 2015-03-24 15:26 Jens Axboe
  2015-03-24 15:26 ` [PATCH 1/6] block: add support for carrying a stream ID in a bio Jens Axboe
                   ` (6 more replies)
  0 siblings, 7 replies; 33+ messages in thread
From: Jens Axboe @ 2015-03-24 15:26 UTC (permalink / raw)
  To: axboe, linux-kernel, linux-fsdevel; +Cc: ming.l

Hi,

One of the things that exacerbates write amplification on flash
based devices is that fact that data with different lifetimes get
grouped together on media. Currently we have no interface that
applications can use to separate different types of writes. This
patch set adds support for that.

The kernel has no knowledge of what stream ID is what. The idea is
that writes with identical stream IDs have similar life times, not
that stream ID 'X' has a shorter lifetime than stream ID 'X+1'.

There are basically two interfaces that could be used for this. One
is fcntl, the other is fadvise. This patchset uses fadvise, with a
new POSIX_FADV_STREAMID hint. The 'offset' field is used to pass
the relevant stream ID. Switching to fcntl (with a SET/GET_STREAMID)
would be trivial.

The patchset wires up the block parts, adds buffered and O_DIRECT
support, and modifies btrfs/xfs too. It should be trivial to extend
this to all other file systems, I just used xfs and btrfs for testing.

No block drivers are wired up yet. Patches are against current -git.

 block/bio.c                  |    2 ++
 block/blk-core.c             |    3 +++
 fs/btrfs/inode.c             |    1 +
 fs/buffer.c                  |    4 ++--
 fs/direct-io.c               |    4 ++++
 fs/fs-writeback.c            |    1 +
 fs/inode.c                   |    1 +
 fs/mpage.c                   |    1 +
 fs/open.c                    |    1 +
 fs/xfs/xfs_aops.c            |    1 +
 include/linux/blk_types.h    |   28 +++++++++++++++++++++++++++-
 include/linux/fs.h           |   11 +++++++++++
 include/linux/writeback.h    |    2 ++
 include/uapi/linux/fadvise.h |    2 ++
 mm/fadvise.c                 |   17 +++++++++++++++++
 mm/filemap.c                 |    1 +
 mm/migrate.c                 |    1 +
 mm/page-writeback.c          |    1 +
 mm/vmscan.c                  |    1 +
 19 files changed, 80 insertions(+), 3 deletions(-)


^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH 1/6] block: add support for carrying a stream ID in a bio
  2015-03-24 15:26 [PATCH RFC] Support for write stream IDs Jens Axboe
@ 2015-03-24 15:26 ` Jens Axboe
  2015-03-24 17:11   ` Matias Bjørling
  2015-03-25  2:30   ` Dave Chinner
  2015-03-24 15:26 ` [PATCH 2/6] Add support for per-file stream ID Jens Axboe
                   ` (5 subsequent siblings)
  6 siblings, 2 replies; 33+ messages in thread
From: Jens Axboe @ 2015-03-24 15:26 UTC (permalink / raw)
  To: axboe, linux-kernel, linux-fsdevel; +Cc: ming.l, Jens Axboe

The top bits of bio->bi_flags are reserved for keeping the
allocation pool, set aside the next four bits for carrying
a stream ID. That leaves us with support for 15 streams,
0 is reserved as a "stream not set" value.

Add helpers for setting/getting stream ID of a bio.

Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/bio.c               |  2 ++
 block/blk-core.c          |  3 +++
 include/linux/blk_types.h | 28 +++++++++++++++++++++++++++-
 3 files changed, 32 insertions(+), 1 deletion(-)

diff --git a/block/bio.c b/block/bio.c
index f66a4eae16ee..1cd3d745047c 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -567,6 +567,7 @@ void __bio_clone_fast(struct bio *bio, struct bio *bio_src)
 	bio->bi_rw = bio_src->bi_rw;
 	bio->bi_iter = bio_src->bi_iter;
 	bio->bi_io_vec = bio_src->bi_io_vec;
+	bio_set_streamid(bio, bio_get_streamid(bio_src));
 }
 EXPORT_SYMBOL(__bio_clone_fast);
 
@@ -672,6 +673,7 @@ integrity_clone:
 		}
 	}
 
+	bio_set_streamid(bio, bio_get_streamid(bio_src));
 	return bio;
 }
 EXPORT_SYMBOL(bio_clone_bioset);
diff --git a/block/blk-core.c b/block/blk-core.c
index 794c3e7f01cf..6b7b8c95c4c3 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1928,6 +1928,9 @@ void generic_make_request(struct bio *bio)
 	do {
 		struct request_queue *q = bdev_get_queue(bio->bi_bdev);
 
+		if (bio_streamid_valid(bio))
+			blk_add_trace_msg(q, "StreamID=%u", bio_get_streamid(bio));
+
 		q->make_request_fn(q, bio);
 
 		bio = bio_list_pop(current->bio_list);
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index c294e3e25e37..510d1e49ba7d 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -138,9 +138,35 @@ struct bio {
 #define BIO_POOL_BITS		(4)
 #define BIO_POOL_NONE		((1UL << BIO_POOL_BITS) - 1)
 #define BIO_POOL_OFFSET		(BITS_PER_LONG - BIO_POOL_BITS)
-#define BIO_POOL_MASK		(1UL << BIO_POOL_OFFSET)
 #define BIO_POOL_IDX(bio)	((bio)->bi_flags >> BIO_POOL_OFFSET)
 
+/*
+ * after the pool bits, next 4 bits are for the stream id
+ */
+#define BIO_STREAM_BITS		(4)
+#define BIO_STREAM_OFFSET	(BITS_PER_LONG - 8)
+#define BIO_STREAM_MASK		((1 << BIO_STREAM_BITS) - 1)
+
+static inline unsigned long streamid_to_flags(unsigned int id)
+{
+	return (unsigned long) (id & BIO_STREAM_MASK) << BIO_STREAM_OFFSET;
+}
+
+static inline void bio_set_streamid(struct bio *bio, unsigned int id)
+{
+	bio->bi_flags |= streamid_to_flags(id);
+}
+
+static inline unsigned int bio_get_streamid(struct bio *bio)
+{
+	return (bio->bi_flags >> BIO_STREAM_OFFSET) & BIO_STREAM_MASK;
+}
+
+static inline bool bio_streamid_valid(struct bio *bio)
+{
+	return bio_get_streamid(bio) != 0;
+}
+
 #endif /* CONFIG_BLOCK */
 
 /*
-- 
1.9.1


^ permalink raw reply related	[flat|nested] 33+ messages in thread

* [PATCH 2/6] Add support for per-file stream ID
  2015-03-24 15:26 [PATCH RFC] Support for write stream IDs Jens Axboe
  2015-03-24 15:26 ` [PATCH 1/6] block: add support for carrying a stream ID in a bio Jens Axboe
@ 2015-03-24 15:26 ` Jens Axboe
  2015-03-24 15:27 ` [PATCH 3/6] direct-io: add support for write stream IDs Jens Axboe
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 33+ messages in thread
From: Jens Axboe @ 2015-03-24 15:26 UTC (permalink / raw)
  To: axboe, linux-kernel, linux-fsdevel; +Cc: ming.l, Jens Axboe

Writing on flash devices can be much more efficient, if we can
inform the device what kind of data can be grouped together. If
the device is able to group data together with similar lifetimes,
then it can be more efficient in garbage collection. This, in turn,
leads to lower write amplification, which is a win on both device
wear and performance.

Add a new fadvise hint, POSIX_FADV_STREAMID, which sets the file
and inode streamid. The file streamid is used if we have the file
available at the time of the write (O_DIRECT), we use the inode
streamid if not (buffered writeback). The fadvise hint uses the
'offset' field to specify a stream ID.

Signed-off-by: Jens Axboe <axboe@fb.com>
---
 fs/inode.c                   |  1 +
 fs/open.c                    |  1 +
 include/linux/fs.h           | 11 +++++++++++
 include/uapi/linux/fadvise.h |  2 ++
 mm/fadvise.c                 | 17 +++++++++++++++++
 5 files changed, 32 insertions(+)

diff --git a/fs/inode.c b/fs/inode.c
index f00b16f45507..41885322ba64 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -149,6 +149,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
 	inode->i_blocks = 0;
 	inode->i_bytes = 0;
 	inode->i_generation = 0;
+	inode->i_streamid = 0;
 	inode->i_pipe = NULL;
 	inode->i_bdev = NULL;
 	inode->i_cdev = NULL;
diff --git a/fs/open.c b/fs/open.c
index 33f9cbf2610b..4a9b2be1a674 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -743,6 +743,7 @@ static int do_dentry_open(struct file *f,
 	f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
 
 	file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);
+	f->f_streamid = 0;
 
 	return 0;
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index b4d71b5e1ff2..6288d9680af6 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -631,6 +631,7 @@ struct inode {
 	};
 
 	__u32			i_generation;
+	unsigned int		i_streamid;
 
 #ifdef CONFIG_FSNOTIFY
 	__u32			i_fsnotify_mask; /* all events this inode cares about */
@@ -640,6 +641,14 @@ struct inode {
 	void			*i_private; /* fs or device private pointer */
 };
 
+static inline unsigned int inode_streamid(struct inode *inode)
+{
+	if (inode)
+		return inode->i_streamid;
+
+	return 0;
+}
+
 static inline int inode_unhashed(struct inode *inode)
 {
 	return hlist_unhashed(&inode->i_hash);
@@ -820,6 +829,8 @@ struct file {
 	const struct cred	*f_cred;
 	struct file_ra_state	f_ra;
 
+	unsigned int		f_streamid;
+
 	u64			f_version;
 #ifdef CONFIG_SECURITY
 	void			*f_security;
diff --git a/include/uapi/linux/fadvise.h b/include/uapi/linux/fadvise.h
index e8e747139b9a..3dc8a1ff1422 100644
--- a/include/uapi/linux/fadvise.h
+++ b/include/uapi/linux/fadvise.h
@@ -18,4 +18,6 @@
 #define POSIX_FADV_NOREUSE	5 /* Data will be accessed once.  */
 #endif
 
+#define POSIX_FADV_STREAMID	8 /* associate stream ID with file */
+
 #endif	/* FADVISE_H_INCLUDED */
diff --git a/mm/fadvise.c b/mm/fadvise.c
index 4a3907cf79f8..b111a8899fb7 100644
--- a/mm/fadvise.c
+++ b/mm/fadvise.c
@@ -60,6 +60,7 @@ SYSCALL_DEFINE4(fadvise64_64, int, fd, loff_t, offset, loff_t, len, int, advice)
 		case POSIX_FADV_WILLNEED:
 		case POSIX_FADV_NOREUSE:
 		case POSIX_FADV_DONTNEED:
+		case POSIX_FADV_STREAMID:
 			/* no bad return value, but ignore advice */
 			break;
 		default:
@@ -144,6 +145,22 @@ SYSCALL_DEFINE4(fadvise64_64, int, fd, loff_t, offset, loff_t, len, int, advice)
 			}
 		}
 		break;
+	case POSIX_FADV_STREAMID:
+		/*
+		 * streamid is stored in offset... we don't limit or check
+		 * if the device supports streams, or if it does, if the
+		 * stream nr is within the limits. 1 is the lowest valid
+		 * stream id, 0 is "don't care/know".
+		 */
+		if (offset != (unsigned int) offset)
+			ret = EINVAL;
+		else {
+			f.file->f_streamid = offset;
+			spin_lock(&inode->i_lock);
+			inode->i_streamid = offset;
+			spin_unlock(&inode->i_lock);
+		}
+		break;
 	default:
 		ret = -EINVAL;
 	}
-- 
1.9.1


^ permalink raw reply related	[flat|nested] 33+ messages in thread

* [PATCH 3/6] direct-io: add support for write stream IDs
  2015-03-24 15:26 [PATCH RFC] Support for write stream IDs Jens Axboe
  2015-03-24 15:26 ` [PATCH 1/6] block: add support for carrying a stream ID in a bio Jens Axboe
  2015-03-24 15:26 ` [PATCH 2/6] Add support for per-file stream ID Jens Axboe
@ 2015-03-24 15:27 ` Jens Axboe
  2015-03-25  2:43   ` Dave Chinner
  2015-03-24 15:27 ` [PATCH 4/6] Add stream ID support for buffered writeback Jens Axboe
                   ` (3 subsequent siblings)
  6 siblings, 1 reply; 33+ messages in thread
From: Jens Axboe @ 2015-03-24 15:27 UTC (permalink / raw)
  To: axboe, linux-kernel, linux-fsdevel; +Cc: ming.l, Jens Axboe

Get the streamid from the file, if any, and set it on the bio.

Signed-off-by: Jens Axboe <axboe@fb.com>
---
 fs/direct-io.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/fs/direct-io.c b/fs/direct-io.c
index e181b6b2e297..5d2750346451 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -77,6 +77,7 @@ struct dio_submit {
 	int reap_counter;		/* rate limit reaping */
 	sector_t final_block_in_request;/* doesn't change */
 	int boundary;			/* prev block is at a boundary */
+	int streamid;			/* Write stream ID */
 	get_block_t *get_block;		/* block mapping function */
 	dio_submit_t *submit_io;	/* IO submition function */
 
@@ -372,6 +373,8 @@ dio_bio_alloc(struct dio *dio, struct dio_submit *sdio,
 
 	sdio->bio = bio;
 	sdio->logical_offset_in_bio = sdio->cur_page_fs_offset;
+
+	bio_set_streamid(bio, sdio->streamid);
 }
 
 /*
@@ -1205,6 +1208,7 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 	sdio.blkbits = blkbits;
 	sdio.blkfactor = i_blkbits - blkbits;
 	sdio.block_in_file = offset >> blkbits;
+	sdio.streamid = iocb->ki_filp->f_streamid;
 
 	sdio.get_block = get_block;
 	dio->end_io = end_io;
-- 
1.9.1


^ permalink raw reply related	[flat|nested] 33+ messages in thread

* [PATCH 4/6] Add stream ID support for buffered writeback
  2015-03-24 15:26 [PATCH RFC] Support for write stream IDs Jens Axboe
                   ` (2 preceding siblings ...)
  2015-03-24 15:27 ` [PATCH 3/6] direct-io: add support for write stream IDs Jens Axboe
@ 2015-03-24 15:27 ` Jens Axboe
  2015-03-25  2:40   ` Dave Chinner
  2015-03-24 15:27 ` [PATCH 5/6] btrfs: add support for buffered writeback stream ID Jens Axboe
                   ` (2 subsequent siblings)
  6 siblings, 1 reply; 33+ messages in thread
From: Jens Axboe @ 2015-03-24 15:27 UTC (permalink / raw)
  To: axboe, linux-kernel, linux-fsdevel; +Cc: ming.l, Jens Axboe

Add a streamid field to the writeback_control structure, and use
it for the various parts of buffered writeback.

Signed-off-by: Jens Axboe <axboe@fb.com>
---
 fs/buffer.c               | 4 ++--
 fs/fs-writeback.c         | 1 +
 fs/mpage.c                | 1 +
 include/linux/writeback.h | 2 ++
 mm/filemap.c              | 1 +
 mm/migrate.c              | 1 +
 mm/page-writeback.c       | 1 +
 mm/vmscan.c               | 1 +
 8 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/fs/buffer.c b/fs/buffer.c
index 20805db2c987..1ae99868f6fb 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1774,7 +1774,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
 	do {
 		struct buffer_head *next = bh->b_this_page;
 		if (buffer_async_write(bh)) {
-			submit_bh(write_op, bh);
+			_submit_bh(write_op, bh, streamid_to_flags(wbc->streamid));
 			nr_underway++;
 		}
 		bh = next;
@@ -1828,7 +1828,7 @@ recover:
 		struct buffer_head *next = bh->b_this_page;
 		if (buffer_async_write(bh)) {
 			clear_buffer_dirty(bh);
-			submit_bh(write_op, bh);
+			_submit_bh(write_op, bh, streamid_to_flags(wbc->streamid));
 			nr_underway++;
 		}
 		bh = next;
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index e907052eeadb..0d0a9f6268f5 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -733,6 +733,7 @@ static long writeback_sb_inodes(struct super_block *sb,
 		 * We use I_SYNC to pin the inode in memory. While it is set
 		 * evict_inode() will wait so the inode cannot be freed.
 		 */
+		wbc.streamid = inode->i_streamid;
 		__writeback_single_inode(inode, &wbc);
 
 		work->nr_pages -= write_chunk - wbc.nr_to_write;
diff --git a/fs/mpage.c b/fs/mpage.c
index 3e79220babac..ae19d35bdd96 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -605,6 +605,7 @@ alloc_new:
 				bio_get_nr_vecs(bdev), GFP_NOFS|__GFP_HIGH);
 		if (bio == NULL)
 			goto confused;
+		bio_set_streamid(bio, wbc->streamid);
 	}
 
 	/*
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 00048339c23e..3ac2ce545dac 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -78,6 +78,8 @@ struct writeback_control {
 
 	enum writeback_sync_modes sync_mode;
 
+	unsigned int streamid;
+
 	unsigned for_kupdate:1;		/* A kupdate writeback */
 	unsigned for_background:1;	/* A background writeback */
 	unsigned tagged_writepages:1;	/* tag-and-write to avoid livelock */
diff --git a/mm/filemap.c b/mm/filemap.c
index ad7242043bdb..85aa2cc77d67 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -276,6 +276,7 @@ int __filemap_fdatawrite_range(struct address_space *mapping, loff_t start,
 	struct writeback_control wbc = {
 		.sync_mode = sync_mode,
 		.nr_to_write = LONG_MAX,
+		.streamid = inode_streamid(mapping->host),
 		.range_start = start,
 		.range_end = end,
 	};
diff --git a/mm/migrate.c b/mm/migrate.c
index 85e042686031..614407dceb55 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -645,6 +645,7 @@ static int writeout(struct address_space *mapping, struct page *page)
 	struct writeback_control wbc = {
 		.sync_mode = WB_SYNC_NONE,
 		.nr_to_write = 1,
+		.streamid = inode_streamid(mapping->host),
 		.range_start = 0,
 		.range_end = LLONG_MAX,
 		.for_reclaim = 1
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 45e187b2d971..47120abe8afb 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2051,6 +2051,7 @@ int write_one_page(struct page *page, int wait)
 	int ret = 0;
 	struct writeback_control wbc = {
 		.sync_mode = WB_SYNC_ALL,
+		.streamid = inode_streamid(mapping->host),
 		.nr_to_write = 1,
 	};
 
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 5e8eadd71bac..67fcdd082c17 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -546,6 +546,7 @@ static pageout_t pageout(struct page *page, struct address_space *mapping,
 		struct writeback_control wbc = {
 			.sync_mode = WB_SYNC_NONE,
 			.nr_to_write = SWAP_CLUSTER_MAX,
+			.streamid = inode_streamid(mapping->host),
 			.range_start = 0,
 			.range_end = LLONG_MAX,
 			.for_reclaim = 1,
-- 
1.9.1


^ permalink raw reply related	[flat|nested] 33+ messages in thread

* [PATCH 5/6] btrfs: add support for buffered writeback stream ID
  2015-03-24 15:26 [PATCH RFC] Support for write stream IDs Jens Axboe
                   ` (3 preceding siblings ...)
  2015-03-24 15:27 ` [PATCH 4/6] Add stream ID support for buffered writeback Jens Axboe
@ 2015-03-24 15:27 ` Jens Axboe
  2015-03-24 15:27 ` [PATCH 6/6] xfs: " Jens Axboe
  2015-03-24 17:03 ` [PATCH RFC] Support for write stream IDs Jeff Moyer
  6 siblings, 0 replies; 33+ messages in thread
From: Jens Axboe @ 2015-03-24 15:27 UTC (permalink / raw)
  To: axboe, linux-kernel, linux-fsdevel; +Cc: ming.l, Jens Axboe

Signed-off-by: Jens Axboe <axboe@fb.com>
---
 fs/btrfs/inode.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index d2e732d7af52..804fd6768109 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -8046,6 +8046,7 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio,
 	atomic_set(&dip->pending_bios, 0);
 	btrfs_bio = btrfs_io_bio(io_bio);
 	btrfs_bio->logical = file_offset;
+	bio_set_streamid(io_bio, inode_streamid(inode));
 
 	if (write) {
 		io_bio->bi_end_io = btrfs_endio_direct_write;
-- 
1.9.1


^ permalink raw reply related	[flat|nested] 33+ messages in thread

* [PATCH 6/6] xfs: add support for buffered writeback stream ID
  2015-03-24 15:26 [PATCH RFC] Support for write stream IDs Jens Axboe
                   ` (4 preceding siblings ...)
  2015-03-24 15:27 ` [PATCH 5/6] btrfs: add support for buffered writeback stream ID Jens Axboe
@ 2015-03-24 15:27 ` Jens Axboe
  2015-03-25  2:41   ` Dave Chinner
  2015-03-24 17:03 ` [PATCH RFC] Support for write stream IDs Jeff Moyer
  6 siblings, 1 reply; 33+ messages in thread
From: Jens Axboe @ 2015-03-24 15:27 UTC (permalink / raw)
  To: axboe, linux-kernel, linux-fsdevel; +Cc: ming.l, Jens Axboe

Signed-off-by: Jens Axboe <axboe@fb.com>
---
 fs/xfs/xfs_aops.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 3a9b7a1b8704..2b00a57732e3 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -377,6 +377,7 @@ xfs_submit_ioend_bio(
 	atomic_inc(&ioend->io_remaining);
 	bio->bi_private = ioend;
 	bio->bi_end_io = xfs_end_bio;
+	bio_set_streamid(bio, wbc->streamid);
 	submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE, bio);
 }
 
-- 
1.9.1


^ permalink raw reply related	[flat|nested] 33+ messages in thread

* Re: [PATCH RFC] Support for write stream IDs
  2015-03-24 15:26 [PATCH RFC] Support for write stream IDs Jens Axboe
                   ` (5 preceding siblings ...)
  2015-03-24 15:27 ` [PATCH 6/6] xfs: " Jens Axboe
@ 2015-03-24 17:03 ` Jeff Moyer
  2015-03-24 17:08   ` Jens Axboe
  6 siblings, 1 reply; 33+ messages in thread
From: Jeff Moyer @ 2015-03-24 17:03 UTC (permalink / raw)
  To: Jens Axboe; +Cc: axboe, linux-kernel, linux-fsdevel, ming.l

Jens Axboe <axboe@fb.com> writes:

> Hi,
>
> One of the things that exacerbates write amplification on flash
> based devices is that fact that data with different lifetimes get
> grouped together on media. Currently we have no interface that
> applications can use to separate different types of writes. This
> patch set adds support for that.
>
> The kernel has no knowledge of what stream ID is what. The idea is
> that writes with identical stream IDs have similar life times, not
> that stream ID 'X' has a shorter lifetime than stream ID 'X+1'.
>
> There are basically two interfaces that could be used for this. One
> is fcntl, the other is fadvise. This patchset uses fadvise, with a
> new POSIX_FADV_STREAMID hint. The 'offset' field is used to pass
> the relevant stream ID. Switching to fcntl (with a SET/GET_STREAMID)
> would be trivial.
>
> The patchset wires up the block parts, adds buffered and O_DIRECT
> support, and modifies btrfs/xfs too. It should be trivial to extend
> this to all other file systems, I just used xfs and btrfs for testing.
>
> No block drivers are wired up yet. Patches are against current -git.

Can you give an idea of how the stream id would be communicated to the
device?  NVMe doesn't appear to have any notion of a data stream ID.

Cheers,
Jeff

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH RFC] Support for write stream IDs
  2015-03-24 17:03 ` [PATCH RFC] Support for write stream IDs Jeff Moyer
@ 2015-03-24 17:08   ` Jens Axboe
  2015-03-24 21:46     ` Ming Lin-SSI
  0 siblings, 1 reply; 33+ messages in thread
From: Jens Axboe @ 2015-03-24 17:08 UTC (permalink / raw)
  To: Jeff Moyer, Jens Axboe; +Cc: linux-kernel, linux-fsdevel, ming.l

On 03/24/2015 11:03 AM, Jeff Moyer wrote:
> Jens Axboe <axboe@fb.com> writes:
>
>> Hi,
>>
>> One of the things that exacerbates write amplification on flash
>> based devices is that fact that data with different lifetimes get
>> grouped together on media. Currently we have no interface that
>> applications can use to separate different types of writes. This
>> patch set adds support for that.
>>
>> The kernel has no knowledge of what stream ID is what. The idea is
>> that writes with identical stream IDs have similar life times, not
>> that stream ID 'X' has a shorter lifetime than stream ID 'X+1'.
>>
>> There are basically two interfaces that could be used for this. One
>> is fcntl, the other is fadvise. This patchset uses fadvise, with a
>> new POSIX_FADV_STREAMID hint. The 'offset' field is used to pass
>> the relevant stream ID. Switching to fcntl (with a SET/GET_STREAMID)
>> would be trivial.
>>
>> The patchset wires up the block parts, adds buffered and O_DIRECT
>> support, and modifies btrfs/xfs too. It should be trivial to extend
>> this to all other file systems, I just used xfs and btrfs for testing.
>>
>> No block drivers are wired up yet. Patches are against current -git.
>
> Can you give an idea of how the stream id would be communicated to the
> device?  NVMe doesn't appear to have any notion of a data stream ID.

It doesn't have it, yet, completely. Ming Lin can expand on what it 
looks like for the Samsung nvme devices. Current nvme does have vague 
support for it, however. The write command does have bits for frequent 
vs infrequent vs one-time writes.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 1/6] block: add support for carrying a stream ID in a bio
  2015-03-24 15:26 ` [PATCH 1/6] block: add support for carrying a stream ID in a bio Jens Axboe
@ 2015-03-24 17:11   ` Matias Bjørling
  2015-03-24 17:26     ` Jens Axboe
  2015-03-25  2:30   ` Dave Chinner
  1 sibling, 1 reply; 33+ messages in thread
From: Matias Bjørling @ 2015-03-24 17:11 UTC (permalink / raw)
  To: Jens Axboe, axboe, linux-kernel, linux-fsdevel; +Cc: ming.l

On 03/24/2015 04:26 PM, Jens Axboe wrote:
> The top bits of bio->bi_flags are reserved for keeping the
> allocation pool, set aside the next four bits for carrying
> a stream ID. That leaves us with support for 15 streams,
> 0 is reserved as a "stream not set" value.

15 streams seem very limited. Can this be extended? e.g. 16 bits.

15 streams is enough for 1-4 applications. More, and applications starts 
to fight over the same stream id's, leading them to place different age 
data in same flash blocks and push us back to square one.

I understand that Samsung multi-stream SSD supports a limited amount of 
streams, more advance implementations should provide higher limits.

Thanks,
Matias

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 1/6] block: add support for carrying a stream ID in a bio
  2015-03-24 17:11   ` Matias Bjørling
@ 2015-03-24 17:26     ` Jens Axboe
  2015-03-24 22:07       ` Ming Lin-SSI
  0 siblings, 1 reply; 33+ messages in thread
From: Jens Axboe @ 2015-03-24 17:26 UTC (permalink / raw)
  To: Matias Bjørling, Jens Axboe, linux-kernel, linux-fsdevel; +Cc: ming.l

On 03/24/2015 11:11 AM, Matias Bjørling wrote:
> On 03/24/2015 04:26 PM, Jens Axboe wrote:
>> The top bits of bio->bi_flags are reserved for keeping the
>> allocation pool, set aside the next four bits for carrying
>> a stream ID. That leaves us with support for 15 streams,
>> 0 is reserved as a "stream not set" value.
>
> 15 streams seem very limited. Can this be extended? e.g. 16 bits.
>
> 15 streams is enough for 1-4 applications. More, and applications starts
> to fight over the same stream id's, leading them to place different age
> data in same flash blocks and push us back to square one.
>
> I understand that Samsung multi-stream SSD supports a limited amount of
> streams, more advance implementations should provide higher limits.

Pushing it higher is not a big deal as far as the implementation goes, 
though 16 bits might be stealing a bit too much space for this. On 
32-bit archs, we have 18 bits currently free that we can abuse. The 
Samsung device supports 16 streams. That's honestly a lot more than I 
would expect most devices to support in hardware, 16 is a lot of open 
erase blocks and write append points. Obviously the open channel effort 
would make that more feasible, though.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 33+ messages in thread

* RE: [PATCH RFC] Support for write stream IDs
  2015-03-24 17:08   ` Jens Axboe
@ 2015-03-24 21:46     ` Ming Lin-SSI
  2015-03-24 21:48       ` Jens Axboe
  0 siblings, 1 reply; 33+ messages in thread
From: Ming Lin-SSI @ 2015-03-24 21:46 UTC (permalink / raw)
  To: Jens Axboe, Jeff Moyer, Jens Axboe
  Cc: linux-kernel, linux-fsdevel, Changho Choi-SSI

> -----Original Message-----
> From: Jens Axboe [mailto:axboe@kernel.dk]
> Sent: Tuesday, March 24, 2015 10:08 AM
> To: Jeff Moyer; Jens Axboe
> Cc: linux-kernel@vger.kernel.org; linux-fsdevel@vger.kernel.org; Ming Lin-
> SSI
> Subject: Re: [PATCH RFC] Support for write stream IDs
> 
> On 03/24/2015 11:03 AM, Jeff Moyer wrote:
> > Jens Axboe <axboe@fb.com> writes:
> >
> >> Hi,
> >>
> >> One of the things that exacerbates write amplification on flash based
> >> devices is that fact that data with different lifetimes get grouped
> >> together on media. Currently we have no interface that applications
> >> can use to separate different types of writes. This patch set adds
> >> support for that.
> >>
> >> The kernel has no knowledge of what stream ID is what. The idea is
> >> that writes with identical stream IDs have similar life times, not
> >> that stream ID 'X' has a shorter lifetime than stream ID 'X+1'.
> >>
> >> There are basically two interfaces that could be used for this. One
> >> is fcntl, the other is fadvise. This patchset uses fadvise, with a
> >> new POSIX_FADV_STREAMID hint. The 'offset' field is used to pass the
> >> relevant stream ID. Switching to fcntl (with a SET/GET_STREAMID)
> >> would be trivial.
> >>
> >> The patchset wires up the block parts, adds buffered and O_DIRECT
> >> support, and modifies btrfs/xfs too. It should be trivial to extend
> >> this to all other file systems, I just used xfs and btrfs for testing.
> >>
> >> No block drivers are wired up yet. Patches are against current -git.
> >
> > Can you give an idea of how the stream id would be communicated to the
> > device?  NVMe doesn't appear to have any notion of a data stream ID.
> 
> It doesn't have it, yet, completely. Ming Lin can expand on what it looks like
> for the Samsung nvme devices. Current nvme does have vague support for it,
> however. The write command does have bits for frequent vs infrequent vs
> one-time writes.

Samsung extended NVMe spec to add "stream control command" to open/close stream.
And with small modification to the "Write command" to write to open stream.

We are promoting the multi-stream spec to NVMe group.

Thanks,
Ming

> 
> --
> Jens Axboe


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH RFC] Support for write stream IDs
  2015-03-24 21:46     ` Ming Lin-SSI
@ 2015-03-24 21:48       ` Jens Axboe
  0 siblings, 0 replies; 33+ messages in thread
From: Jens Axboe @ 2015-03-24 21:48 UTC (permalink / raw)
  To: Ming Lin-SSI, Jeff Moyer, Jens Axboe
  Cc: linux-kernel, linux-fsdevel, Changho Choi-SSI

On 03/24/2015 03:46 PM, Ming Lin-SSI wrote:
>> -----Original Message-----
>> From: Jens Axboe [mailto:axboe@kernel.dk]
>> Sent: Tuesday, March 24, 2015 10:08 AM
>> To: Jeff Moyer; Jens Axboe
>> Cc: linux-kernel@vger.kernel.org; linux-fsdevel@vger.kernel.org; Ming Lin-
>> SSI
>> Subject: Re: [PATCH RFC] Support for write stream IDs
>>
>> On 03/24/2015 11:03 AM, Jeff Moyer wrote:
>>> Jens Axboe <axboe@fb.com> writes:
>>>
>>>> Hi,
>>>>
>>>> One of the things that exacerbates write amplification on flash based
>>>> devices is that fact that data with different lifetimes get grouped
>>>> together on media. Currently we have no interface that applications
>>>> can use to separate different types of writes. This patch set adds
>>>> support for that.
>>>>
>>>> The kernel has no knowledge of what stream ID is what. The idea is
>>>> that writes with identical stream IDs have similar life times, not
>>>> that stream ID 'X' has a shorter lifetime than stream ID 'X+1'.
>>>>
>>>> There are basically two interfaces that could be used for this. One
>>>> is fcntl, the other is fadvise. This patchset uses fadvise, with a
>>>> new POSIX_FADV_STREAMID hint. The 'offset' field is used to pass the
>>>> relevant stream ID. Switching to fcntl (with a SET/GET_STREAMID)
>>>> would be trivial.
>>>>
>>>> The patchset wires up the block parts, adds buffered and O_DIRECT
>>>> support, and modifies btrfs/xfs too. It should be trivial to extend
>>>> this to all other file systems, I just used xfs and btrfs for testing.
>>>>
>>>> No block drivers are wired up yet. Patches are against current -git.
>>>
>>> Can you give an idea of how the stream id would be communicated to the
>>> device?  NVMe doesn't appear to have any notion of a data stream ID.
>>
>> It doesn't have it, yet, completely. Ming Lin can expand on what it looks like
>> for the Samsung nvme devices. Current nvme does have vague support for it,
>> however. The write command does have bits for frequent vs infrequent vs
>> one-time writes.
>
> Samsung extended NVMe spec to add "stream control command" to open/close stream.
> And with small modification to the "Write command" to write to open stream.
>
> We are promoting the multi-stream spec to NVMe group.

Can you share the nvme spec proposal with us?

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 33+ messages in thread

* RE: [PATCH 1/6] block: add support for carrying a stream ID in a bio
  2015-03-24 17:26     ` Jens Axboe
@ 2015-03-24 22:07       ` Ming Lin-SSI
  2015-03-25  1:42         ` Jens Axboe
  2015-03-25  8:11         ` Matias Bjørling
  0 siblings, 2 replies; 33+ messages in thread
From: Ming Lin-SSI @ 2015-03-24 22:07 UTC (permalink / raw)
  To: Jens Axboe, Matias Bjørling, Jens Axboe, linux-kernel,
	linux-fsdevel

> -----Original Message-----
> From: Jens Axboe [mailto:axboe@kernel.dk]
> Sent: Tuesday, March 24, 2015 10:27 AM
> To: Matias Bjørling; Jens Axboe; linux-kernel@vger.kernel.org; linux-
> fsdevel@vger.kernel.org
> Cc: Ming Lin-SSI
> Subject: Re: [PATCH 1/6] block: add support for carrying a stream ID in a bio
> 
> On 03/24/2015 11:11 AM, Matias Bjørling wrote:
> > On 03/24/2015 04:26 PM, Jens Axboe wrote:
> >> The top bits of bio->bi_flags are reserved for keeping the allocation
> >> pool, set aside the next four bits for carrying a stream ID. That
> >> leaves us with support for 15 streams,
> >> 0 is reserved as a "stream not set" value.
> >
> > 15 streams seem very limited. Can this be extended? e.g. 16 bits.
> >
> > 15 streams is enough for 1-4 applications. More, and applications
> > starts to fight over the same stream id's, leading them to place
> > different age data in same flash blocks and push us back to square one.
> >
> > I understand that Samsung multi-stream SSD supports a limited amount
> > of streams, more advance implementations should provide higher limits.
> 
> Pushing it higher is not a big deal as far as the implementation goes, though
> 16 bits might be stealing a bit too much space for this. On 32-bit archs, we
> have 18 bits currently free that we can abuse. The Samsung device supports
> 16 streams. That's honestly a lot more than I would expect most devices to
> support in hardware, 16 is a lot of open erase blocks and write append points.
> Obviously the open channel effort would make that more feasible, though.

Can we use 8 bits at least? I'll test performance with 16 streams.

> 
> --
> Jens Axboe


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 1/6] block: add support for carrying a stream ID in a bio
  2015-03-24 22:07       ` Ming Lin-SSI
@ 2015-03-25  1:42         ` Jens Axboe
  2015-03-25  8:11         ` Matias Bjørling
  1 sibling, 0 replies; 33+ messages in thread
From: Jens Axboe @ 2015-03-25  1:42 UTC (permalink / raw)
  To: Ming Lin-SSI, Matias Bjørling, Jens Axboe, linux-kernel,
	linux-fsdevel

On 03/24/2015 04:07 PM, Ming Lin-SSI wrote:
>> -----Original Message-----
>> From: Jens Axboe [mailto:axboe@kernel.dk]
>> Sent: Tuesday, March 24, 2015 10:27 AM
>> To: Matias Bjørling; Jens Axboe; linux-kernel@vger.kernel.org; linux-
>> fsdevel@vger.kernel.org
>> Cc: Ming Lin-SSI
>> Subject: Re: [PATCH 1/6] block: add support for carrying a stream ID in a bio
>>
>> On 03/24/2015 11:11 AM, Matias Bjørling wrote:
>>> On 03/24/2015 04:26 PM, Jens Axboe wrote:
>>>> The top bits of bio->bi_flags are reserved for keeping the allocation
>>>> pool, set aside the next four bits for carrying a stream ID. That
>>>> leaves us with support for 15 streams,
>>>> 0 is reserved as a "stream not set" value.
>>>
>>> 15 streams seem very limited. Can this be extended? e.g. 16 bits.
>>>
>>> 15 streams is enough for 1-4 applications. More, and applications
>>> starts to fight over the same stream id's, leading them to place
>>> different age data in same flash blocks and push us back to square one.
>>>
>>> I understand that Samsung multi-stream SSD supports a limited amount
>>> of streams, more advance implementations should provide higher limits.
>>
>> Pushing it higher is not a big deal as far as the implementation goes, though
>> 16 bits might be stealing a bit too much space for this. On 32-bit archs, we
>> have 18 bits currently free that we can abuse. The Samsung device supports
>> 16 streams. That's honestly a lot more than I would expect most devices to
>> support in hardware, 16 is a lot of open erase blocks and write append points.
>> Obviously the open channel effort would make that more feasible, though.
>
> Can we use 8 bits at least? I'll test performance with 16 streams.

We could, but I still question whether that's really useful. I'd rather 
start smaller and go bigger if there's a real use case for it. It wont 
change the user space ABI if we later make it larger.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 1/6] block: add support for carrying a stream ID in a bio
  2015-03-24 15:26 ` [PATCH 1/6] block: add support for carrying a stream ID in a bio Jens Axboe
  2015-03-24 17:11   ` Matias Bjørling
@ 2015-03-25  2:30   ` Dave Chinner
  2015-04-12 10:42     ` Dmitry Monakhov
  1 sibling, 1 reply; 33+ messages in thread
From: Dave Chinner @ 2015-03-25  2:30 UTC (permalink / raw)
  To: Jens Axboe; +Cc: axboe, linux-kernel, linux-fsdevel, ming.l

On Tue, Mar 24, 2015 at 09:26:58AM -0600, Jens Axboe wrote:
> The top bits of bio->bi_flags are reserved for keeping the
> allocation pool, set aside the next four bits for carrying
> a stream ID. That leaves us with support for 15 streams,
> 0 is reserved as a "stream not set" value.
> 
> Add helpers for setting/getting stream ID of a bio.
....
> +/*
> + * after the pool bits, next 4 bits are for the stream id
> + */
> +#define BIO_STREAM_BITS		(4)
> +#define BIO_STREAM_OFFSET	(BITS_PER_LONG - 8)
> +#define BIO_STREAM_MASK		((1 << BIO_STREAM_BITS) - 1)
> +
> +static inline unsigned long streamid_to_flags(unsigned int id)
> +{
> +	return (unsigned long) (id & BIO_STREAM_MASK) << BIO_STREAM_OFFSET;
> +}
> +
> +static inline void bio_set_streamid(struct bio *bio, unsigned int id)
> +{
> +	bio->bi_flags |= streamid_to_flags(id);
> +}
> +
> +static inline unsigned int bio_get_streamid(struct bio *bio)
> +{
> +	return (bio->bi_flags >> BIO_STREAM_OFFSET) & BIO_STREAM_MASK;
> +}
> +
> +static inline bool bio_streamid_valid(struct bio *bio)
> +{
> +	return bio_get_streamid(bio) != 0;
> +}

Need to reserve at least one stream for filesystem private use (e.g.
metadata writeback). Potentially 2 streams - one for the journal
which is frequently overwritten, the other for all other long lived
persistent metadata.

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 4/6] Add stream ID support for buffered writeback
  2015-03-24 15:27 ` [PATCH 4/6] Add stream ID support for buffered writeback Jens Axboe
@ 2015-03-25  2:40   ` Dave Chinner
  2015-03-25 14:17     ` Jens Axboe
  0 siblings, 1 reply; 33+ messages in thread
From: Dave Chinner @ 2015-03-25  2:40 UTC (permalink / raw)
  To: Jens Axboe; +Cc: axboe, linux-kernel, linux-fsdevel, ming.l

On Tue, Mar 24, 2015 at 09:27:01AM -0600, Jens Axboe wrote:
> Add a streamid field to the writeback_control structure, and use
> it for the various parts of buffered writeback.
> 
> Signed-off-by: Jens Axboe <axboe@fb.com>
> ---
>  fs/buffer.c               | 4 ++--
>  fs/fs-writeback.c         | 1 +
>  fs/mpage.c                | 1 +
>  include/linux/writeback.h | 2 ++
>  mm/filemap.c              | 1 +
>  mm/migrate.c              | 1 +
>  mm/page-writeback.c       | 1 +
>  mm/vmscan.c               | 1 +
>  8 files changed, 10 insertions(+), 2 deletions(-)
> 
> diff --git a/fs/buffer.c b/fs/buffer.c
> index 20805db2c987..1ae99868f6fb 100644
> --- a/fs/buffer.c
> +++ b/fs/buffer.c
> @@ -1774,7 +1774,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
>  	do {
>  		struct buffer_head *next = bh->b_this_page;
>  		if (buffer_async_write(bh)) {
> -			submit_bh(write_op, bh);
> +			_submit_bh(write_op, bh, streamid_to_flags(wbc->streamid));

Urk, the is the second patchset in a couple of days to add some
random parameter to submit_bh like this (control group thottling
patch from Tejun was the other).

This doesn't scale....

> diff --git a/include/linux/writeback.h b/include/linux/writeback.h
> index 00048339c23e..3ac2ce545dac 100644
> --- a/include/linux/writeback.h
> +++ b/include/linux/writeback.h
> @@ -78,6 +78,8 @@ struct writeback_control {
>  
>  	enum writeback_sync_modes sync_mode;
>  
> +	unsigned int streamid;
> +
>  	unsigned for_kupdate:1;		/* A kupdate writeback */
>  	unsigned for_background:1;	/* A background writeback */
>  	unsigned tagged_writepages:1;	/* tag-and-write to avoid livelock */
> diff --git a/mm/filemap.c b/mm/filemap.c
> index ad7242043bdb..85aa2cc77d67 100644
> --- a/mm/filemap.c
> +++ b/mm/filemap.c
> @@ -276,6 +276,7 @@ int __filemap_fdatawrite_range(struct address_space *mapping, loff_t start,
>  	struct writeback_control wbc = {
>  		.sync_mode = sync_mode,
>  		.nr_to_write = LONG_MAX,
> +		.streamid = inode_streamid(mapping->host),
>  		.range_start = start,
>  		.range_end = end,
>  	};

I don't think this is the right layer to be specifying the stream
id. When we do buffered writeback, the filesystem's .writepage
implementation gets called and it has access to the inode and hence
can grab the stream id there. the struct wbc is used for writeback
across more than one inode, and hence there's going to be nothing
but confusion when this gets set and we iterate writeback across
multiple inodes.

i.e. the stream id should be set by the code the packs the pages
into the bio/bh that is being submitted where there is a direct
relationship between the inode and the IO being built, rather than
at a high layer where the stream id has ambiguous meaning....

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 6/6] xfs: add support for buffered writeback stream ID
  2015-03-24 15:27 ` [PATCH 6/6] xfs: " Jens Axboe
@ 2015-03-25  2:41   ` Dave Chinner
  0 siblings, 0 replies; 33+ messages in thread
From: Dave Chinner @ 2015-03-25  2:41 UTC (permalink / raw)
  To: Jens Axboe; +Cc: axboe, linux-kernel, linux-fsdevel, ming.l

On Tue, Mar 24, 2015 at 09:27:03AM -0600, Jens Axboe wrote:
> Signed-off-by: Jens Axboe <axboe@fb.com>
> ---
>  fs/xfs/xfs_aops.c | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
> index 3a9b7a1b8704..2b00a57732e3 100644
> --- a/fs/xfs/xfs_aops.c
> +++ b/fs/xfs/xfs_aops.c
> @@ -377,6 +377,7 @@ xfs_submit_ioend_bio(
>  	atomic_inc(&ioend->io_remaining);
>  	bio->bi_private = ioend;
>  	bio->bi_end_io = xfs_end_bio;
> +	bio_set_streamid(bio, wbc->streamid);
>  	submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE, bio);

Like I said, this is wrong.

+	bio_set_streamid(bio, ioend->io_inode->i_streamid);

is actually what it should be.

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 3/6] direct-io: add support for write stream IDs
  2015-03-24 15:27 ` [PATCH 3/6] direct-io: add support for write stream IDs Jens Axboe
@ 2015-03-25  2:43   ` Dave Chinner
  2015-03-25 14:26     ` Jens Axboe
  0 siblings, 1 reply; 33+ messages in thread
From: Dave Chinner @ 2015-03-25  2:43 UTC (permalink / raw)
  To: Jens Axboe; +Cc: axboe, linux-kernel, linux-fsdevel, ming.l

On Tue, Mar 24, 2015 at 09:27:00AM -0600, Jens Axboe wrote:
> Get the streamid from the file, if any, and set it on the bio.
> 
> Signed-off-by: Jens Axboe <axboe@fb.com>
> ---
>  fs/direct-io.c | 4 ++++
>  1 file changed, 4 insertions(+)
> 
> diff --git a/fs/direct-io.c b/fs/direct-io.c
> index e181b6b2e297..5d2750346451 100644
> --- a/fs/direct-io.c
> +++ b/fs/direct-io.c
> @@ -77,6 +77,7 @@ struct dio_submit {
>  	int reap_counter;		/* rate limit reaping */
>  	sector_t final_block_in_request;/* doesn't change */
>  	int boundary;			/* prev block is at a boundary */
> +	int streamid;			/* Write stream ID */
>  	get_block_t *get_block;		/* block mapping function */
>  	dio_submit_t *submit_io;	/* IO submition function */
>  
> @@ -372,6 +373,8 @@ dio_bio_alloc(struct dio *dio, struct dio_submit *sdio,
>  
>  	sdio->bio = bio;
>  	sdio->logical_offset_in_bio = sdio->cur_page_fs_offset;
> +
> +	bio_set_streamid(bio, sdio->streamid);
>  }
>  
>  /*
> @@ -1205,6 +1208,7 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
>  	sdio.blkbits = blkbits;
>  	sdio.blkfactor = i_blkbits - blkbits;
>  	sdio.block_in_file = offset >> blkbits;
> +	sdio.streamid = iocb->ki_filp->f_streamid;

If iocb->ki_filp->f_streamid is not set, then it should fall back to
whatever is set on the inode->i_streamid.

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 1/6] block: add support for carrying a stream ID in a bio
  2015-03-24 22:07       ` Ming Lin-SSI
  2015-03-25  1:42         ` Jens Axboe
@ 2015-03-25  8:11         ` Matias Bjørling
  2015-03-25 18:36           ` Ming Lin-SSI
  1 sibling, 1 reply; 33+ messages in thread
From: Matias Bjørling @ 2015-03-25  8:11 UTC (permalink / raw)
  To: Ming Lin-SSI, Jens Axboe, linux-kernel, linux-fsdevel

>> Pushing it higher is not a big deal as far as the implementation goes, though
>> 16 bits might be stealing a bit too much space for this. On 32-bit archs, we
>> have 18 bits currently free that we can abuse. The Samsung device supports
>> 16 streams. That's honestly a lot more than I would expect most devices to
>> support in hardware, 16 is a lot of open erase blocks and write append points.
>> Obviously the open channel effort would make that more feasible, though.
>
> Can we use 8 bits at least? I'll test performance with 16 streams.
>

Ming, can you provide an example of how streams will be managed for 
multiple applications? I can see how it would be efficient for a single 
application, but how will it be managed for multiple applications?

-Matias

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 4/6] Add stream ID support for buffered writeback
  2015-03-25  2:40   ` Dave Chinner
@ 2015-03-25 14:17     ` Jens Axboe
  0 siblings, 0 replies; 33+ messages in thread
From: Jens Axboe @ 2015-03-25 14:17 UTC (permalink / raw)
  To: Dave Chinner, Jens Axboe; +Cc: linux-kernel, linux-fsdevel, ming.l

On 03/24/2015 08:40 PM, Dave Chinner wrote:
> On Tue, Mar 24, 2015 at 09:27:01AM -0600, Jens Axboe wrote:
>> Add a streamid field to the writeback_control structure, and use
>> it for the various parts of buffered writeback.
>>
>> Signed-off-by: Jens Axboe <axboe@fb.com>
>> ---
>>   fs/buffer.c               | 4 ++--
>>   fs/fs-writeback.c         | 1 +
>>   fs/mpage.c                | 1 +
>>   include/linux/writeback.h | 2 ++
>>   mm/filemap.c              | 1 +
>>   mm/migrate.c              | 1 +
>>   mm/page-writeback.c       | 1 +
>>   mm/vmscan.c               | 1 +
>>   8 files changed, 10 insertions(+), 2 deletions(-)
>>
>> diff --git a/fs/buffer.c b/fs/buffer.c
>> index 20805db2c987..1ae99868f6fb 100644
>> --- a/fs/buffer.c
>> +++ b/fs/buffer.c
>> @@ -1774,7 +1774,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
>>   	do {
>>   		struct buffer_head *next = bh->b_this_page;
>>   		if (buffer_async_write(bh)) {
>> -			submit_bh(write_op, bh);
>> +			_submit_bh(write_op, bh, streamid_to_flags(wbc->streamid));
>
> Urk, the is the second patchset in a couple of days to add some
> random parameter to submit_bh like this (control group thottling
> patch from Tejun was the other).
>
> This doesn't scale....

It's not adding a parameter, it's just calling _submit_bh() instead of 
submit_bh().

>> diff --git a/include/linux/writeback.h b/include/linux/writeback.h
>> index 00048339c23e..3ac2ce545dac 100644
>> --- a/include/linux/writeback.h
>> +++ b/include/linux/writeback.h
>> @@ -78,6 +78,8 @@ struct writeback_control {
>>
>>   	enum writeback_sync_modes sync_mode;
>>
>> +	unsigned int streamid;
>> +
>>   	unsigned for_kupdate:1;		/* A kupdate writeback */
>>   	unsigned for_background:1;	/* A background writeback */
>>   	unsigned tagged_writepages:1;	/* tag-and-write to avoid livelock */
>> diff --git a/mm/filemap.c b/mm/filemap.c
>> index ad7242043bdb..85aa2cc77d67 100644
>> --- a/mm/filemap.c
>> +++ b/mm/filemap.c
>> @@ -276,6 +276,7 @@ int __filemap_fdatawrite_range(struct address_space *mapping, loff_t start,
>>   	struct writeback_control wbc = {
>>   		.sync_mode = sync_mode,
>>   		.nr_to_write = LONG_MAX,
>> +		.streamid = inode_streamid(mapping->host),
>>   		.range_start = start,
>>   		.range_end = end,
>>   	};
>
> I don't think this is the right layer to be specifying the stream
> id. When we do buffered writeback, the filesystem's .writepage
> implementation gets called and it has access to the inode and hence
> can grab the stream id there. the struct wbc is used for writeback
> across more than one inode, and hence there's going to be nothing
> but confusion when this gets set and we iterate writeback across
> multiple inodes.
>
> i.e. the stream id should be set by the code the packs the pages
> into the bio/bh that is being submitted where there is a direct
> relationship between the inode and the IO being built, rather than
> at a high layer where the stream id has ambiguous meaning....

Yeah that's a good point, it was a bit of a lazy mans solution to not 
have to touch all the ->writepage(s) hooks. But you are right, I'll do 
that instead.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 3/6] direct-io: add support for write stream IDs
  2015-03-25  2:43   ` Dave Chinner
@ 2015-03-25 14:26     ` Jens Axboe
  2015-04-10 23:50       ` Ming Lin
  0 siblings, 1 reply; 33+ messages in thread
From: Jens Axboe @ 2015-03-25 14:26 UTC (permalink / raw)
  To: Dave Chinner, Jens Axboe; +Cc: linux-kernel, linux-fsdevel, ming.l

On 03/24/2015 08:43 PM, Dave Chinner wrote:
> On Tue, Mar 24, 2015 at 09:27:00AM -0600, Jens Axboe wrote:
>> Get the streamid from the file, if any, and set it on the bio.
>>
>> Signed-off-by: Jens Axboe <axboe@fb.com>
>> ---
>>   fs/direct-io.c | 4 ++++
>>   1 file changed, 4 insertions(+)
>>
>> diff --git a/fs/direct-io.c b/fs/direct-io.c
>> index e181b6b2e297..5d2750346451 100644
>> --- a/fs/direct-io.c
>> +++ b/fs/direct-io.c
>> @@ -77,6 +77,7 @@ struct dio_submit {
>>   	int reap_counter;		/* rate limit reaping */
>>   	sector_t final_block_in_request;/* doesn't change */
>>   	int boundary;			/* prev block is at a boundary */
>> +	int streamid;			/* Write stream ID */
>>   	get_block_t *get_block;		/* block mapping function */
>>   	dio_submit_t *submit_io;	/* IO submition function */
>>
>> @@ -372,6 +373,8 @@ dio_bio_alloc(struct dio *dio, struct dio_submit *sdio,
>>
>>   	sdio->bio = bio;
>>   	sdio->logical_offset_in_bio = sdio->cur_page_fs_offset;
>> +
>> +	bio_set_streamid(bio, sdio->streamid);
>>   }
>>
>>   /*
>> @@ -1205,6 +1208,7 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
>>   	sdio.blkbits = blkbits;
>>   	sdio.blkfactor = i_blkbits - blkbits;
>>   	sdio.block_in_file = offset >> blkbits;
>> +	sdio.streamid = iocb->ki_filp->f_streamid;
>
> If iocb->ki_filp->f_streamid is not set, then it should fall back to
> whatever is set on the inode->i_streamid.

Good point, agree. Will make that change.


-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 33+ messages in thread

* RE: [PATCH 1/6] block: add support for carrying a stream ID in a bio
  2015-03-25  8:11         ` Matias Bjørling
@ 2015-03-25 18:36           ` Ming Lin-SSI
  0 siblings, 0 replies; 33+ messages in thread
From: Ming Lin-SSI @ 2015-03-25 18:36 UTC (permalink / raw)
  To: Matias Bjørling, Jens Axboe, linux-kernel, linux-fsdevel

> -----Original Message-----
> From: Matias Bjørling [mailto:m@bjorling.me]
> Sent: Wednesday, March 25, 2015 1:11 AM
> To: Ming Lin-SSI; Jens Axboe; linux-kernel@vger.kernel.org; linux-
> fsdevel@vger.kernel.org
> Subject: Re: [PATCH 1/6] block: add support for carrying a stream ID in a bio
> 
> >> Pushing it higher is not a big deal as far as the implementation
> >> goes, though
> >> 16 bits might be stealing a bit too much space for this. On 32-bit
> >> archs, we have 18 bits currently free that we can abuse. The Samsung
> >> device supports
> >> 16 streams. That's honestly a lot more than I would expect most
> >> devices to support in hardware, 16 is a lot of open erase blocks and write
> append points.
> >> Obviously the open channel effort would make that more feasible,
> though.
> >
> > Can we use 8 bits at least? I'll test performance with 16 streams.
> >
> 
> Ming, can you provide an example of how streams will be managed for
> multiple applications? I can see how it would be efficient for a single
> application, but how will it be managed for multiple applications?

Multiple applications will get different stream-id.
For example,

Application 1:
stream_id = open_stream(NVME_DEVICE_HANDLE,  ....); //maybe stream-id 1
fadvise(fd, stream_id, 0, POSIX_FADV_STREAMID); 
write(fd, buf, count);
close_stream(NVME_DEVICE_HANDLE, stream_id);

Application 2:
stream_id = open_stream(NVME_DEVICE_HANDLE,  ....); //maybe stream-id 2
fadvise(fd, stream_id, 0, POSIX_FADV_STREAMID); 
write(fd, buf, count);
close_stream(NVME_DEVICE_HANDLE, stream_id);

Thanks,
Ming

> 
> -Matias

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 3/6] direct-io: add support for write stream IDs
  2015-03-25 14:26     ` Jens Axboe
@ 2015-04-10 23:50       ` Ming Lin
  2015-04-11  0:06         ` Ming Lin
                           ` (2 more replies)
  0 siblings, 3 replies; 33+ messages in thread
From: Ming Lin @ 2015-04-10 23:50 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Dave Chinner, Jens Axboe, lkml, linux-fsdevel, ming.l

On Wed, Mar 25, 2015 at 7:26 AM, Jens Axboe <axboe@kernel.dk> wrote:
> On 03/24/2015 08:43 PM, Dave Chinner wrote:
>>
>> On Tue, Mar 24, 2015 at 09:27:00AM -0600, Jens Axboe wrote:
>>>
>>> Get the streamid from the file, if any, and set it on the bio.
>>>
>>> Signed-off-by: Jens Axboe <axboe@fb.com>
>>> ---
>>>   fs/direct-io.c | 4 ++++
>>>   1 file changed, 4 insertions(+)
>>>
>>> diff --git a/fs/direct-io.c b/fs/direct-io.c
>>> index e181b6b2e297..5d2750346451 100644
>>> --- a/fs/direct-io.c
>>> +++ b/fs/direct-io.c
>>> @@ -77,6 +77,7 @@ struct dio_submit {
>>>         int reap_counter;               /* rate limit reaping */
>>>         sector_t final_block_in_request;/* doesn't change */
>>>         int boundary;                   /* prev block is at a boundary */
>>> +       int streamid;                   /* Write stream ID */
>>>         get_block_t *get_block;         /* block mapping function */
>>>         dio_submit_t *submit_io;        /* IO submition function */
>>>
>>> @@ -372,6 +373,8 @@ dio_bio_alloc(struct dio *dio, struct dio_submit
>>> *sdio,
>>>
>>>         sdio->bio = bio;
>>>         sdio->logical_offset_in_bio = sdio->cur_page_fs_offset;
>>> +
>>> +       bio_set_streamid(bio, sdio->streamid);
>>>   }
>>>
>>>   /*
>>> @@ -1205,6 +1208,7 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb,
>>> struct inode *inode,
>>>         sdio.blkbits = blkbits;
>>>         sdio.blkfactor = i_blkbits - blkbits;
>>>         sdio.block_in_file = offset >> blkbits;
>>> +       sdio.streamid = iocb->ki_filp->f_streamid;
>>
>>
>> If iocb->ki_filp->f_streamid is not set, then it should fall back to
>> whatever is set on the inode->i_streamid.

Why should do the fall back?

>
>
> Good point, agree. Will make that change.

Hi Jens,

That change causes problem for direct IO, for example

process 1:
fd = open("/dev/nvme0n1", O_DIRECT...);
//set stream_id 1
fadvise(fd, 1, 0, POSIX_FADV_STREAMID);
pwrite(fd, ....);

process 2:
fd = open("/dev/nvme0n1", O_DIRECT...);
//should be legacy stream_id 0
pwrite(fd, ....);

But now process 2 also see stream_id 1, which is wrong.

Thanks,
Ming

>
>
> --
> Jens Axboe

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 3/6] direct-io: add support for write stream IDs
  2015-04-10 23:50       ` Ming Lin
@ 2015-04-11  0:06         ` Ming Lin
  2015-04-11 11:59         ` Dave Chinner
  2015-04-17 15:17         ` Jens Axboe
  2 siblings, 0 replies; 33+ messages in thread
From: Ming Lin @ 2015-04-11  0:06 UTC (permalink / raw)
  To: Ming Lin
  Cc: Jens Axboe, Dave Chinner, Jens Axboe, lkml, linux-fsdevel, ming.l

On Fri, Apr 10, 2015 at 4:50 PM, Ming Lin <mlin@kernel.org> wrote:
> On Wed, Mar 25, 2015 at 7:26 AM, Jens Axboe <axboe@kernel.dk> wrote:
>> On 03/24/2015 08:43 PM, Dave Chinner wrote:
>>>
>>> On Tue, Mar 24, 2015 at 09:27:00AM -0600, Jens Axboe wrote:
>>>>
>>>> Get the streamid from the file, if any, and set it on the bio.
>>>>
>>>> Signed-off-by: Jens Axboe <axboe@fb.com>
>>>> ---
>>>>   fs/direct-io.c | 4 ++++
>>>>   1 file changed, 4 insertions(+)
>>>>
>>>> diff --git a/fs/direct-io.c b/fs/direct-io.c
>>>> index e181b6b2e297..5d2750346451 100644
>>>> --- a/fs/direct-io.c
>>>> +++ b/fs/direct-io.c
>>>> @@ -77,6 +77,7 @@ struct dio_submit {
>>>>         int reap_counter;               /* rate limit reaping */
>>>>         sector_t final_block_in_request;/* doesn't change */
>>>>         int boundary;                   /* prev block is at a boundary */
>>>> +       int streamid;                   /* Write stream ID */
>>>>         get_block_t *get_block;         /* block mapping function */
>>>>         dio_submit_t *submit_io;        /* IO submition function */
>>>>
>>>> @@ -372,6 +373,8 @@ dio_bio_alloc(struct dio *dio, struct dio_submit
>>>> *sdio,
>>>>
>>>>         sdio->bio = bio;
>>>>         sdio->logical_offset_in_bio = sdio->cur_page_fs_offset;
>>>> +
>>>> +       bio_set_streamid(bio, sdio->streamid);
>>>>   }
>>>>
>>>>   /*
>>>> @@ -1205,6 +1208,7 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb,
>>>> struct inode *inode,
>>>>         sdio.blkbits = blkbits;
>>>>         sdio.blkfactor = i_blkbits - blkbits;
>>>>         sdio.block_in_file = offset >> blkbits;
>>>> +       sdio.streamid = iocb->ki_filp->f_streamid;
>>>
>>>
>>> If iocb->ki_filp->f_streamid is not set, then it should fall back to
>>> whatever is set on the inode->i_streamid.
>
> Why should do the fall back?
>
>>
>>
>> Good point, agree. Will make that change.
>
> Hi Jens,
>
> That change causes problem for direct IO, for example

Actually, buffered write also has problem.

process 1:
fd = open("/mnt/foo.txt", ...);
//set stream_id 1
fadvise(fd, 1, 0, POSIX_FADV_STREAMID);
write(fd, ....);

process 1 exit, but stream_id is still saved in inode

later process 2 starts,
and when writeback it will see stream_id 1 from inode although it
didn't set stream_id at all

process 2:
fd = open("/mnt/foo.txt", ...);
write(fd, ....);

Thanks,
Ming

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 3/6] direct-io: add support for write stream IDs
  2015-04-10 23:50       ` Ming Lin
  2015-04-11  0:06         ` Ming Lin
@ 2015-04-11 11:59         ` Dave Chinner
  2015-04-17  6:20           ` Ming Lin
  2015-04-17 15:17         ` Jens Axboe
  2 siblings, 1 reply; 33+ messages in thread
From: Dave Chinner @ 2015-04-11 11:59 UTC (permalink / raw)
  To: Ming Lin; +Cc: Jens Axboe, Jens Axboe, lkml, linux-fsdevel, ming.l

On Fri, Apr 10, 2015 at 04:50:05PM -0700, Ming Lin wrote:
> On Wed, Mar 25, 2015 at 7:26 AM, Jens Axboe <axboe@kernel.dk> wrote:
> > On 03/24/2015 08:43 PM, Dave Chinner wrote:
> >>
> >> On Tue, Mar 24, 2015 at 09:27:00AM -0600, Jens Axboe wrote:
> >>>
> >>> Get the streamid from the file, if any, and set it on the bio.
> >>>
> >>> Signed-off-by: Jens Axboe <axboe@fb.com>
> >>> ---
> >>>   fs/direct-io.c | 4 ++++
> >>>   1 file changed, 4 insertions(+)
> >>>
> >>> diff --git a/fs/direct-io.c b/fs/direct-io.c
> >>> index e181b6b2e297..5d2750346451 100644
> >>> --- a/fs/direct-io.c
> >>> +++ b/fs/direct-io.c
> >>> @@ -77,6 +77,7 @@ struct dio_submit {
> >>>         int reap_counter;               /* rate limit reaping */
> >>>         sector_t final_block_in_request;/* doesn't change */
> >>>         int boundary;                   /* prev block is at a boundary */
> >>> +       int streamid;                   /* Write stream ID */
> >>>         get_block_t *get_block;         /* block mapping function */
> >>>         dio_submit_t *submit_io;        /* IO submition function */
> >>>
> >>> @@ -372,6 +373,8 @@ dio_bio_alloc(struct dio *dio, struct dio_submit
> >>> *sdio,
> >>>
> >>>         sdio->bio = bio;
> >>>         sdio->logical_offset_in_bio = sdio->cur_page_fs_offset;
> >>> +
> >>> +       bio_set_streamid(bio, sdio->streamid);
> >>>   }
> >>>
> >>>   /*
> >>> @@ -1205,6 +1208,7 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb,
> >>> struct inode *inode,
> >>>         sdio.blkbits = blkbits;
> >>>         sdio.blkfactor = i_blkbits - blkbits;
> >>>         sdio.block_in_file = offset >> blkbits;
> >>> +       sdio.streamid = iocb->ki_filp->f_streamid;
> >>
> >>
> >> If iocb->ki_filp->f_streamid is not set, then it should fall back to
> >> whatever is set on the inode->i_streamid.
> 
> Why should do the fall back?

Because then you have a method of using streams with applications
that aren't aware of streams.

Or perhaps you have a file you know has different access patterns to
the rest of the files in a directory, and you don't want to have to
set the stream on every process that opens and uses that file. e.g.
database writeahead log files (sequential write, never read) vs
database index/table files (random read/write).....

> > Good point, agree. Will make that change.
> 
> That change causes problem for direct IO, for example
> 
> process 1:
> fd = open("/dev/nvme0n1", O_DIRECT...);
> //set stream_id 1
> fadvise(fd, 1, 0, POSIX_FADV_STREAMID);
> pwrite(fd, ....);
> 
> process 2:
> fd = open("/dev/nvme0n1", O_DIRECT...);
> //should be legacy stream_id 0
> pwrite(fd, ....);
> 
> But now process 2 also see stream_id 1, which is wrong.

It's not wrong, your behaviour model is just different You have
defined a process/fd based stream model and not considered
considered that admins and applications might want to use a file
based stream model instead, so applications don't need to even be
aware that write streams are in use...

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 1/6] block: add support for carrying a stream ID in a bio
  2015-03-25  2:30   ` Dave Chinner
@ 2015-04-12 10:42     ` Dmitry Monakhov
  0 siblings, 0 replies; 33+ messages in thread
From: Dmitry Monakhov @ 2015-04-12 10:42 UTC (permalink / raw)
  To: Dave Chinner, Jens Axboe; +Cc: axboe, linux-kernel, linux-fsdevel, ming.l


Dave Chinner <david@fromorbit.com> writes:

> On Tue, Mar 24, 2015 at 09:26:58AM -0600, Jens Axboe wrote:
>> The top bits of bio->bi_flags are reserved for keeping the
>> allocation pool, set aside the next four bits for carrying
>> a stream ID. That leaves us with support for 15 streams,
>> 0 is reserved as a "stream not set" value.
>> 
>> Add helpers for setting/getting stream ID of a bio.
> ....
>> +/*
>> + * after the pool bits, next 4 bits are for the stream id
>> + */
>> +#define BIO_STREAM_BITS		(4)
>> +#define BIO_STREAM_OFFSET	(BITS_PER_LONG - 8)
>> +#define BIO_STREAM_MASK		((1 << BIO_STREAM_BITS) - 1)
>> +
>> +static inline unsigned long streamid_to_flags(unsigned int id)
>> +{
>> +	return (unsigned long) (id & BIO_STREAM_MASK) << BIO_STREAM_OFFSET;
>> +}
>> +
>> +static inline void bio_set_streamid(struct bio *bio, unsigned int id)
>> +{
>> +	bio->bi_flags |= streamid_to_flags(id);
>> +}
>> +
>> +static inline unsigned int bio_get_streamid(struct bio *bio)
>> +{
>> +	return (bio->bi_flags >> BIO_STREAM_OFFSET) & BIO_STREAM_MASK;
>> +}
>> +
>> +static inline bool bio_streamid_valid(struct bio *bio)
>> +{
>> +	return bio_get_streamid(bio) != 0;
>> +}
>
> Need to reserve at least one stream for filesystem private use (e.g.
> metadata writeback). Potentially 2 streams - one for the journal
> which is frequently overwritten, the other for all other long lived
> persistent metadata.
Definitely. User may set it only if it has CAP_RESOURCES.
This is fun, but we act as a soviet nomenclature who try to monopolize
food distribution system :)
>
> Cheers,
>
> Dave.
> -- 
> Dave Chinner
> david@fromorbit.com
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 3/6] direct-io: add support for write stream IDs
  2015-04-11 11:59         ` Dave Chinner
@ 2015-04-17  6:20           ` Ming Lin
  2015-04-17 23:06             ` Dave Chinner
  0 siblings, 1 reply; 33+ messages in thread
From: Ming Lin @ 2015-04-17  6:20 UTC (permalink / raw)
  To: Dave Chinner
  Cc: Ming Lin, Jens Axboe, Jens Axboe, lkml, linux-fsdevel, ming.l,
	Kwan (Hingkwan) Huen-SSI

On Sat, Apr 11, 2015 at 4:59 AM, Dave Chinner <david@fromorbit.com> wrote:
> On Fri, Apr 10, 2015 at 04:50:05PM -0700, Ming Lin wrote:
>> On Wed, Mar 25, 2015 at 7:26 AM, Jens Axboe <axboe@kernel.dk> wrote:
>> > On 03/24/2015 08:43 PM, Dave Chinner wrote:
>> >>
>> >> On Tue, Mar 24, 2015 at 09:27:00AM -0600, Jens Axboe wrote:
>> >>>
>> >>> Get the streamid from the file, if any, and set it on the bio.
>> >>>
>> >>> Signed-off-by: Jens Axboe <axboe@fb.com>
>> >>> ---
>> >>>   fs/direct-io.c | 4 ++++
>> >>>   1 file changed, 4 insertions(+)
>> >>>
>> >>> diff --git a/fs/direct-io.c b/fs/direct-io.c
>> >>> index e181b6b2e297..5d2750346451 100644
>> >>> --- a/fs/direct-io.c
>> >>> +++ b/fs/direct-io.c
>> >>> @@ -77,6 +77,7 @@ struct dio_submit {
>> >>>         int reap_counter;               /* rate limit reaping */
>> >>>         sector_t final_block_in_request;/* doesn't change */
>> >>>         int boundary;                   /* prev block is at a boundary */
>> >>> +       int streamid;                   /* Write stream ID */
>> >>>         get_block_t *get_block;         /* block mapping function */
>> >>>         dio_submit_t *submit_io;        /* IO submition function */
>> >>>
>> >>> @@ -372,6 +373,8 @@ dio_bio_alloc(struct dio *dio, struct dio_submit
>> >>> *sdio,
>> >>>
>> >>>         sdio->bio = bio;
>> >>>         sdio->logical_offset_in_bio = sdio->cur_page_fs_offset;
>> >>> +
>> >>> +       bio_set_streamid(bio, sdio->streamid);
>> >>>   }
>> >>>
>> >>>   /*
>> >>> @@ -1205,6 +1208,7 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb,
>> >>> struct inode *inode,
>> >>>         sdio.blkbits = blkbits;
>> >>>         sdio.blkfactor = i_blkbits - blkbits;
>> >>>         sdio.block_in_file = offset >> blkbits;
>> >>> +       sdio.streamid = iocb->ki_filp->f_streamid;
>> >>
>> >>
>> >> If iocb->ki_filp->f_streamid is not set, then it should fall back to
>> >> whatever is set on the inode->i_streamid.
>>
>> Why should do the fall back?
>
> Because then you have a method of using streams with applications
> that aren't aware of streams.
>
> Or perhaps you have a file you know has different access patterns to
> the rest of the files in a directory, and you don't want to have to
> set the stream on every process that opens and uses that file. e.g.
> database writeahead log files (sequential write, never read) vs
> database index/table files (random read/write).....
>
>> > Good point, agree. Will make that change.
>>
>> That change causes problem for direct IO, for example
>>
>> process 1:
>> fd = open("/dev/nvme0n1", O_DIRECT...);
>> //set stream_id 1
>> fadvise(fd, 1, 0, POSIX_FADV_STREAMID);
>> pwrite(fd, ....);
>>
>> process 2:
>> fd = open("/dev/nvme0n1", O_DIRECT...);
>> //should be legacy stream_id 0
>> pwrite(fd, ....);
>>
>> But now process 2 also see stream_id 1, which is wrong.
>
> It's not wrong, your behaviour model is just different You have
> defined a process/fd based stream model and not considered
> considered that admins and applications might want to use a file
> based stream model instead, so applications don't need to even be
> aware that write streams are in use...

The stream must be opened, otherwise device will return error if application
write to a not-opened stream.

Device has limited number of streams, for example, 16 streams.
There are 2 APIs to open/close the stream.

process 1:
fd = open("/dev/nvme0n1", O_DIRECT...);
stream_id = open_stream("/dev/nvme0n1", ....);
fadvise(fd, stream_id, 0, POSIX_FADV_STREAMID);
pwrite(fd, ....);
close_stream("/dev/nvme0n1", stream_id);

process 2:
fd = open("/dev/nvme0n1", O_DIRECT...);
//should be legacy stream_id 0
pwrite(fd, ....);

Now process 2 gets error because the "stream_id" was already closed.

Thanks,
Ming

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 3/6] direct-io: add support for write stream IDs
  2015-04-10 23:50       ` Ming Lin
  2015-04-11  0:06         ` Ming Lin
  2015-04-11 11:59         ` Dave Chinner
@ 2015-04-17 15:17         ` Jens Axboe
  2 siblings, 0 replies; 33+ messages in thread
From: Jens Axboe @ 2015-04-17 15:17 UTC (permalink / raw)
  To: Ming Lin, Jens Axboe; +Cc: Dave Chinner, lkml, linux-fsdevel, ming.l

On 04/10/2015 05:50 PM, Ming Lin wrote:
> On Wed, Mar 25, 2015 at 7:26 AM, Jens Axboe <axboe@kernel.dk> wrote:
>> On 03/24/2015 08:43 PM, Dave Chinner wrote:
>>>
>>> On Tue, Mar 24, 2015 at 09:27:00AM -0600, Jens Axboe wrote:
>>>>
>>>> Get the streamid from the file, if any, and set it on the bio.
>>>>
>>>> Signed-off-by: Jens Axboe <axboe@fb.com>
>>>> ---
>>>>    fs/direct-io.c | 4 ++++
>>>>    1 file changed, 4 insertions(+)
>>>>
>>>> diff --git a/fs/direct-io.c b/fs/direct-io.c
>>>> index e181b6b2e297..5d2750346451 100644
>>>> --- a/fs/direct-io.c
>>>> +++ b/fs/direct-io.c
>>>> @@ -77,6 +77,7 @@ struct dio_submit {
>>>>          int reap_counter;               /* rate limit reaping */
>>>>          sector_t final_block_in_request;/* doesn't change */
>>>>          int boundary;                   /* prev block is at a boundary */
>>>> +       int streamid;                   /* Write stream ID */
>>>>          get_block_t *get_block;         /* block mapping function */
>>>>          dio_submit_t *submit_io;        /* IO submition function */
>>>>
>>>> @@ -372,6 +373,8 @@ dio_bio_alloc(struct dio *dio, struct dio_submit
>>>> *sdio,
>>>>
>>>>          sdio->bio = bio;
>>>>          sdio->logical_offset_in_bio = sdio->cur_page_fs_offset;
>>>> +
>>>> +       bio_set_streamid(bio, sdio->streamid);
>>>>    }
>>>>
>>>>    /*
>>>> @@ -1205,6 +1208,7 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb,
>>>> struct inode *inode,
>>>>          sdio.blkbits = blkbits;
>>>>          sdio.blkfactor = i_blkbits - blkbits;
>>>>          sdio.block_in_file = offset >> blkbits;
>>>> +       sdio.streamid = iocb->ki_filp->f_streamid;
>>>
>>>
>>> If iocb->ki_filp->f_streamid is not set, then it should fall back to
>>> whatever is set on the inode->i_streamid.
>
> Why should do the fall back?

Because the assumption is that, in general, the specific file is a good 
indication of the data lifetime, if someone has already set that. It's a 
better guess than writing without any stream attached.

> That change causes problem for direct IO, for example
>
> process 1:
> fd = open("/dev/nvme0n1", O_DIRECT...);
> //set stream_id 1
> fadvise(fd, 1, 0, POSIX_FADV_STREAMID);
> pwrite(fd, ....);
>
> process 2:
> fd = open("/dev/nvme0n1", O_DIRECT...);
> //should be legacy stream_id 0
> pwrite(fd, ....);
>
> But now process 2 also see stream_id 1, which is wrong.

I guess for that case, it is a problem. Basically the fallback breaks 
down for full block devices, or huge files that are used as a general 
backing store (like a vm image, for instance). Hmm, not sure what the 
right solution would be here, or if there really is one. It's probably 
best NOT to do the fallback after all.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 3/6] direct-io: add support for write stream IDs
  2015-04-17  6:20           ` Ming Lin
@ 2015-04-17 23:06             ` Dave Chinner
  2015-04-17 23:11               ` Jens Axboe
  0 siblings, 1 reply; 33+ messages in thread
From: Dave Chinner @ 2015-04-17 23:06 UTC (permalink / raw)
  To: Ming Lin
  Cc: Jens Axboe, Jens Axboe, lkml, linux-fsdevel, ming.l,
	Kwan (Hingkwan) Huen-SSI

On Thu, Apr 16, 2015 at 11:20:45PM -0700, Ming Lin wrote:
> On Sat, Apr 11, 2015 at 4:59 AM, Dave Chinner <david@fromorbit.com> wrote:
> > On Fri, Apr 10, 2015 at 04:50:05PM -0700, Ming Lin wrote:
> >> On Wed, Mar 25, 2015 at 7:26 AM, Jens Axboe <axboe@kernel.dk> wrote:
> >> >> If iocb->ki_filp->f_streamid is not set, then it should fall back to
> >> >> whatever is set on the inode->i_streamid.
> >>
> >> Why should do the fall back?
> >
> > Because then you have a method of using streams with applications
> > that aren't aware of streams.
> >
> > Or perhaps you have a file you know has different access patterns to
> > the rest of the files in a directory, and you don't want to have to
> > set the stream on every process that opens and uses that file. e.g.
> > database writeahead log files (sequential write, never read) vs
> > database index/table files (random read/write).....
> >
> >> > Good point, agree. Will make that change.
> >>
> >> That change causes problem for direct IO, for example
> >>
> >> process 1:
> >> fd = open("/dev/nvme0n1", O_DIRECT...);
> >> //set stream_id 1
> >> fadvise(fd, 1, 0, POSIX_FADV_STREAMID);
> >> pwrite(fd, ....);
> >>
> >> process 2:
> >> fd = open("/dev/nvme0n1", O_DIRECT...);
> >> //should be legacy stream_id 0
> >> pwrite(fd, ....);
> >>
> >> But now process 2 also see stream_id 1, which is wrong.
> >
> > It's not wrong, your behaviour model is just different You have
> > defined a process/fd based stream model and not considered
> > considered that admins and applications might want to use a file
> > based stream model instead, so applications don't need to even be
> > aware that write streams are in use...
> 
> The stream must be opened, otherwise device will return error if application
> write to a not-opened stream.

That's an extremely device specific *implementation* of a write
stream. The *concept* of a write stream being passed from userspace to
the block layer doesn't have such constraints, and I get realy
concerned when implementations of a generic concept are so tightly
focussed around one type of hardware implementation of the
concept...

> Device has limited number of streams, for example, 16 streams.
> There are 2 APIs to open/close the stream.

What's to stop me writing something for DM-thinp that understands
write streams in bios and uses it to separate out the write streams
into different regions of the thinp device to improve locality of
it's data placement and hence reduce fragmentation?

Yes, for nvme devices, the "streamid" might come from hardware,
but there's nothing stopping other storage devices using it
differently or having fewer constraints. e.g. unknown ID -> same as
stream 0....

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 3/6] direct-io: add support for write stream IDs
  2015-04-17 23:06             ` Dave Chinner
@ 2015-04-17 23:11               ` Jens Axboe
  2015-04-17 23:51                 ` Dave Chinner
  0 siblings, 1 reply; 33+ messages in thread
From: Jens Axboe @ 2015-04-17 23:11 UTC (permalink / raw)
  To: Dave Chinner, Ming Lin
  Cc: Jens Axboe, lkml, linux-fsdevel, ming.l, Kwan (Hingkwan) Huen-SSI

On 04/17/2015 05:06 PM, Dave Chinner wrote:
> On Thu, Apr 16, 2015 at 11:20:45PM -0700, Ming Lin wrote:
>> On Sat, Apr 11, 2015 at 4:59 AM, Dave Chinner <david@fromorbit.com> wrote:
>>> On Fri, Apr 10, 2015 at 04:50:05PM -0700, Ming Lin wrote:
>>>> On Wed, Mar 25, 2015 at 7:26 AM, Jens Axboe <axboe@kernel.dk> wrote:
>>>>>> If iocb->ki_filp->f_streamid is not set, then it should fall back to
>>>>>> whatever is set on the inode->i_streamid.
>>>>
>>>> Why should do the fall back?
>>>
>>> Because then you have a method of using streams with applications
>>> that aren't aware of streams.
>>>
>>> Or perhaps you have a file you know has different access patterns to
>>> the rest of the files in a directory, and you don't want to have to
>>> set the stream on every process that opens and uses that file. e.g.
>>> database writeahead log files (sequential write, never read) vs
>>> database index/table files (random read/write).....
>>>
>>>>> Good point, agree. Will make that change.
>>>>
>>>> That change causes problem for direct IO, for example
>>>>
>>>> process 1:
>>>> fd = open("/dev/nvme0n1", O_DIRECT...);
>>>> //set stream_id 1
>>>> fadvise(fd, 1, 0, POSIX_FADV_STREAMID);
>>>> pwrite(fd, ....);
>>>>
>>>> process 2:
>>>> fd = open("/dev/nvme0n1", O_DIRECT...);
>>>> //should be legacy stream_id 0
>>>> pwrite(fd, ....);
>>>>
>>>> But now process 2 also see stream_id 1, which is wrong.
>>>
>>> It's not wrong, your behaviour model is just different You have
>>> defined a process/fd based stream model and not considered
>>> considered that admins and applications might want to use a file
>>> based stream model instead, so applications don't need to even be
>>> aware that write streams are in use...
>>
>> The stream must be opened, otherwise device will return error if application
>> write to a not-opened stream.
>
> That's an extremely device specific *implementation* of a write
> stream. The *concept* of a write stream being passed from userspace to
> the block layer doesn't have such constraints, and I get realy
> concerned when implementations of a generic concept are so tightly
> focussed around one type of hardware implementation of the
> concept...

Indeed, which is why the implementation posted cares ONLY about the 
stream ID itself, and passing that through.

But the point about fallback is valid, however, for some use cases that 
will not be what you want. But we have to make some sort of decision, 
and falling back to the inode set value (if one is set) is probably the 
right thing to do in most use cases.

>> Device has limited number of streams, for example, 16 streams.
>> There are 2 APIs to open/close the stream.
>
> What's to stop me writing something for DM-thinp that understands
> write streams in bios and uses it to separate out the write streams
> into different regions of the thinp device to improve locality of
> it's data placement and hence reduce fragmentation?

Absolutely nothing, in fact that's one of the use cases that I had in 
mind. Or for for caching software.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 3/6] direct-io: add support for write stream IDs
  2015-04-17 23:11               ` Jens Axboe
@ 2015-04-17 23:51                 ` Dave Chinner
  2015-04-18  2:00                   ` Jens Axboe
  0 siblings, 1 reply; 33+ messages in thread
From: Dave Chinner @ 2015-04-17 23:51 UTC (permalink / raw)
  To: Jens Axboe
  Cc: Ming Lin, Jens Axboe, lkml, linux-fsdevel, ming.l,
	Kwan (Hingkwan) Huen-SSI

On Fri, Apr 17, 2015 at 05:11:40PM -0600, Jens Axboe wrote:
> On 04/17/2015 05:06 PM, Dave Chinner wrote:
> >On Thu, Apr 16, 2015 at 11:20:45PM -0700, Ming Lin wrote:
> >>On Sat, Apr 11, 2015 at 4:59 AM, Dave Chinner <david@fromorbit.com> wrote:
> >>>On Fri, Apr 10, 2015 at 04:50:05PM -0700, Ming Lin wrote:
> >>>>On Wed, Mar 25, 2015 at 7:26 AM, Jens Axboe <axboe@kernel.dk> wrote:
> >>>>>>If iocb->ki_filp->f_streamid is not set, then it should fall back to
> >>>>>>whatever is set on the inode->i_streamid.
> >>>>
> >>>>Why should do the fall back?
> >>>
> >>>Because then you have a method of using streams with applications
> >>>that aren't aware of streams.
> >>>
> >>>Or perhaps you have a file you know has different access patterns to
> >>>the rest of the files in a directory, and you don't want to have to
> >>>set the stream on every process that opens and uses that file. e.g.
> >>>database writeahead log files (sequential write, never read) vs
> >>>database index/table files (random read/write).....
> >>>
> >>>>>Good point, agree. Will make that change.
> >>>>
> >>>>That change causes problem for direct IO, for example
> >>>>
> >>>>process 1:
> >>>>fd = open("/dev/nvme0n1", O_DIRECT...);
> >>>>//set stream_id 1
> >>>>fadvise(fd, 1, 0, POSIX_FADV_STREAMID);
> >>>>pwrite(fd, ....);
> >>>>
> >>>>process 2:
> >>>>fd = open("/dev/nvme0n1", O_DIRECT...);
> >>>>//should be legacy stream_id 0
> >>>>pwrite(fd, ....);
> >>>>
> >>>>But now process 2 also see stream_id 1, which is wrong.
> >>>
> >>>It's not wrong, your behaviour model is just different You have
> >>>defined a process/fd based stream model and not considered
> >>>considered that admins and applications might want to use a file
> >>>based stream model instead, so applications don't need to even be
> >>>aware that write streams are in use...
> >>
> >>The stream must be opened, otherwise device will return error if application
> >>write to a not-opened stream.
> >
> >That's an extremely device specific *implementation* of a write
> >stream. The *concept* of a write stream being passed from userspace to
> >the block layer doesn't have such constraints, and I get realy
> >concerned when implementations of a generic concept are so tightly
> >focussed around one type of hardware implementation of the
> >concept...
> 
> Indeed, which is why the implementation posted cares ONLY about the
> stream ID itself, and passing that through.
> 
> But the point about fallback is valid, however, for some use cases
> that will not be what you want. But we have to make some sort of
> decision, and falling back to the inode set value (if one is set) is
> probably the right thing to do in most use cases.

Right, the question is then whether fadvise should set the value on
the inode at all, because then the effect of setting it on a fd also
changes the fallback. Perhaps we need to a distinction between
"setting the stream for this fd" which lasts as long as the fd is
active, and "setting the default inode stream" which is potentially
a persistent operation if the filesystem stores it on disk...

> >>Device has limited number of streams, for example, 16 streams.
> >>There are 2 APIs to open/close the stream.
> >
> >What's to stop me writing something for DM-thinp that understands
> >write streams in bios and uses it to separate out the write streams
> >into different regions of the thinp device to improve locality of
> >it's data placement and hence reduce fragmentation?
> 
> Absolutely nothing, in fact that's one of the use cases that I had
> in mind. Or for for caching software.

*nod*. We are on the same page, then :)

Cheers,

Dave.
> 
> -- 
> Jens Axboe
> 
> 

-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 3/6] direct-io: add support for write stream IDs
  2015-04-17 23:51                 ` Dave Chinner
@ 2015-04-18  2:00                   ` Jens Axboe
  0 siblings, 0 replies; 33+ messages in thread
From: Jens Axboe @ 2015-04-18  2:00 UTC (permalink / raw)
  To: Dave Chinner, Jens Axboe
  Cc: Ming Lin, lkml, linux-fsdevel, ming.l, Kwan (Hingkwan) Huen-SSI

On 04/17/2015 05:51 PM, Dave Chinner wrote:
> On Fri, Apr 17, 2015 at 05:11:40PM -0600, Jens Axboe wrote:
>> On 04/17/2015 05:06 PM, Dave Chinner wrote:
>>> On Thu, Apr 16, 2015 at 11:20:45PM -0700, Ming Lin wrote:
>>>> On Sat, Apr 11, 2015 at 4:59 AM, Dave Chinner <david@fromorbit.com> wrote:
>>>>> On Fri, Apr 10, 2015 at 04:50:05PM -0700, Ming Lin wrote:
>>>>>> On Wed, Mar 25, 2015 at 7:26 AM, Jens Axboe <axboe@kernel.dk> wrote:
>>>>>>>> If iocb->ki_filp->f_streamid is not set, then it should fall back to
>>>>>>>> whatever is set on the inode->i_streamid.
>>>>>>
>>>>>> Why should do the fall back?
>>>>>
>>>>> Because then you have a method of using streams with applications
>>>>> that aren't aware of streams.
>>>>>
>>>>> Or perhaps you have a file you know has different access patterns to
>>>>> the rest of the files in a directory, and you don't want to have to
>>>>> set the stream on every process that opens and uses that file. e.g.
>>>>> database writeahead log files (sequential write, never read) vs
>>>>> database index/table files (random read/write).....
>>>>>
>>>>>>> Good point, agree. Will make that change.
>>>>>>
>>>>>> That change causes problem for direct IO, for example
>>>>>>
>>>>>> process 1:
>>>>>> fd = open("/dev/nvme0n1", O_DIRECT...);
>>>>>> //set stream_id 1
>>>>>> fadvise(fd, 1, 0, POSIX_FADV_STREAMID);
>>>>>> pwrite(fd, ....);
>>>>>>
>>>>>> process 2:
>>>>>> fd = open("/dev/nvme0n1", O_DIRECT...);
>>>>>> //should be legacy stream_id 0
>>>>>> pwrite(fd, ....);
>>>>>>
>>>>>> But now process 2 also see stream_id 1, which is wrong.
>>>>>
>>>>> It's not wrong, your behaviour model is just different You have
>>>>> defined a process/fd based stream model and not considered
>>>>> considered that admins and applications might want to use a file
>>>>> based stream model instead, so applications don't need to even be
>>>>> aware that write streams are in use...
>>>>
>>>> The stream must be opened, otherwise device will return error if application
>>>> write to a not-opened stream.
>>>
>>> That's an extremely device specific *implementation* of a write
>>> stream. The *concept* of a write stream being passed from userspace to
>>> the block layer doesn't have such constraints, and I get realy
>>> concerned when implementations of a generic concept are so tightly
>>> focussed around one type of hardware implementation of the
>>> concept...
>>
>> Indeed, which is why the implementation posted cares ONLY about the
>> stream ID itself, and passing that through.
>>
>> But the point about fallback is valid, however, for some use cases
>> that will not be what you want. But we have to make some sort of
>> decision, and falling back to the inode set value (if one is set) is
>> probably the right thing to do in most use cases.
>
> Right, the question is then whether fadvise should set the value on
> the inode at all, because then the effect of setting it on a fd also
> changes the fallback. Perhaps we need to a distinction between
> "setting the stream for this fd" which lasts as long as the fd is
> active, and "setting the default inode stream" which is potentially
> a persistent operation if the filesystem stores it on disk...

Yes, that might be a good compromise. The easiest would be to define a 
second fadvise advice, where the stronger advice would be file + inode. 
Another option would be changing the file approach to use fcntl(), and 
keeping the fadvise for the inode. I'll be happy to take input on what 
people would prefer here.

>>>> Device has limited number of streams, for example, 16 streams.
>>>> There are 2 APIs to open/close the stream.
>>>
>>> What's to stop me writing something for DM-thinp that understands
>>> write streams in bios and uses it to separate out the write streams
>>> into different regions of the thinp device to improve locality of
>>> it's data placement and hence reduce fragmentation?
>>
>> Absolutely nothing, in fact that's one of the use cases that I had
>> in mind. Or for for caching software.
>
> *nod*. We are on the same page, then :)

Yes completely, basically just wanted to clarify that.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 33+ messages in thread

end of thread, other threads:[~2015-04-18  2:01 UTC | newest]

Thread overview: 33+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-03-24 15:26 [PATCH RFC] Support for write stream IDs Jens Axboe
2015-03-24 15:26 ` [PATCH 1/6] block: add support for carrying a stream ID in a bio Jens Axboe
2015-03-24 17:11   ` Matias Bjørling
2015-03-24 17:26     ` Jens Axboe
2015-03-24 22:07       ` Ming Lin-SSI
2015-03-25  1:42         ` Jens Axboe
2015-03-25  8:11         ` Matias Bjørling
2015-03-25 18:36           ` Ming Lin-SSI
2015-03-25  2:30   ` Dave Chinner
2015-04-12 10:42     ` Dmitry Monakhov
2015-03-24 15:26 ` [PATCH 2/6] Add support for per-file stream ID Jens Axboe
2015-03-24 15:27 ` [PATCH 3/6] direct-io: add support for write stream IDs Jens Axboe
2015-03-25  2:43   ` Dave Chinner
2015-03-25 14:26     ` Jens Axboe
2015-04-10 23:50       ` Ming Lin
2015-04-11  0:06         ` Ming Lin
2015-04-11 11:59         ` Dave Chinner
2015-04-17  6:20           ` Ming Lin
2015-04-17 23:06             ` Dave Chinner
2015-04-17 23:11               ` Jens Axboe
2015-04-17 23:51                 ` Dave Chinner
2015-04-18  2:00                   ` Jens Axboe
2015-04-17 15:17         ` Jens Axboe
2015-03-24 15:27 ` [PATCH 4/6] Add stream ID support for buffered writeback Jens Axboe
2015-03-25  2:40   ` Dave Chinner
2015-03-25 14:17     ` Jens Axboe
2015-03-24 15:27 ` [PATCH 5/6] btrfs: add support for buffered writeback stream ID Jens Axboe
2015-03-24 15:27 ` [PATCH 6/6] xfs: " Jens Axboe
2015-03-25  2:41   ` Dave Chinner
2015-03-24 17:03 ` [PATCH RFC] Support for write stream IDs Jeff Moyer
2015-03-24 17:08   ` Jens Axboe
2015-03-24 21:46     ` Ming Lin-SSI
2015-03-24 21:48       ` Jens Axboe

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).