All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/5] brd: Allow to change block sizes
@ 2023-03-06 12:01 Hannes Reinecke
  2023-03-06 12:01 ` [PATCH 1/5] brd: convert to folios Hannes Reinecke
                   ` (5 more replies)
  0 siblings, 6 replies; 26+ messages in thread
From: Hannes Reinecke @ 2023-03-06 12:01 UTC (permalink / raw)
  To: Jens Axboe
  Cc: linux-block, Matthew Wilcox, Luis Chamberlain, Keith Busch,
	Hannes Reinecke

Hi all,

meat to the bone: with this patchset one can change the physical and
logical block size of the 'brd' ramdisk driver.
Default is 512 (for both); one can easily increase the physical block
size to 16k and the logical block size to 4k.
Increasing the logcial block size beyond 4k gives some 'interesting'
crashes.
It should also be possible to use the resulting ram disk as a backend
for nvme target, thereby exercising the NVMe stack for larger block
sizes, too.

Happy hacking!

Hannes Reinecke (5):
  brd: convert to folios
  brd: abstract page_size conventions
  brd: make sector size configurable
  brd: limit maximal block size to 32M
  brd: make logical sector size configurable

 drivers/block/brd.c | 244 ++++++++++++++++++++++++--------------------
 1 file changed, 136 insertions(+), 108 deletions(-)

-- 
2.35.3


^ permalink raw reply	[flat|nested] 26+ messages in thread

* [PATCH 1/5] brd: convert to folios
  2023-03-06 12:01 [PATCH 0/5] brd: Allow to change block sizes Hannes Reinecke
@ 2023-03-06 12:01 ` Hannes Reinecke
  2023-03-06 16:04   ` kernel test robot
                     ` (2 more replies)
  2023-03-06 12:01 ` [PATCH 2/5] brd: abstract page_size conventions Hannes Reinecke
                   ` (4 subsequent siblings)
  5 siblings, 3 replies; 26+ messages in thread
From: Hannes Reinecke @ 2023-03-06 12:01 UTC (permalink / raw)
  To: Jens Axboe
  Cc: linux-block, Matthew Wilcox, Luis Chamberlain, Keith Busch,
	Hannes Reinecke

Convert the driver to work on folios instead of pages.

Signed-off-by: Hannes Reinecke <hare@suse.de>
---
 drivers/block/brd.c | 171 ++++++++++++++++++++++----------------------
 1 file changed, 85 insertions(+), 86 deletions(-)

diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 34177f1bd97d..7efc276c4963 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -28,8 +28,8 @@
 #include <linux/uaccess.h>
 
 /*
- * Each block ramdisk device has a radix_tree brd_pages of pages that stores
- * the pages containing the block device's contents. A brd page's ->index is
+ * Each block ramdisk device has a radix_tree brd_folios of folios that stores
+ * the folios containing the block device's contents. A brd folio's ->index is
  * its offset in PAGE_SIZE units. This is similar to, but in no way connected
  * with, the kernel's pagecache or buffer cache (which sit above our block
  * device).
@@ -40,25 +40,25 @@ struct brd_device {
 	struct list_head	brd_list;
 
 	/*
-	 * Backing store of pages and lock to protect it. This is the contents
-	 * of the block device.
+	 * Backing store of folios and lock to protect it.
+	 * This is the contents of the block device.
 	 */
 	spinlock_t		brd_lock;
-	struct radix_tree_root	brd_pages;
-	u64			brd_nr_pages;
+	struct radix_tree_root	brd_folios;
+	u64			brd_nr_folios;
 };
 
 /*
- * Look up and return a brd's page for a given sector.
+ * Look up and return a brd's folio for a given sector.
  */
-static struct page *brd_lookup_page(struct brd_device *brd, sector_t sector)
+static struct folio *brd_lookup_folio(struct brd_device *brd, sector_t sector)
 {
 	pgoff_t idx;
-	struct page *page;
+	struct folio *folio;
 
 	/*
-	 * The page lifetime is protected by the fact that we have opened the
-	 * device node -- brd pages will never be deleted under us, so we
+	 * The folio lifetime is protected by the fact that we have opened the
+	 * device node -- brd folios will never be deleted under us, so we
 	 * don't need any further locking or refcounting.
 	 *
 	 * This is strictly true for the radix-tree nodes as well (ie. we
@@ -68,49 +68,49 @@ static struct page *brd_lookup_page(struct brd_device *brd, sector_t sector)
 	 * here, only deletes).
 	 */
 	rcu_read_lock();
-	idx = sector >> PAGE_SECTORS_SHIFT; /* sector to page index */
-	page = radix_tree_lookup(&brd->brd_pages, idx);
+	idx = sector >> PAGE_SECTORS_SHIFT; /* sector to folio index */
+	folio = radix_tree_lookup(&brd->brd_folios, idx);
 	rcu_read_unlock();
 
-	BUG_ON(page && page->index != idx);
+	BUG_ON(folio && folio->index != idx);
 
-	return page;
+	return folio;
 }
 
 /*
- * Insert a new page for a given sector, if one does not already exist.
+ * Insert a new folio for a given sector, if one does not already exist.
  */
-static int brd_insert_page(struct brd_device *brd, sector_t sector, gfp_t gfp)
+static int brd_insert_folio(struct brd_device *brd, sector_t sector, gfp_t gfp)
 {
 	pgoff_t idx;
-	struct page *page;
+	struct folio *folio;
 	int ret = 0;
 
-	page = brd_lookup_page(brd, sector);
-	if (page)
+	folio = brd_lookup_folio(brd, sector);
+	if (folio)
 		return 0;
 
-	page = alloc_page(gfp | __GFP_ZERO | __GFP_HIGHMEM);
-	if (!page)
+	folio = folio_alloc(gfp | __GFP_ZERO, 0);
+	if (!folio)
 		return -ENOMEM;
 
 	if (radix_tree_maybe_preload(gfp)) {
-		__free_page(page);
+		folio_put(folio);
 		return -ENOMEM;
 	}
 
 	spin_lock(&brd->brd_lock);
 	idx = sector >> PAGE_SECTORS_SHIFT;
-	page->index = idx;
-	if (radix_tree_insert(&brd->brd_pages, idx, page)) {
-		__free_page(page);
-		page = radix_tree_lookup(&brd->brd_pages, idx);
-		if (!page)
+	folio->index = idx;
+	if (radix_tree_insert(&brd->brd_folios, idx, folio)) {
+		folio_put(folio);
+		folio = radix_tree_lookup(&brd->brd_folios, idx);
+		if (!folio)
 			ret = -ENOMEM;
-		else if (page->index != idx)
+		else if (folio->index != idx)
 			ret = -EIO;
 	} else {
-		brd->brd_nr_pages++;
+		brd->brd_nr_folios++;
 	}
 	spin_unlock(&brd->brd_lock);
 
@@ -119,30 +119,30 @@ static int brd_insert_page(struct brd_device *brd, sector_t sector, gfp_t gfp)
 }
 
 /*
- * Free all backing store pages and radix tree. This must only be called when
+ * Free all backing store folios and radix tree. This must only be called when
  * there are no other users of the device.
  */
 #define FREE_BATCH 16
-static void brd_free_pages(struct brd_device *brd)
+static void brd_free_folios(struct brd_device *brd)
 {
 	unsigned long pos = 0;
-	struct page *pages[FREE_BATCH];
-	int nr_pages;
+	struct folio *folios[FREE_BATCH];
+	int nr_folios;
 
 	do {
 		int i;
 
-		nr_pages = radix_tree_gang_lookup(&brd->brd_pages,
-				(void **)pages, pos, FREE_BATCH);
+		nr_folios = radix_tree_gang_lookup(&brd->brd_folios,
+				(void **)folios, pos, FREE_BATCH);
 
-		for (i = 0; i < nr_pages; i++) {
+		for (i = 0; i < nr_folios; i++) {
 			void *ret;
 
-			BUG_ON(pages[i]->index < pos);
-			pos = pages[i]->index;
-			ret = radix_tree_delete(&brd->brd_pages, pos);
-			BUG_ON(!ret || ret != pages[i]);
-			__free_page(pages[i]);
+			BUG_ON(folios[i]->index < pos);
+			pos = folios[i]->index;
+			ret = radix_tree_delete(&brd->brd_folios, pos);
+			BUG_ON(!ret || ret != folios[i]);
+			folio_put(folios[i]);
 		}
 
 		pos++;
@@ -155,10 +155,10 @@ static void brd_free_pages(struct brd_device *brd)
 
 		/*
 		 * This assumes radix_tree_gang_lookup always returns as
-		 * many pages as possible. If the radix-tree code changes,
+		 * many folios as possible. If the radix-tree code changes,
 		 * so will this have to.
 		 */
-	} while (nr_pages == FREE_BATCH);
+	} while (nr_folios == FREE_BATCH);
 }
 
 /*
@@ -172,12 +172,12 @@ static int copy_to_brd_setup(struct brd_device *brd, sector_t sector, size_t n,
 	int ret;
 
 	copy = min_t(size_t, n, PAGE_SIZE - offset);
-	ret = brd_insert_page(brd, sector, gfp);
+	ret = brd_insert_folio(brd, sector, gfp);
 	if (ret)
 		return ret;
 	if (copy < n) {
 		sector += copy >> SECTOR_SHIFT;
-		ret = brd_insert_page(brd, sector, gfp);
+		ret = brd_insert_folio(brd, sector, gfp);
 	}
 	return ret;
 }
@@ -188,29 +188,29 @@ static int copy_to_brd_setup(struct brd_device *brd, sector_t sector, size_t n,
 static void copy_to_brd(struct brd_device *brd, const void *src,
 			sector_t sector, size_t n)
 {
-	struct page *page;
+	struct folio *folio;
 	void *dst;
 	unsigned int offset = (sector & (PAGE_SECTORS-1)) << SECTOR_SHIFT;
 	size_t copy;
 
 	copy = min_t(size_t, n, PAGE_SIZE - offset);
-	page = brd_lookup_page(brd, sector);
-	BUG_ON(!page);
+	folio = brd_lookup_folio(brd, sector);
+	BUG_ON(!folio);
 
-	dst = kmap_atomic(page);
-	memcpy(dst + offset, src, copy);
-	kunmap_atomic(dst);
+	dst = kmap_local_folio(folio, offset);
+	memcpy(dst, src, copy);
+	kunmap_local(dst);
 
 	if (copy < n) {
 		src += copy;
 		sector += copy >> SECTOR_SHIFT;
 		copy = n - copy;
-		page = brd_lookup_page(brd, sector);
-		BUG_ON(!page);
+		folio = brd_lookup_folio(brd, sector);
+		BUG_ON(!folio);
 
-		dst = kmap_atomic(page);
+		dst = kmap_local_folio(folio, 0);
 		memcpy(dst, src, copy);
-		kunmap_atomic(dst);
+		kunmap_local(dst);
 	}
 }
 
@@ -220,17 +220,17 @@ static void copy_to_brd(struct brd_device *brd, const void *src,
 static void copy_from_brd(void *dst, struct brd_device *brd,
 			sector_t sector, size_t n)
 {
-	struct page *page;
+	struct folio *folio;
 	void *src;
 	unsigned int offset = (sector & (PAGE_SECTORS-1)) << SECTOR_SHIFT;
 	size_t copy;
 
 	copy = min_t(size_t, n, PAGE_SIZE - offset);
-	page = brd_lookup_page(brd, sector);
-	if (page) {
-		src = kmap_atomic(page);
-		memcpy(dst, src + offset, copy);
-		kunmap_atomic(src);
+	folio = brd_lookup_folio(brd, sector);
+	if (folio) {
+		src = kmap_local_folio(folio, offset);
+		memcpy(dst, src, copy);
+		kunmap_local(src);
 	} else
 		memset(dst, 0, copy);
 
@@ -238,20 +238,20 @@ static void copy_from_brd(void *dst, struct brd_device *brd,
 		dst += copy;
 		sector += copy >> SECTOR_SHIFT;
 		copy = n - copy;
-		page = brd_lookup_page(brd, sector);
-		if (page) {
-			src = kmap_atomic(page);
+		folio = brd_lookup_folio(brd, sector);
+		if (folio) {
+			src = kmap_local_folio(folio, 0);
 			memcpy(dst, src, copy);
-			kunmap_atomic(src);
+			kunmap_local(src);
 		} else
 			memset(dst, 0, copy);
 	}
 }
 
 /*
- * Process a single bvec of a bio.
+ * Process a single folio of a bio.
  */
-static int brd_do_bvec(struct brd_device *brd, struct page *page,
+static int brd_do_folio(struct brd_device *brd, struct folio *folio,
 			unsigned int len, unsigned int off, blk_opf_t opf,
 			sector_t sector)
 {
@@ -261,7 +261,7 @@ static int brd_do_bvec(struct brd_device *brd, struct page *page,
 	if (op_is_write(opf)) {
 		/*
 		 * Must use NOIO because we don't want to recurse back into the
-		 * block or filesystem layers from page reclaim.
+		 * block or filesystem layers from folio reclaim.
 		 */
 		gfp_t gfp = opf & REQ_NOWAIT ? GFP_NOWAIT : GFP_NOIO;
 
@@ -270,15 +270,15 @@ static int brd_do_bvec(struct brd_device *brd, struct page *page,
 			goto out;
 	}
 
-	mem = kmap_atomic(page);
+	mem = kmap_local_folio(folio, off);
 	if (!op_is_write(opf)) {
-		copy_from_brd(mem + off, brd, sector, len);
-		flush_dcache_page(page);
+		copy_from_brd(mem, brd, sector, len);
+		flush_dcache_folio(folio);
 	} else {
-		flush_dcache_page(page);
-		copy_to_brd(brd, mem + off, sector, len);
+		flush_dcache_folio(folio);
+		copy_to_brd(brd, mem, sector, len);
 	}
-	kunmap_atomic(mem);
+	kunmap_local(mem);
 
 out:
 	return err;
@@ -288,19 +288,18 @@ static void brd_submit_bio(struct bio *bio)
 {
 	struct brd_device *brd = bio->bi_bdev->bd_disk->private_data;
 	sector_t sector = bio->bi_iter.bi_sector;
-	struct bio_vec bvec;
-	struct bvec_iter iter;
+	struct folio_iter iter;
 
-	bio_for_each_segment(bvec, bio, iter) {
-		unsigned int len = bvec.bv_len;
+	bio_for_each_folio_all(iter, bio) {
+		unsigned int len = iter.length;
 		int err;
 
 		/* Don't support un-aligned buffer */
-		WARN_ON_ONCE((bvec.bv_offset & (SECTOR_SIZE - 1)) ||
+		WARN_ON_ONCE((iter.offset & (SECTOR_SIZE - 1)) ||
 				(len & (SECTOR_SIZE - 1)));
 
-		err = brd_do_bvec(brd, bvec.bv_page, len, bvec.bv_offset,
-				  bio->bi_opf, sector);
+		err = brd_do_folio(brd, iter.folio, len, iter.offset,
+				   bio->bi_opf, sector);
 		if (err) {
 			if (err == -ENOMEM && bio->bi_opf & REQ_NOWAIT) {
 				bio_wouldblock_error(bio);
@@ -373,12 +372,12 @@ static int brd_alloc(int i)
 	list_add_tail(&brd->brd_list, &brd_devices);
 
 	spin_lock_init(&brd->brd_lock);
-	INIT_RADIX_TREE(&brd->brd_pages, GFP_ATOMIC);
+	INIT_RADIX_TREE(&brd->brd_folios, GFP_ATOMIC);
 
 	snprintf(buf, DISK_NAME_LEN, "ram%d", i);
 	if (!IS_ERR_OR_NULL(brd_debugfs_dir))
 		debugfs_create_u64(buf, 0444, brd_debugfs_dir,
-				&brd->brd_nr_pages);
+				&brd->brd_nr_folios);
 
 	disk = brd->brd_disk = blk_alloc_disk(NUMA_NO_NODE);
 	if (!disk)
@@ -434,7 +433,7 @@ static void brd_cleanup(void)
 	list_for_each_entry_safe(brd, next, &brd_devices, brd_list) {
 		del_gendisk(brd->brd_disk);
 		put_disk(brd->brd_disk);
-		brd_free_pages(brd);
+		brd_free_folios(brd);
 		list_del(&brd->brd_list);
 		kfree(brd);
 	}
@@ -465,7 +464,7 @@ static int __init brd_init(void)
 
 	brd_check_and_reset_par();
 
-	brd_debugfs_dir = debugfs_create_dir("ramdisk_pages", NULL);
+	brd_debugfs_dir = debugfs_create_dir("ramdisk_folios", NULL);
 
 	for (i = 0; i < rd_nr; i++) {
 		err = brd_alloc(i);
-- 
2.35.3


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 2/5] brd: abstract page_size conventions
  2023-03-06 12:01 [PATCH 0/5] brd: Allow to change block sizes Hannes Reinecke
  2023-03-06 12:01 ` [PATCH 1/5] brd: convert to folios Hannes Reinecke
@ 2023-03-06 12:01 ` Hannes Reinecke
  2023-03-06 12:01 ` [PATCH 3/5] brd: make sector size configurable Hannes Reinecke
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 26+ messages in thread
From: Hannes Reinecke @ 2023-03-06 12:01 UTC (permalink / raw)
  To: Jens Axboe
  Cc: linux-block, Matthew Wilcox, Luis Chamberlain, Keith Busch,
	Hannes Reinecke

In preparation for changing the block sizes abstract away references
to PAGE_SIZE and friends.

Signed-off-by: Hannes Reinecke <hare@suse.de>
---
 drivers/block/brd.c | 30 +++++++++++++++++++++---------
 1 file changed, 21 insertions(+), 9 deletions(-)

diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 7efc276c4963..fc41ea641dfb 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -55,6 +55,7 @@ static struct folio *brd_lookup_folio(struct brd_device *brd, sector_t sector)
 {
 	pgoff_t idx;
 	struct folio *folio;
+	unsigned int rd_sector_shift = PAGE_SHIFT - SECTOR_SHIFT;
 
 	/*
 	 * The folio lifetime is protected by the fact that we have opened the
@@ -68,7 +69,7 @@ static struct folio *brd_lookup_folio(struct brd_device *brd, sector_t sector)
 	 * here, only deletes).
 	 */
 	rcu_read_lock();
-	idx = sector >> PAGE_SECTORS_SHIFT; /* sector to folio index */
+	idx = sector >> rd_sector_shift; /* sector to folio index */
 	folio = radix_tree_lookup(&brd->brd_folios, idx);
 	rcu_read_unlock();
 
@@ -84,13 +85,15 @@ static int brd_insert_folio(struct brd_device *brd, sector_t sector, gfp_t gfp)
 {
 	pgoff_t idx;
 	struct folio *folio;
+	unsigned int rd_sector_shift = PAGE_SHIFT - SECTOR_SHIFT;
+	unsigned int rd_sector_order = get_order(PAGE_SIZE);
 	int ret = 0;
 
 	folio = brd_lookup_folio(brd, sector);
 	if (folio)
 		return 0;
 
-	folio = folio_alloc(gfp | __GFP_ZERO, 0);
+	folio = folio_alloc(gfp | __GFP_ZERO, rd_sector_order);
 	if (!folio)
 		return -ENOMEM;
 
@@ -100,7 +103,7 @@ static int brd_insert_folio(struct brd_device *brd, sector_t sector, gfp_t gfp)
 	}
 
 	spin_lock(&brd->brd_lock);
-	idx = sector >> PAGE_SECTORS_SHIFT;
+	idx = sector >> rd_sector_shift;
 	folio->index = idx;
 	if (radix_tree_insert(&brd->brd_folios, idx, folio)) {
 		folio_put(folio);
@@ -167,11 +170,14 @@ static void brd_free_folios(struct brd_device *brd)
 static int copy_to_brd_setup(struct brd_device *brd, sector_t sector, size_t n,
 			     gfp_t gfp)
 {
-	unsigned int offset = (sector & (PAGE_SECTORS-1)) << SECTOR_SHIFT;
+	unsigned int rd_sectors_shift = PAGE_SHIFT - SECTOR_SHIFT;
+	unsigned int rd_sectors = 1 << rd_sectors_shift;
+	unsigned int rd_sector_size = PAGE_SIZE;
+	unsigned int offset = (sector & (rd_sectors - 1)) << SECTOR_SHIFT;
 	size_t copy;
 	int ret;
 
-	copy = min_t(size_t, n, PAGE_SIZE - offset);
+	copy = min_t(size_t, n, rd_sector_size - offset);
 	ret = brd_insert_folio(brd, sector, gfp);
 	if (ret)
 		return ret;
@@ -190,10 +196,13 @@ static void copy_to_brd(struct brd_device *brd, const void *src,
 {
 	struct folio *folio;
 	void *dst;
-	unsigned int offset = (sector & (PAGE_SECTORS-1)) << SECTOR_SHIFT;
+	unsigned int rd_sectors_shift = PAGE_SHIFT - SECTOR_SHIFT;
+	unsigned int rd_sectors = 1 << rd_sectors_shift;
+	unsigned int rd_sector_size = PAGE_SIZE;
+	unsigned int offset = (sector & (rd_sectors - 1)) << SECTOR_SHIFT;
 	size_t copy;
 
-	copy = min_t(size_t, n, PAGE_SIZE - offset);
+	copy = min_t(size_t, n, rd_sector_size - offset);
 	folio = brd_lookup_folio(brd, sector);
 	BUG_ON(!folio);
 
@@ -222,10 +231,13 @@ static void copy_from_brd(void *dst, struct brd_device *brd,
 {
 	struct folio *folio;
 	void *src;
-	unsigned int offset = (sector & (PAGE_SECTORS-1)) << SECTOR_SHIFT;
+	unsigned int rd_sectors_shift = PAGE_SHIFT - SECTOR_SHIFT;
+	unsigned int rd_sectors = 1 << rd_sectors_shift;
+	unsigned int rd_sector_size = PAGE_SIZE;
+	unsigned int offset = (sector & (rd_sectors - 1)) << SECTOR_SHIFT;
 	size_t copy;
 
-	copy = min_t(size_t, n, PAGE_SIZE - offset);
+	copy = min_t(size_t, n, rd_sector_size - offset);
 	folio = brd_lookup_folio(brd, sector);
 	if (folio) {
 		src = kmap_local_folio(folio, offset);
-- 
2.35.3


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 3/5] brd: make sector size configurable
  2023-03-06 12:01 [PATCH 0/5] brd: Allow to change block sizes Hannes Reinecke
  2023-03-06 12:01 ` [PATCH 1/5] brd: convert to folios Hannes Reinecke
  2023-03-06 12:01 ` [PATCH 2/5] brd: abstract page_size conventions Hannes Reinecke
@ 2023-03-06 12:01 ` Hannes Reinecke
  2023-03-09  3:12   ` Luis Chamberlain
  2023-03-06 12:01 ` [PATCH 4/5] brd: limit maximal block size to 32M Hannes Reinecke
                   ` (2 subsequent siblings)
  5 siblings, 1 reply; 26+ messages in thread
From: Hannes Reinecke @ 2023-03-06 12:01 UTC (permalink / raw)
  To: Jens Axboe
  Cc: linux-block, Matthew Wilcox, Luis Chamberlain, Keith Busch,
	Hannes Reinecke

Add a module option 'rd_blksize' to allow the user to change
the sector size of the RAM disks.

Signed-off-by: Hannes Reinecke <hare@suse.de>
---
 drivers/block/brd.c | 30 +++++++++++++++++++-----------
 1 file changed, 19 insertions(+), 11 deletions(-)

diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index fc41ea641dfb..11bac3c3f1b6 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -30,7 +30,7 @@
 /*
  * Each block ramdisk device has a radix_tree brd_folios of folios that stores
  * the folios containing the block device's contents. A brd folio's ->index is
- * its offset in PAGE_SIZE units. This is similar to, but in no way connected
+ * its offset in brd_blksize units. This is similar to, but in no way connected
  * with, the kernel's pagecache or buffer cache (which sit above our block
  * device).
  */
@@ -46,6 +46,8 @@ struct brd_device {
 	spinlock_t		brd_lock;
 	struct radix_tree_root	brd_folios;
 	u64			brd_nr_folios;
+	unsigned int		brd_sector_shift;
+	unsigned int		brd_sector_size;
 };
 
 /*
@@ -55,7 +57,7 @@ static struct folio *brd_lookup_folio(struct brd_device *brd, sector_t sector)
 {
 	pgoff_t idx;
 	struct folio *folio;
-	unsigned int rd_sector_shift = PAGE_SHIFT - SECTOR_SHIFT;
+	unsigned int rd_sector_shift = brd->brd_sector_shift - SECTOR_SHIFT;
 
 	/*
 	 * The folio lifetime is protected by the fact that we have opened the
@@ -85,8 +87,8 @@ static int brd_insert_folio(struct brd_device *brd, sector_t sector, gfp_t gfp)
 {
 	pgoff_t idx;
 	struct folio *folio;
-	unsigned int rd_sector_shift = PAGE_SHIFT - SECTOR_SHIFT;
-	unsigned int rd_sector_order = get_order(PAGE_SIZE);
+	unsigned int rd_sector_shift = brd->brd_sector_shift - SECTOR_SHIFT;
+	unsigned int rd_sector_order = get_order(brd->brd_sector_size);
 	int ret = 0;
 
 	folio = brd_lookup_folio(brd, sector);
@@ -170,9 +172,9 @@ static void brd_free_folios(struct brd_device *brd)
 static int copy_to_brd_setup(struct brd_device *brd, sector_t sector, size_t n,
 			     gfp_t gfp)
 {
-	unsigned int rd_sectors_shift = PAGE_SHIFT - SECTOR_SHIFT;
+	unsigned int rd_sectors_shift = brd->brd_sector_shift - SECTOR_SHIFT;
 	unsigned int rd_sectors = 1 << rd_sectors_shift;
-	unsigned int rd_sector_size = PAGE_SIZE;
+	unsigned int rd_sector_size = brd->brd_sector_size;
 	unsigned int offset = (sector & (rd_sectors - 1)) << SECTOR_SHIFT;
 	size_t copy;
 	int ret;
@@ -196,9 +198,9 @@ static void copy_to_brd(struct brd_device *brd, const void *src,
 {
 	struct folio *folio;
 	void *dst;
-	unsigned int rd_sectors_shift = PAGE_SHIFT - SECTOR_SHIFT;
+	unsigned int rd_sectors_shift = brd->brd_sector_shift - SECTOR_SHIFT;
 	unsigned int rd_sectors = 1 << rd_sectors_shift;
-	unsigned int rd_sector_size = PAGE_SIZE;
+	unsigned int rd_sector_size = brd->brd_sector_size;
 	unsigned int offset = (sector & (rd_sectors - 1)) << SECTOR_SHIFT;
 	size_t copy;
 
@@ -231,9 +233,9 @@ static void copy_from_brd(void *dst, struct brd_device *brd,
 {
 	struct folio *folio;
 	void *src;
-	unsigned int rd_sectors_shift = PAGE_SHIFT - SECTOR_SHIFT;
+	unsigned int rd_sectors_shift = brd->brd_sector_shift - SECTOR_SHIFT;
 	unsigned int rd_sectors = 1 << rd_sectors_shift;
-	unsigned int rd_sector_size = PAGE_SIZE;
+	unsigned int rd_sector_size = brd->brd_sector_size;
 	unsigned int offset = (sector & (rd_sectors - 1)) << SECTOR_SHIFT;
 	size_t copy;
 
@@ -346,6 +348,10 @@ static int max_part = 1;
 module_param(max_part, int, 0444);
 MODULE_PARM_DESC(max_part, "Num Minors to reserve between devices");
 
+static unsigned int rd_blksize = PAGE_SIZE;
+module_param(rd_blksize, uint, 0444);
+MODULE_PARM_DESC(rd_blksize, "Blocksize of each RAM disk in bytes.");
+
 MODULE_LICENSE("GPL");
 MODULE_ALIAS_BLOCKDEV_MAJOR(RAMDISK_MAJOR);
 MODULE_ALIAS("rd");
@@ -382,6 +388,8 @@ static int brd_alloc(int i)
 		return -ENOMEM;
 	brd->brd_number		= i;
 	list_add_tail(&brd->brd_list, &brd_devices);
+	brd->brd_sector_shift = ilog2(rd_blksize);
+	brd->brd_sector_size = rd_blksize;
 
 	spin_lock_init(&brd->brd_lock);
 	INIT_RADIX_TREE(&brd->brd_folios, GFP_ATOMIC);
@@ -410,7 +418,7 @@ static int brd_alloc(int i)
 	 *  otherwise fdisk will align on 1M. Regardless this call
 	 *  is harmless)
 	 */
-	blk_queue_physical_block_size(disk->queue, PAGE_SIZE);
+	blk_queue_physical_block_size(disk->queue, rd_blksize);
 
 	/* Tell the block layer that this is not a rotational device */
 	blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue);
-- 
2.35.3


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 4/5] brd: limit maximal block size to 32M
  2023-03-06 12:01 [PATCH 0/5] brd: Allow to change block sizes Hannes Reinecke
                   ` (2 preceding siblings ...)
  2023-03-06 12:01 ` [PATCH 3/5] brd: make sector size configurable Hannes Reinecke
@ 2023-03-06 12:01 ` Hannes Reinecke
  2023-03-06 17:40   ` Matthew Wilcox
  2023-03-06 18:01   ` Keith Busch
  2023-03-06 12:01 ` [PATCH 5/5] brd: make logical sector size configurable Hannes Reinecke
       [not found] ` <CGME20230307114200eucas1p296a60514feb40c4a08f380cc28aeeb51@eucas1p2.samsung.com>
  5 siblings, 2 replies; 26+ messages in thread
From: Hannes Reinecke @ 2023-03-06 12:01 UTC (permalink / raw)
  To: Jens Axboe
  Cc: linux-block, Matthew Wilcox, Luis Chamberlain, Keith Busch,
	Hannes Reinecke

Use an arbitrary limit of 32M for the maximal blocksize so as not
to overflow the page cache.

Signed-off-by: Hannes Reinecke <hare@suse.de>
---
 drivers/block/brd.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 11bac3c3f1b6..1ed114cd5cb0 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -348,6 +348,7 @@ static int max_part = 1;
 module_param(max_part, int, 0444);
 MODULE_PARM_DESC(max_part, "Num Minors to reserve between devices");
 
+#define RD_MAX_SECTOR_SIZE 65536
 static unsigned int rd_blksize = PAGE_SIZE;
 module_param(rd_blksize, uint, 0444);
 MODULE_PARM_DESC(rd_blksize, "Blocksize of each RAM disk in bytes.");
@@ -410,15 +411,14 @@ static int brd_alloc(int i)
 	disk->private_data	= brd;
 	strscpy(disk->disk_name, buf, DISK_NAME_LEN);
 	set_capacity(disk, rd_size * 2);
-	
-	/*
-	 * This is so fdisk will align partitions on 4k, because of
-	 * direct_access API needing 4k alignment, returning a PFN
-	 * (This is only a problem on very small devices <= 4M,
-	 *  otherwise fdisk will align on 1M. Regardless this call
-	 *  is harmless)
-	 */
+
+	if (rd_blksize > RD_MAX_SECTOR_SIZE) {
+		/* Arbitrary limit maximum block size to 32M */
+		err = -EINVAL;
+		goto out_cleanup_disk;
+	}
 	blk_queue_physical_block_size(disk->queue, rd_blksize);
+	blk_queue_max_hw_sectors(disk->queue, RD_MAX_SECTOR_SIZE);
 
 	/* Tell the block layer that this is not a rotational device */
 	blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue);
-- 
2.35.3


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 5/5] brd: make logical sector size configurable
  2023-03-06 12:01 [PATCH 0/5] brd: Allow to change block sizes Hannes Reinecke
                   ` (3 preceding siblings ...)
  2023-03-06 12:01 ` [PATCH 4/5] brd: limit maximal block size to 32M Hannes Reinecke
@ 2023-03-06 12:01 ` Hannes Reinecke
       [not found]   ` <CGME20230307090934eucas1p28d92f3fd8c13edcba8e5d3fa7de6bcab@eucas1p2.samsung.com>
       [not found]   ` <CGME20230517093158eucas1p2831fd21a6f66ae39c887ad91e098aa74@eucas1p2.samsung.com>
       [not found] ` <CGME20230307114200eucas1p296a60514feb40c4a08f380cc28aeeb51@eucas1p2.samsung.com>
  5 siblings, 2 replies; 26+ messages in thread
From: Hannes Reinecke @ 2023-03-06 12:01 UTC (permalink / raw)
  To: Jens Axboe
  Cc: linux-block, Matthew Wilcox, Luis Chamberlain, Keith Busch,
	Hannes Reinecke

Add a module option 'rd_logical_blksize' to allow the user to change
the logical sector size of the RAM disks.

Signed-off-by: Hannes Reinecke <hare@suse.de>
---
 drivers/block/brd.c | 37 +++++++++++++++++++++++--------------
 1 file changed, 23 insertions(+), 14 deletions(-)

diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 1ed114cd5cb0..dda791805aba 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -48,6 +48,8 @@ struct brd_device {
 	u64			brd_nr_folios;
 	unsigned int		brd_sector_shift;
 	unsigned int		brd_sector_size;
+	unsigned int		brd_logical_sector_shift;
+	unsigned int		brd_logical_sector_size;
 };
 
 /*
@@ -57,7 +59,7 @@ static struct folio *brd_lookup_folio(struct brd_device *brd, sector_t sector)
 {
 	pgoff_t idx;
 	struct folio *folio;
-	unsigned int rd_sector_shift = brd->brd_sector_shift - SECTOR_SHIFT;
+	unsigned int rd_sector_shift = brd->brd_sector_shift - brd->brd_logical_sector_shift;
 
 	/*
 	 * The folio lifetime is protected by the fact that we have opened the
@@ -87,7 +89,7 @@ static int brd_insert_folio(struct brd_device *brd, sector_t sector, gfp_t gfp)
 {
 	pgoff_t idx;
 	struct folio *folio;
-	unsigned int rd_sector_shift = brd->brd_sector_shift - SECTOR_SHIFT;
+	unsigned int rd_sector_shift = brd->brd_sector_shift - brd->brd_logical_sector_shift;
 	unsigned int rd_sector_order = get_order(brd->brd_sector_size);
 	int ret = 0;
 
@@ -172,10 +174,10 @@ static void brd_free_folios(struct brd_device *brd)
 static int copy_to_brd_setup(struct brd_device *brd, sector_t sector, size_t n,
 			     gfp_t gfp)
 {
-	unsigned int rd_sectors_shift = brd->brd_sector_shift - SECTOR_SHIFT;
+	unsigned int rd_sectors_shift = brd->brd_sector_shift - brd->brd_logical_sector_shift;
 	unsigned int rd_sectors = 1 << rd_sectors_shift;
 	unsigned int rd_sector_size = brd->brd_sector_size;
-	unsigned int offset = (sector & (rd_sectors - 1)) << SECTOR_SHIFT;
+	unsigned int offset = (sector & (rd_sectors - 1)) << brd->brd_logical_sector_shift;
 	size_t copy;
 	int ret;
 
@@ -184,7 +186,7 @@ static int copy_to_brd_setup(struct brd_device *brd, sector_t sector, size_t n,
 	if (ret)
 		return ret;
 	if (copy < n) {
-		sector += copy >> SECTOR_SHIFT;
+		sector += copy >> brd->brd_logical_sector_shift;
 		ret = brd_insert_folio(brd, sector, gfp);
 	}
 	return ret;
@@ -198,10 +200,10 @@ static void copy_to_brd(struct brd_device *brd, const void *src,
 {
 	struct folio *folio;
 	void *dst;
-	unsigned int rd_sectors_shift = brd->brd_sector_shift - SECTOR_SHIFT;
+	unsigned int rd_sectors_shift = brd->brd_sector_shift - brd->brd_logical_sector_shift;
 	unsigned int rd_sectors = 1 << rd_sectors_shift;
 	unsigned int rd_sector_size = brd->brd_sector_size;
-	unsigned int offset = (sector & (rd_sectors - 1)) << SECTOR_SHIFT;
+	unsigned int offset = (sector & (rd_sectors - 1)) << brd->brd_logical_sector_shift;
 	size_t copy;
 
 	copy = min_t(size_t, n, rd_sector_size - offset);
@@ -214,7 +216,7 @@ static void copy_to_brd(struct brd_device *brd, const void *src,
 
 	if (copy < n) {
 		src += copy;
-		sector += copy >> SECTOR_SHIFT;
+		sector += copy >> brd->brd_logical_sector_shift;
 		copy = n - copy;
 		folio = brd_lookup_folio(brd, sector);
 		BUG_ON(!folio);
@@ -233,10 +235,10 @@ static void copy_from_brd(void *dst, struct brd_device *brd,
 {
 	struct folio *folio;
 	void *src;
-	unsigned int rd_sectors_shift = brd->brd_sector_shift - SECTOR_SHIFT;
+	unsigned int rd_sectors_shift = brd->brd_sector_shift - brd->brd_logical_sector_shift;
 	unsigned int rd_sectors = 1 << rd_sectors_shift;
 	unsigned int rd_sector_size = brd->brd_sector_size;
-	unsigned int offset = (sector & (rd_sectors - 1)) << SECTOR_SHIFT;
+	unsigned int offset = (sector & (rd_sectors - 1)) << brd->brd_logical_sector_shift;
 	size_t copy;
 
 	copy = min_t(size_t, n, rd_sector_size - offset);
@@ -250,7 +252,7 @@ static void copy_from_brd(void *dst, struct brd_device *brd,
 
 	if (copy < n) {
 		dst += copy;
-		sector += copy >> SECTOR_SHIFT;
+		sector += copy >> brd->brd_logical_sector_shift;
 		copy = n - copy;
 		folio = brd_lookup_folio(brd, sector);
 		if (folio) {
@@ -309,8 +311,8 @@ static void brd_submit_bio(struct bio *bio)
 		int err;
 
 		/* Don't support un-aligned buffer */
-		WARN_ON_ONCE((iter.offset & (SECTOR_SIZE - 1)) ||
-				(len & (SECTOR_SIZE - 1)));
+		WARN_ON_ONCE((iter.offset & (brd->brd_logical_sector_size - 1)) ||
+				(len & (brd->brd_logical_sector_size - 1)));
 
 		err = brd_do_folio(brd, iter.folio, len, iter.offset,
 				   bio->bi_opf, sector);
@@ -322,7 +324,7 @@ static void brd_submit_bio(struct bio *bio)
 			bio_io_error(bio);
 			return;
 		}
-		sector += len >> SECTOR_SHIFT;
+		sector += len >> brd->brd_logical_sector_shift;
 	}
 
 	bio_endio(bio);
@@ -353,6 +355,10 @@ static unsigned int rd_blksize = PAGE_SIZE;
 module_param(rd_blksize, uint, 0444);
 MODULE_PARM_DESC(rd_blksize, "Blocksize of each RAM disk in bytes.");
 
+static unsigned int rd_logical_blksize = SECTOR_SIZE;
+module_param(rd_logical_blksize, uint, 0444);
+MODULE_PARM_DESC(rd_logical_blksize, "Logical blocksize of each RAM disk in bytes.");
+
 MODULE_LICENSE("GPL");
 MODULE_ALIAS_BLOCKDEV_MAJOR(RAMDISK_MAJOR);
 MODULE_ALIAS("rd");
@@ -391,6 +397,8 @@ static int brd_alloc(int i)
 	list_add_tail(&brd->brd_list, &brd_devices);
 	brd->brd_sector_shift = ilog2(rd_blksize);
 	brd->brd_sector_size = rd_blksize;
+	brd->brd_logical_sector_shift = ilog2(rd_logical_blksize);
+	brd->brd_logical_sector_size = rd_logical_blksize;
 
 	spin_lock_init(&brd->brd_lock);
 	INIT_RADIX_TREE(&brd->brd_folios, GFP_ATOMIC);
@@ -418,6 +426,7 @@ static int brd_alloc(int i)
 		goto out_cleanup_disk;
 	}
 	blk_queue_physical_block_size(disk->queue, rd_blksize);
+	blk_queue_logical_block_size(disk->queue, rd_logical_blksize);
 	blk_queue_max_hw_sectors(disk->queue, RD_MAX_SECTOR_SIZE);
 
 	/* Tell the block layer that this is not a rotational device */
-- 
2.35.3


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: [PATCH 1/5] brd: convert to folios
  2023-03-06 12:01 ` [PATCH 1/5] brd: convert to folios Hannes Reinecke
@ 2023-03-06 16:04   ` kernel test robot
  2023-03-06 17:37   ` Matthew Wilcox
  2023-03-09  2:29   ` Luis Chamberlain
  2 siblings, 0 replies; 26+ messages in thread
From: kernel test robot @ 2023-03-06 16:04 UTC (permalink / raw)
  To: Hannes Reinecke, Jens Axboe
  Cc: llvm, oe-kbuild-all, linux-block, Matthew Wilcox,
	Luis Chamberlain, Keith Busch, Hannes Reinecke

Hi Hannes,

I love your patch! Yet something to improve:

[auto build test ERROR on axboe-block/for-next]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Hannes-Reinecke/brd-convert-to-folios/20230306-200223
base:   https://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git for-next
patch link:    https://lore.kernel.org/r/20230306120127.21375-2-hare%40suse.de
patch subject: [PATCH 1/5] brd: convert to folios
config: hexagon-randconfig-r045-20230306 (https://download.01.org/0day-ci/archive/20230306/202303062339.fe53AMz1-lkp@intel.com/config)
compiler: clang version 17.0.0 (https://github.com/llvm/llvm-project 67409911353323ca5edf2049ef0df54132fa1ca7)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/b29fb9873ddbb2efb157e5d6548abf3c88b3458c
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Hannes-Reinecke/brd-convert-to-folios/20230306-200223
        git checkout b29fb9873ddbb2efb157e5d6548abf3c88b3458c
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=hexagon olddefconfig
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=hexagon SHELL=/bin/bash drivers/block/

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>
| Link: https://lore.kernel.org/oe-kbuild-all/202303062339.fe53AMz1-lkp@intel.com/

All errors (new ones prefixed by >>):

   In file included from drivers/block/brd.c:17:
   In file included from include/linux/blkdev.h:9:
   In file included from include/linux/blk_types.h:10:
   In file included from include/linux/bvec.h:10:
   In file included from include/linux/highmem.h:12:
   In file included from include/linux/hardirq.h:11:
   In file included from ./arch/hexagon/include/generated/asm/hardirq.h:1:
   In file included from include/asm-generic/hardirq.h:17:
   In file included from include/linux/irq.h:20:
   In file included from include/linux/io.h:13:
   In file included from arch/hexagon/include/asm/io.h:334:
   include/asm-generic/io.h:547:31: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           val = __raw_readb(PCI_IOBASE + addr);
                             ~~~~~~~~~~ ^
   include/asm-generic/io.h:560:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           val = __le16_to_cpu((__le16 __force)__raw_readw(PCI_IOBASE + addr));
                                                           ~~~~~~~~~~ ^
   include/uapi/linux/byteorder/little_endian.h:37:51: note: expanded from macro '__le16_to_cpu'
   #define __le16_to_cpu(x) ((__force __u16)(__le16)(x))
                                                     ^
   In file included from drivers/block/brd.c:17:
   In file included from include/linux/blkdev.h:9:
   In file included from include/linux/blk_types.h:10:
   In file included from include/linux/bvec.h:10:
   In file included from include/linux/highmem.h:12:
   In file included from include/linux/hardirq.h:11:
   In file included from ./arch/hexagon/include/generated/asm/hardirq.h:1:
   In file included from include/asm-generic/hardirq.h:17:
   In file included from include/linux/irq.h:20:
   In file included from include/linux/io.h:13:
   In file included from arch/hexagon/include/asm/io.h:334:
   include/asm-generic/io.h:573:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           val = __le32_to_cpu((__le32 __force)__raw_readl(PCI_IOBASE + addr));
                                                           ~~~~~~~~~~ ^
   include/uapi/linux/byteorder/little_endian.h:35:51: note: expanded from macro '__le32_to_cpu'
   #define __le32_to_cpu(x) ((__force __u32)(__le32)(x))
                                                     ^
   In file included from drivers/block/brd.c:17:
   In file included from include/linux/blkdev.h:9:
   In file included from include/linux/blk_types.h:10:
   In file included from include/linux/bvec.h:10:
   In file included from include/linux/highmem.h:12:
   In file included from include/linux/hardirq.h:11:
   In file included from ./arch/hexagon/include/generated/asm/hardirq.h:1:
   In file included from include/asm-generic/hardirq.h:17:
   In file included from include/linux/irq.h:20:
   In file included from include/linux/io.h:13:
   In file included from arch/hexagon/include/asm/io.h:334:
   include/asm-generic/io.h:584:33: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           __raw_writeb(value, PCI_IOBASE + addr);
                               ~~~~~~~~~~ ^
   include/asm-generic/io.h:594:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           __raw_writew((u16 __force)cpu_to_le16(value), PCI_IOBASE + addr);
                                                         ~~~~~~~~~~ ^
   include/asm-generic/io.h:604:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           __raw_writel((u32 __force)cpu_to_le32(value), PCI_IOBASE + addr);
                                                         ~~~~~~~~~~ ^
>> drivers/block/brd.c:325:8: error: call to undeclared function 'brd_do_bvec'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration]
           err = brd_do_bvec(brd, page, PAGE_SIZE, 0, op, sector);
                 ^
   6 warnings and 1 error generated.


vim +/brd_do_bvec +325 drivers/block/brd.c

9db5579be4bb53 Nicholas Piggin 2008-02-08  316  
a72132c31d5809 Matthew Wilcox  2014-06-04  317  static int brd_rw_page(struct block_device *bdev, sector_t sector,
86947df3a92364 Bart Van Assche 2022-07-14  318  		       struct page *page, enum req_op op)
a72132c31d5809 Matthew Wilcox  2014-06-04  319  {
a72132c31d5809 Matthew Wilcox  2014-06-04  320  	struct brd_device *brd = bdev->bd_disk->private_data;
98cc093cba1e92 Huang Ying      2017-09-06  321  	int err;
98cc093cba1e92 Huang Ying      2017-09-06  322  
98cc093cba1e92 Huang Ying      2017-09-06  323  	if (PageTransHuge(page))
98cc093cba1e92 Huang Ying      2017-09-06  324  		return -ENOTSUPP;
3f289dcb4b2654 Tejun Heo       2018-07-18 @325  	err = brd_do_bvec(brd, page, PAGE_SIZE, 0, op, sector);
3f289dcb4b2654 Tejun Heo       2018-07-18  326  	page_endio(page, op_is_write(op), err);
a72132c31d5809 Matthew Wilcox  2014-06-04  327  	return err;
a72132c31d5809 Matthew Wilcox  2014-06-04  328  }
a72132c31d5809 Matthew Wilcox  2014-06-04  329  

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 1/5] brd: convert to folios
  2023-03-06 12:01 ` [PATCH 1/5] brd: convert to folios Hannes Reinecke
  2023-03-06 16:04   ` kernel test robot
@ 2023-03-06 17:37   ` Matthew Wilcox
  2023-03-07  6:55     ` Hannes Reinecke
  2023-03-09  2:29   ` Luis Chamberlain
  2 siblings, 1 reply; 26+ messages in thread
From: Matthew Wilcox @ 2023-03-06 17:37 UTC (permalink / raw)
  To: Hannes Reinecke; +Cc: Jens Axboe, linux-block, Luis Chamberlain, Keith Busch

On Mon, Mar 06, 2023 at 01:01:23PM +0100, Hannes Reinecke wrote:
> -	page = alloc_page(gfp | __GFP_ZERO | __GFP_HIGHMEM);
> -	if (!page)
> +	folio = folio_alloc(gfp | __GFP_ZERO, 0);
> +	if (!folio)

Did you drop HIGHMEM support on purpose?

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 4/5] brd: limit maximal block size to 32M
  2023-03-06 12:01 ` [PATCH 4/5] brd: limit maximal block size to 32M Hannes Reinecke
@ 2023-03-06 17:40   ` Matthew Wilcox
  2023-03-07  6:56     ` Hannes Reinecke
  2023-03-06 18:01   ` Keith Busch
  1 sibling, 1 reply; 26+ messages in thread
From: Matthew Wilcox @ 2023-03-06 17:40 UTC (permalink / raw)
  To: Hannes Reinecke; +Cc: Jens Axboe, linux-block, Luis Chamberlain, Keith Busch

On Mon, Mar 06, 2023 at 01:01:26PM +0100, Hannes Reinecke wrote:
> Use an arbitrary limit of 32M for the maximal blocksize so as not
> to overflow the page cache.

The page allocator tops out at MAX_ORDER (generally 4MB), so that might
be a better limit.  Also, this has nothing to do with the page cache,
right?

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 4/5] brd: limit maximal block size to 32M
  2023-03-06 12:01 ` [PATCH 4/5] brd: limit maximal block size to 32M Hannes Reinecke
  2023-03-06 17:40   ` Matthew Wilcox
@ 2023-03-06 18:01   ` Keith Busch
  2023-03-07  6:57     ` Hannes Reinecke
  1 sibling, 1 reply; 26+ messages in thread
From: Keith Busch @ 2023-03-06 18:01 UTC (permalink / raw)
  To: Hannes Reinecke; +Cc: Jens Axboe, linux-block, Matthew Wilcox, Luis Chamberlain

On Mon, Mar 06, 2023 at 01:01:26PM +0100, Hannes Reinecke wrote:
> Use an arbitrary limit of 32M for the maximal blocksize so as not
> to overflow the page cache.
> 
> Signed-off-by: Hannes Reinecke <hare@suse.de>
> ---
>  drivers/block/brd.c | 16 ++++++++--------
>  1 file changed, 8 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/block/brd.c b/drivers/block/brd.c
> index 11bac3c3f1b6..1ed114cd5cb0 100644
> --- a/drivers/block/brd.c
> +++ b/drivers/block/brd.c
> @@ -348,6 +348,7 @@ static int max_part = 1;
>  module_param(max_part, int, 0444);
>  MODULE_PARM_DESC(max_part, "Num Minors to reserve between devices");
>  
> +#define RD_MAX_SECTOR_SIZE 65536
>  static unsigned int rd_blksize = PAGE_SIZE;
>  module_param(rd_blksize, uint, 0444);
>  MODULE_PARM_DESC(rd_blksize, "Blocksize of each RAM disk in bytes.");
> @@ -410,15 +411,14 @@ static int brd_alloc(int i)
>  	disk->private_data	= brd;
>  	strscpy(disk->disk_name, buf, DISK_NAME_LEN);
>  	set_capacity(disk, rd_size * 2);
> -	
> -	/*
> -	 * This is so fdisk will align partitions on 4k, because of
> -	 * direct_access API needing 4k alignment, returning a PFN
> -	 * (This is only a problem on very small devices <= 4M,
> -	 *  otherwise fdisk will align on 1M. Regardless this call
> -	 *  is harmless)
> -	 */
> +
> +	if (rd_blksize > RD_MAX_SECTOR_SIZE) {

rd_blkside is in bytes, but the above is ccomapring it to something in units of
SECTOR_SIZE.

> +		/* Arbitrary limit maximum block size to 32M */
> +		err = -EINVAL;
> +		goto out_cleanup_disk;
> +	}

rd_blksize also needs to be >= 512, and a power of 2.

>  	blk_queue_physical_block_size(disk->queue, rd_blksize);
> +	blk_queue_max_hw_sectors(disk->queue, RD_MAX_SECTOR_SIZE);
>  
>  	/* Tell the block layer that this is not a rotational device */
>  	blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue);

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 1/5] brd: convert to folios
  2023-03-06 17:37   ` Matthew Wilcox
@ 2023-03-07  6:55     ` Hannes Reinecke
  2023-03-07  7:30       ` Matthew Wilcox
  0 siblings, 1 reply; 26+ messages in thread
From: Hannes Reinecke @ 2023-03-07  6:55 UTC (permalink / raw)
  To: Matthew Wilcox; +Cc: Jens Axboe, linux-block, Luis Chamberlain, Keith Busch

On 3/6/23 18:37, Matthew Wilcox wrote:
> On Mon, Mar 06, 2023 at 01:01:23PM +0100, Hannes Reinecke wrote:
>> -	page = alloc_page(gfp | __GFP_ZERO | __GFP_HIGHMEM);
>> -	if (!page)
>> +	folio = folio_alloc(gfp | __GFP_ZERO, 0);
>> +	if (!folio)
> 
> Did you drop HIGHMEM support on purpose?

No; I thought that folios would be doing that implicitely.
Will be re-adding.

Cheers,

Hannes
-- 
Dr. Hannes Reinecke                Kernel Storage Architect
hare@suse.de                              +49 911 74053 688
SUSE Software Solutions GmbH, Maxfeldstr. 5, 90409 Nürnberg
HRB 36809 (AG Nürnberg), Geschäftsführer: Ivo Totev, Andrew
Myers, Andrew McDonald, Martje Boudien Moerman


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 4/5] brd: limit maximal block size to 32M
  2023-03-06 17:40   ` Matthew Wilcox
@ 2023-03-07  6:56     ` Hannes Reinecke
  0 siblings, 0 replies; 26+ messages in thread
From: Hannes Reinecke @ 2023-03-07  6:56 UTC (permalink / raw)
  To: Matthew Wilcox; +Cc: Jens Axboe, linux-block, Luis Chamberlain, Keith Busch

On 3/6/23 18:40, Matthew Wilcox wrote:
> On Mon, Mar 06, 2023 at 01:01:26PM +0100, Hannes Reinecke wrote:
>> Use an arbitrary limit of 32M for the maximal blocksize so as not
>> to overflow the page cache.
> 
> The page allocator tops out at MAX_ORDER (generally 4MB), so that might
> be a better limit.  Also, this has nothing to do with the page cache,
> right?

Indeed. Sloppy description.
Will be fixing it up.

Cheers,

Hannes
-- 
Dr. Hannes Reinecke                Kernel Storage Architect
hare@suse.de                              +49 911 74053 688
SUSE Software Solutions GmbH, Maxfeldstr. 5, 90409 Nürnberg
HRB 36809 (AG Nürnberg), Geschäftsführer: Ivo Totev, Andrew
Myers, Andrew McDonald, Martje Boudien Moerman


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 4/5] brd: limit maximal block size to 32M
  2023-03-06 18:01   ` Keith Busch
@ 2023-03-07  6:57     ` Hannes Reinecke
  0 siblings, 0 replies; 26+ messages in thread
From: Hannes Reinecke @ 2023-03-07  6:57 UTC (permalink / raw)
  To: Keith Busch; +Cc: Jens Axboe, linux-block, Matthew Wilcox, Luis Chamberlain

On 3/6/23 19:01, Keith Busch wrote:
> On Mon, Mar 06, 2023 at 01:01:26PM +0100, Hannes Reinecke wrote:
>> Use an arbitrary limit of 32M for the maximal blocksize so as not
>> to overflow the page cache.
>>
>> Signed-off-by: Hannes Reinecke <hare@suse.de>
>> ---
>>   drivers/block/brd.c | 16 ++++++++--------
>>   1 file changed, 8 insertions(+), 8 deletions(-)
>>
>> diff --git a/drivers/block/brd.c b/drivers/block/brd.c
>> index 11bac3c3f1b6..1ed114cd5cb0 100644
>> --- a/drivers/block/brd.c
>> +++ b/drivers/block/brd.c
>> @@ -348,6 +348,7 @@ static int max_part = 1;
>>   module_param(max_part, int, 0444);
>>   MODULE_PARM_DESC(max_part, "Num Minors to reserve between devices");
>>   
>> +#define RD_MAX_SECTOR_SIZE 65536
>>   static unsigned int rd_blksize = PAGE_SIZE;
>>   module_param(rd_blksize, uint, 0444);
>>   MODULE_PARM_DESC(rd_blksize, "Blocksize of each RAM disk in bytes.");
>> @@ -410,15 +411,14 @@ static int brd_alloc(int i)
>>   	disk->private_data	= brd;
>>   	strscpy(disk->disk_name, buf, DISK_NAME_LEN);
>>   	set_capacity(disk, rd_size * 2);
>> -	
>> -	/*
>> -	 * This is so fdisk will align partitions on 4k, because of
>> -	 * direct_access API needing 4k alignment, returning a PFN
>> -	 * (This is only a problem on very small devices <= 4M,
>> -	 *  otherwise fdisk will align on 1M. Regardless this call
>> -	 *  is harmless)
>> -	 */
>> +
>> +	if (rd_blksize > RD_MAX_SECTOR_SIZE) {
> 
> rd_blkside is in bytes, but the above is ccomapring it to something in units of
> SECTOR_SIZE.
> 
Ok.

>> +		/* Arbitrary limit maximum block size to 32M */
>> +		err = -EINVAL;
>> +		goto out_cleanup_disk;
>> +	}
> 
> rd_blksize also needs to be >= 512, and a power of 2.
> 
Yes; I'll be adding the checks accordingly.

Cheers,

Hannes
-- 
Dr. Hannes Reinecke                Kernel Storage Architect
hare@suse.de                              +49 911 74053 688
SUSE Software Solutions GmbH, Maxfeldstr. 5, 90409 Nürnberg
HRB 36809 (AG Nürnberg), Geschäftsführer: Ivo Totev, Andrew
Myers, Andrew McDonald, Martje Boudien Moerman


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 1/5] brd: convert to folios
  2023-03-07  6:55     ` Hannes Reinecke
@ 2023-03-07  7:30       ` Matthew Wilcox
  2023-03-09  3:28         ` Luis Chamberlain
       [not found]         ` <a4489f7b-912c-e68f-4a4c-c14d96026bd6@suse.de>
  0 siblings, 2 replies; 26+ messages in thread
From: Matthew Wilcox @ 2023-03-07  7:30 UTC (permalink / raw)
  To: Hannes Reinecke; +Cc: Jens Axboe, linux-block, Luis Chamberlain, Keith Busch

On Tue, Mar 07, 2023 at 07:55:32AM +0100, Hannes Reinecke wrote:
> On 3/6/23 18:37, Matthew Wilcox wrote:
> > On Mon, Mar 06, 2023 at 01:01:23PM +0100, Hannes Reinecke wrote:
> > > -	page = alloc_page(gfp | __GFP_ZERO | __GFP_HIGHMEM);
> > > -	if (!page)
> > > +	folio = folio_alloc(gfp | __GFP_ZERO, 0);
> > > +	if (!folio)
> > 
> > Did you drop HIGHMEM support on purpose?
> 
> No; I thought that folios would be doing that implicitely.
> Will be re-adding.

We can't ... not all filesystems want to allocate every folio from
HIGHMEM.  eg for superblocks, it often makes more sense to allocate the
folio from lowmem than allocate it from highmem and keep it kmapped.
The only GFP flag that folios force-set is __GFP_COMP because folios by
definition are compound pages.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 5/5] brd: make logical sector size configurable
       [not found]   ` <CGME20230307090934eucas1p28d92f3fd8c13edcba8e5d3fa7de6bcab@eucas1p2.samsung.com>
@ 2023-03-07  9:01     ` Pankaj Raghav
  2023-03-07 11:06       ` Hannes Reinecke
  0 siblings, 1 reply; 26+ messages in thread
From: Pankaj Raghav @ 2023-03-07  9:01 UTC (permalink / raw)
  To: Hannes Reinecke
  Cc: Jens Axboe, linux-block, Matthew Wilcox, Luis Chamberlain,
	Keith Busch, p.raghav

> @@ -57,7 +59,7 @@ static struct folio *brd_lookup_folio(struct brd_device *brd, sector_t sector)
>  {
>  	pgoff_t idx;
>  	struct folio *folio;
> -	unsigned int rd_sector_shift = brd->brd_sector_shift - SECTOR_SHIFT;
> +	unsigned int rd_sector_shift = brd->brd_sector_shift - brd->brd_logical_sector_shift;

Could we create a simple macro instead of repeating this everywhere?
#define RD_SECTOR_SHIFT(brd) (brd->brd_sector_shift - brd->brd_logical_sector_shift) 

>  
>  	/*
>  	 * The folio lifetime is protected by the fact that we have opened the
>  			bio_io_error(bio);
>  			return;
>  		}
> -		sector += len >> SECTOR_SHIFT;
> +		sector += len >> brd->brd_logical_sector_shift;
>  	}
>  
>  	bio_endio(bio);
> @@ -353,6 +355,10 @@ static unsigned int rd_blksize = PAGE_SIZE;
>  module_param(rd_blksize, uint, 0444);
>  MODULE_PARM_DESC(rd_blksize, "Blocksize of each RAM disk in bytes.");
>  
> +static unsigned int rd_logical_blksize = SECTOR_SIZE;
> +module_param(rd_logical_blksize, uint, 0444);
> +MODULE_PARM_DESC(rd_logical_blksize, "Logical blocksize of each RAM disk in bytes.");
> +
>  MODULE_LICENSE("GPL");
>  MODULE_ALIAS_BLOCKDEV_MAJOR(RAMDISK_MAJOR);
>  MODULE_ALIAS("rd");
> @@ -391,6 +397,8 @@ static int brd_alloc(int i)
>  	list_add_tail(&brd->brd_list, &brd_devices);
>  	brd->brd_sector_shift = ilog2(rd_blksize);
>  	brd->brd_sector_size = rd_blksize;
> +	brd->brd_logical_sector_shift = ilog2(rd_logical_blksize);
> +	brd->brd_logical_sector_size = rd_logical_blksize;

We should a check here to see if logical block > rd_blksize similar
to what is done in blk_queue_logical_block_size()?

// physical block size should not be less than the logical block size
if (rd_blksize < rd_logical_blksize) {
	brd->brd_logical_sector_shift = ilog2(rd_blksize);
	brd->brd_logical_sector_size = rd_blksize;
 }

>  
>  	spin_lock_init(&brd->brd_lock);
>  	INIT_RADIX_TREE(&brd->brd_folios, GFP_ATOMIC);
> @@ -418,6 +426,7 @@ static int brd_alloc(int i)
>  		goto out_cleanup_disk;
>  	}
>  	blk_queue_physical_block_size(disk->queue, rd_blksize);
> +	blk_queue_logical_block_size(disk->queue, rd_logical_blksize);
>  	blk_queue_max_hw_sectors(disk->queue, RD_MAX_SECTOR_SIZE);
>  
>  	/* Tell the block layer that this is not a rotational device */
> -- 
> 2.35.3
> 

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 5/5] brd: make logical sector size configurable
  2023-03-07  9:01     ` Pankaj Raghav
@ 2023-03-07 11:06       ` Hannes Reinecke
  0 siblings, 0 replies; 26+ messages in thread
From: Hannes Reinecke @ 2023-03-07 11:06 UTC (permalink / raw)
  To: Pankaj Raghav
  Cc: Jens Axboe, linux-block, Matthew Wilcox, Luis Chamberlain, Keith Busch

On 3/7/23 10:01, Pankaj Raghav wrote:
>> @@ -57,7 +59,7 @@ static struct folio *brd_lookup_folio(struct brd_device *brd, sector_t sector)
>>   {
>>   	pgoff_t idx;
>>   	struct folio *folio;
>> -	unsigned int rd_sector_shift = brd->brd_sector_shift - SECTOR_SHIFT;
>> +	unsigned int rd_sector_shift = brd->brd_sector_shift - brd->brd_logical_sector_shift;
> 
> Could we create a simple macro instead of repeating this everywhere?
> #define RD_SECTOR_SHIFT(brd) (brd->brd_sector_shift - brd->brd_logical_sector_shift)
> 
Yeah, I'm not utterly happy with that one, too; this patchset is 
primarily a mechanical conversion to avoid errors.
Will be changing it.

>>   
>>   	/*
>>   	 * The folio lifetime is protected by the fact that we have opened the
>>   			bio_io_error(bio);
>>   			return;
>>   		}
>> -		sector += len >> SECTOR_SHIFT;
>> +		sector += len >> brd->brd_logical_sector_shift;
>>   	}
>>   
>>   	bio_endio(bio);
>> @@ -353,6 +355,10 @@ static unsigned int rd_blksize = PAGE_SIZE;
>>   module_param(rd_blksize, uint, 0444);
>>   MODULE_PARM_DESC(rd_blksize, "Blocksize of each RAM disk in bytes.");
>>   
>> +static unsigned int rd_logical_blksize = SECTOR_SIZE;
>> +module_param(rd_logical_blksize, uint, 0444);
>> +MODULE_PARM_DESC(rd_logical_blksize, "Logical blocksize of each RAM disk in bytes.");
>> +
>>   MODULE_LICENSE("GPL");
>>   MODULE_ALIAS_BLOCKDEV_MAJOR(RAMDISK_MAJOR);
>>   MODULE_ALIAS("rd");
>> @@ -391,6 +397,8 @@ static int brd_alloc(int i)
>>   	list_add_tail(&brd->brd_list, &brd_devices);
>>   	brd->brd_sector_shift = ilog2(rd_blksize);
>>   	brd->brd_sector_size = rd_blksize;
>> +	brd->brd_logical_sector_shift = ilog2(rd_logical_blksize);
>> +	brd->brd_logical_sector_size = rd_logical_blksize;
> 
> We should a check here to see if logical block > rd_blksize similar
> to what is done in blk_queue_logical_block_size()?
>  > // physical block size should not be less than the logical block size
> if (rd_blksize < rd_logical_blksize) {
> 	brd->brd_logical_sector_shift = ilog2(rd_blksize);
> 	brd->brd_logical_sector_size = rd_blksize;
>   }
> 
Sure. Keith already complained about it.

Cheers,

Hannes
-- 
Dr. Hannes Reinecke		           Kernel Storage Architect
hare@suse.de			                  +49 911 74053 688
SUSE Software Solutions Germany GmbH, Frankenstr. 146, 90461 Nürnberg
Managing Directors: I. Totev, A. Myers, A. McDonald, M. B. Moerman
(HRB 36809, AG Nürnberg)


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 0/5] brd: Allow to change block sizes
       [not found] ` <CGME20230307114200eucas1p296a60514feb40c4a08f380cc28aeeb51@eucas1p2.samsung.com>
@ 2023-03-07 11:33   ` Pankaj Raghav
  0 siblings, 0 replies; 26+ messages in thread
From: Pankaj Raghav @ 2023-03-07 11:33 UTC (permalink / raw)
  To: Hannes Reinecke
  Cc: Jens Axboe, linux-block, Matthew Wilcox, Luis Chamberlain, Keith Busch

Hi Hannes,

On Mon, Mar 06, 2023 at 01:01:22PM +0100, Hannes Reinecke wrote:
> Hi all,
> 
> meat to the bone: with this patchset one can change the physical and
> logical block size of the 'brd' ramdisk driver.
> Default is 512 (for both); one can easily increase the physical block
> size to 16k and the logical block size to 4k.
> Increasing the logcial block size beyond 4k gives some 'interesting'
> crashes.

I did something similar for an internal prototype to test large block
size. I ran the perf test suite I created on your changes and I can
clearly see a perf increase for large block IOs.

I enabled huge pages and used the iomem=mmaphuge option in fio to test
large block IOs. Here are my results:

base: next-20230307
new: base with your changes on top

bw: bandwith in MiB/s

For each set of rd_blksize and rd_logical_blksize, I ran the test with
different fio blocksize as indicated by `io_uring_iod_128_bs_4k`

rd_blksize=8192 and rd_logical_blksize=4096
+------------------------+-------------+------------+-------+----------+---------+-------+
| io_uring_iod_128_bs_4k | base[kiops] | new[kiops] | delta | base[bw] | new[bw] | delta |
+------------------------+-------------+------------+-------+----------+---------+-------+
|          read          |     567     |    605     |  6.7  |   2214   |  2362   | 6.68  |
|        randread        |     517     |    529     | 2.32  |   2019   |  2066   | 2.33  |
|         write          |     551     |    558     | 1.27  |   2154   |  2179   | 1.16  |
|       randwrite        |     481     |    502     | 4.37  |   1880   |  1962   | 4.36  |
+------------------------+-------------+------------+-------+----------+---------+-------+
+------------------------+-------------+------------+-------+----------+---------+-------+
| io_uring_iod_128_bs_8k | base[kiops] | new[kiops] | delta | base[bw] | new[bw] | delta |
+------------------------+-------------+------------+-------+----------+---------+-------+
|          read          |     462     |    512     | 10.82 |   3611   |  3997   | 10.69 |
|        randread        |     426     |    445     | 4.46  |   3326   |  3480   | 4.63  |
|         write          |     442     |    472     | 6.79  |   3454   |  3687   | 6.75  |
|       randwrite        |     401     |    426     | 6.23  |   3134   |  3328   | 6.19  |
+------------------------+-------------+------------+-------+----------+---------+-------+
+-------------------------+-------------+------------+-------+----------+---------+-------+
| io_uring_iod_128_bs_16k | base[kiops] | new[kiops] | delta | base[bw] | new[bw] | delta |
+-------------------------+-------------+------------+-------+----------+---------+-------+
|          read           |     343     |    390     | 13.7  |   5360   |  6086   | 13.54 |
|        randread         |     317     |    364     | 14.83 |   4946   |  5694   | 15.12 |
|          write          |     335     |    346     | 3.28  |   5235   |  5414   | 3.42  |
|        randwrite        |     305     |    327     | 7.21  |   4759   |  5106   | 7.29  |
+-------------------------+-------------+------------+-------+----------+---------+-------+

rd_blksize=16384 and rd_logical_blksize=4096
+------------------------+-------------+------------+-------+----------+---------+-------+
| io_uring_iod_128_bs_4k | base[kiops] | new[kiops] | delta | base[bw] | new[bw] | delta |
+------------------------+-------------+------------+-------+----------+---------+-------+
|          read          |     576     |    586     | 1.74  |   2250   |  2291   | 1.82  |
|        randread        |     524     |    548     | 4.58  |   2046   |  2139   | 4.55  |
|         write          |     533     |    545     | 2.25  |   2081   |  2129   | 2.31  |
|       randwrite        |     484     |    496     | 2.48  |   1892   |  1938   | 2.43  |
+------------------------+-------------+------------+-------+----------+---------+-------+
+------------------------+-------------+------------+-------+----------+---------+-------+
| io_uring_iod_128_bs_8k | base[kiops] | new[kiops] | delta | base[bw] | new[bw] | delta |
+------------------------+-------------+------------+-------+----------+---------+-------+
|          read          |     461     |    491     | 6.51  |   3601   |  3836   | 6.53  |
|        randread        |     425     |    472     | 11.06 |   3323   |  3684   | 10.86 |
|         write          |     454     |    465     | 2.42  |   3543   |  3632   | 2.51  |
|       randwrite        |     395     |    430     | 8.86  |   3086   |  3357   | 8.78  |
+------------------------+-------------+------------+-------+----------+---------+-------+
+-------------------------+-------------+------------+-------+----------+---------+-------+
| io_uring_iod_128_bs_16k | base[kiops] | new[kiops] | delta | base[bw] | new[bw] | delta |
+-------------------------+-------------+------------+-------+----------+---------+-------+
|          read           |     338     |    400     | 18.34 |   5282   |  6255   | 18.42 |
|        randread         |     317     |    384     | 21.14 |   4959   |  5997   | 20.93 |
|          write          |     335     |    354     | 5.67  |   5239   |  5525   | 5.46  |
|        randwrite        |     303     |    326     | 7.59  |   4728   |  5097   |  7.8  |
+-------------------------+-------------+------------+-------+----------+---------+-------+


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 1/5] brd: convert to folios
  2023-03-06 12:01 ` [PATCH 1/5] brd: convert to folios Hannes Reinecke
  2023-03-06 16:04   ` kernel test robot
  2023-03-06 17:37   ` Matthew Wilcox
@ 2023-03-09  2:29   ` Luis Chamberlain
  2 siblings, 0 replies; 26+ messages in thread
From: Luis Chamberlain @ 2023-03-09  2:29 UTC (permalink / raw)
  To: Hannes Reinecke
  Cc: Jens Axboe, linux-block, Matthew Wilcox, Keith Busch,
	Pankaj Raghav, Daniel Gomez

On Mon, Mar 06, 2023 at 01:01:23PM +0100, Hannes Reinecke wrote:
> Convert the driver to work on folios instead of pages.
> 
> Signed-off-by: Hannes Reinecke <hare@suse.de>
> ---
>  drivers/block/brd.c | 171 ++++++++++++++++++++++----------------------
>  1 file changed, 85 insertions(+), 86 deletions(-)
> 
> diff --git a/drivers/block/brd.c b/drivers/block/brd.c
> index 34177f1bd97d..7efc276c4963 100644
> --- a/drivers/block/brd.c
> +++ b/drivers/block/brd.c
> @@ -28,8 +28,8 @@
>  #include <linux/uaccess.h>
>  
>  /*
> - * Each block ramdisk device has a radix_tree brd_pages of pages that stores
> - * the pages containing the block device's contents. A brd page's ->index is
> + * Each block ramdisk device has a radix_tree brd_folios of folios that stores
                                                            ^^^^^^^^^
> + * the folios containing the block device's contents. A brd folio's ->index is
      ^^^^^^^^^^

So we end up with:

"a radix_tree brd_folios of folios that stores the folios containing ..."

What about:

"a radix_tree brd_folios that stores folios containing"

Other than that, looks good:

Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>

So another thing, I think I counted about 5-8 grammatical rules which
could be bundled up into *one* SmPL grammar patch which could then be
used to automatically do similar tasks elsewhere.

  Luis

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 3/5] brd: make sector size configurable
  2023-03-06 12:01 ` [PATCH 3/5] brd: make sector size configurable Hannes Reinecke
@ 2023-03-09  3:12   ` Luis Chamberlain
  2023-03-20 22:52     ` Luis Chamberlain
  0 siblings, 1 reply; 26+ messages in thread
From: Luis Chamberlain @ 2023-03-09  3:12 UTC (permalink / raw)
  To: Hannes Reinecke
  Cc: Jens Axboe, linux-block, Matthew Wilcox, Keith Busch,
	Davidlohr Bueso, Fan Ni, Pankaj Raghav, Daniel Gomez,
	Boaz Harrosh, Adam Manzanares

On Mon, Mar 06, 2023 at 01:01:25PM +0100, Hannes Reinecke wrote:
> Add a module option 'rd_blksize' to allow the user to change
> the sector size of the RAM disks.
> 
> Signed-off-by: Hannes Reinecke <hare@suse.de>
> ---
>  drivers/block/brd.c | 30 +++++++++++++++++++-----------
>  1 file changed, 19 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/block/brd.c b/drivers/block/brd.c
> index fc41ea641dfb..11bac3c3f1b6 100644
> --- a/drivers/block/brd.c
> +++ b/drivers/block/brd.c
> @@ -46,6 +46,8 @@ struct brd_device {
>  	spinlock_t		brd_lock;
>  	struct radix_tree_root	brd_folios;
>  	u64			brd_nr_folios;
> +	unsigned int		brd_sector_shift;
> +	unsigned int		brd_sector_size;
>  };

Why not just do this first and initialize this to the defaults set
without the module parameter. Then you don't need to declare over
and over unsigned int rd_sectors, etc in tons of routines and just
pass the brd which most routines get.

Then most of this and the prior patch can be squeezed into one.

The functional changes would be the addition of the module parameter.

> @@ -410,7 +418,7 @@ static int brd_alloc(int i)
>  	 *  otherwise fdisk will align on 1M. Regardless this call
>  	 *  is harmless)
>  	 */
> -	blk_queue_physical_block_size(disk->queue, PAGE_SIZE);
> +	blk_queue_physical_block_size(disk->queue, rd_blksize);

And this was added for DAX support, but DAX support was long removed
from brd see commit 7a862fbbdec6 ("brd: remove dax support"). This
nugget was added by Boaz via commit c8fa31730fc72a8 ("brd: Request from
fdisk 4k alignment"), but if we don't support DAX, can't we just kill
that line?

Current doc for blk_queue_physical_block_size() says:

*   This should be set to the lowest possible sector size that the             
*   hardware can operate on without reverting to read-modify-write             
*   operations.  

But since we're working directly with RAM, do we care?

The comment above that line referring to direct_access should be killed
at the very least.

While you're at it, can you then also update Documentation/filesystems/dax.rst
to remove the brd as an example driver with DAX support.

That leaves us with only a few block drivers with DAX:

- dcssblk: s390 dcss block device driver                                        
- pmem: NVDIMM persistent memory driver  
- some dm targets
- fuse virtio_fs

Wonder which is the right example these days for DAX, now that
persistant memory has End of Life'd with Optane dead, curious also of
the value of the above and DAX in general other than support for
whatever made it out.

Should we EOL DAX too?

  Luis

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 1/5] brd: convert to folios
  2023-03-07  7:30       ` Matthew Wilcox
@ 2023-03-09  3:28         ` Luis Chamberlain
       [not found]         ` <a4489f7b-912c-e68f-4a4c-c14d96026bd6@suse.de>
  1 sibling, 0 replies; 26+ messages in thread
From: Luis Chamberlain @ 2023-03-09  3:28 UTC (permalink / raw)
  To: Matthew Wilcox; +Cc: Hannes Reinecke, Jens Axboe, linux-block, Keith Busch

On Tue, Mar 07, 2023 at 07:30:37AM +0000, Matthew Wilcox wrote:
> On Tue, Mar 07, 2023 at 07:55:32AM +0100, Hannes Reinecke wrote:
> > On 3/6/23 18:37, Matthew Wilcox wrote:
> > > On Mon, Mar 06, 2023 at 01:01:23PM +0100, Hannes Reinecke wrote:
> > > > -	page = alloc_page(gfp | __GFP_ZERO | __GFP_HIGHMEM);
> > > > -	if (!page)
> > > > +	folio = folio_alloc(gfp | __GFP_ZERO, 0);
> > > > +	if (!folio)
> > > 
> > > Did you drop HIGHMEM support on purpose?
> > 
> > No; I thought that folios would be doing that implicitely.
> > Will be re-adding.
> 
> We can't ... not all filesystems want to allocate every folio from
> HIGHMEM.  eg for superblocks, it often makes more sense to allocate the
> folio from lowmem than allocate it from highmem and keep it kmapped.
> The only GFP flag that folios force-set is __GFP_COMP because folios by
> definition are compound pages.

Just some historical information here. Some commit logs would seem
to make it seem that __GFP_HIGHMEM was added to support DAX, but it's
not the case. When DAX suport was removed from brd the __GFP_HIGHMEM
removed was removed by mistake, and later fixed through commit
f6b50160a06d4 ("brd: re-enable __GFP_HIGHMEM in brd_insert_page()").
But that doesn't tell us what the default were before.

To avoid future removals maybe we should document why we care?

See these commits:

9db5579be4bb5 ("rewrite rd")               initial commit with highmem
cb9cd2ef0bbb4 ("rd: support XIP")          forces highmem if XIP, but already set
a7a97fc9ff6c2 ("brd: rename XIP to DAX")   renames the config to DAX
26defe34e48e1 ("fix brd allocation flags") tries to fix the confusion but makes it worse

And so commit f6b50160a06d4 ("brd: re-enable __GFP_HIGHMEM in brd_insert_page()")
seems to be correct in re-adding it, adn commit cb9cd2ef0bbb4 ("rd: support XIP")
made things confusing assuming we wanted only highmem when DAX was
enabled.

So we only want highmem just because it's been there since it was first added.
That's all.

  Luis

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 3/5] brd: make sector size configurable
  2023-03-09  3:12   ` Luis Chamberlain
@ 2023-03-20 22:52     ` Luis Chamberlain
  2023-03-20 23:40       ` Martin K. Petersen
  0 siblings, 1 reply; 26+ messages in thread
From: Luis Chamberlain @ 2023-03-20 22:52 UTC (permalink / raw)
  To: Hannes Reinecke, Martin K. Petersen, Dan Williams
  Cc: Jens Axboe, linux-block, Matthew Wilcox, Keith Busch,
	Davidlohr Bueso, Fan Ni, Pankaj Raghav, Daniel Gomez,
	Boaz Harrosh, Adam Manzanares

On Wed, Mar 08, 2023 at 07:12:40PM -0800, Luis Chamberlain wrote:
> On Mon, Mar 06, 2023 at 01:01:25PM +0100, Hannes Reinecke wrote:
> > Add a module option 'rd_blksize' to allow the user to change
> > the sector size of the RAM disks.
> > 
> > Signed-off-by: Hannes Reinecke <hare@suse.de>
> > ---
> >  drivers/block/brd.c | 30 +++++++++++++++++++-----------
> >  1 file changed, 19 insertions(+), 11 deletions(-)
> > 
> > diff --git a/drivers/block/brd.c b/drivers/block/brd.c
> > index fc41ea641dfb..11bac3c3f1b6 100644
> > --- a/drivers/block/brd.c
> > +++ b/drivers/block/brd.c
> > @@ -46,6 +46,8 @@ struct brd_device {
> >  	spinlock_t		brd_lock;
> >  	struct radix_tree_root	brd_folios;
> >  	u64			brd_nr_folios;
> > +	unsigned int		brd_sector_shift;
> > +	unsigned int		brd_sector_size;
> >  };
> 
> Why not just do this first and initialize this to the defaults set
> without the module parameter. Then you don't need to declare over
> and over unsigned int rd_sectors, etc in tons of routines and just
> pass the brd which most routines get.
> 
> Then most of this and the prior patch can be squeezed into one.
> 
> The functional changes would be the addition of the module parameter.
> 
> > @@ -410,7 +418,7 @@ static int brd_alloc(int i)
> >  	 *  otherwise fdisk will align on 1M. Regardless this call
> >  	 *  is harmless)
> >  	 */
> > -	blk_queue_physical_block_size(disk->queue, PAGE_SIZE);
> > +	blk_queue_physical_block_size(disk->queue, rd_blksize);
> 
> And this was added for DAX support, but DAX support was long removed
> from brd see commit 7a862fbbdec6 ("brd: remove dax support"). This
> nugget was added by Boaz via commit c8fa31730fc72a8 ("brd: Request from
> fdisk 4k alignment"), but if we don't support DAX, can't we just kill
> that line?
> 
> Current doc for blk_queue_physical_block_size() says:
> 
> *   This should be set to the lowest possible sector size that the             
> *   hardware can operate on without reverting to read-modify-write             
> *   operations.  
> 
> But since we're working directly with RAM, do we care?
> 
> The comment above that line referring to direct_access should be killed
> at the very least.

I was curious whether or not Martin's comments about "mkfs already
considers the reported queue limits (for the filesystems most people use,
anyway)" applies to say XFS [0] and whether or not the above helps guide
mkfs that way, so I took a look now.

[0] https://lore.kernel.org/all/yq1ttytbox9.fsf@ca-mkp.ca.oracle.com/

There's a default struct mkfs_default_params which sets the blocksize
to 1 << XFS_DFL_BLOCKSIZE_LOG and so at least validate_blocksize() will
use that if no parameter was passed on the cli. We also set the default
sector size to XFS_MIN_SECTORSIZE.

There are two aspects to what we use for the final mkfs, first we use
validate_blocksize(), then validate_sectorsize(). The first will set
the default block size to  1 << XFS_DFL_BLOCKSIZE_LOG (4096 bytes) if
the user did not specify anything. Then validate_sectorsize() uses
get_topology() to get the physical / logical block sizes and both:

  * blkid_topology_get_minimum_io_size()
  * blkid_topology_get_optimal_io_size()

These come from utilil-linux:

https://github.com/util-linux/util-linux.git

https://www.kernel.org/doc/Documentation/ABI/testing/sysfs-block

So that's just

/sys/block/<disk>/queue/physical_block_size
/sys/block/<disk>/queue/logical_block_size

Then:

/sys/block/<disk>/queue/minimum_io_size

The documentation suggests " For disk drives this is often the physical block size."

/sys/block/<disk>/queue/optimal_io_size

The documentation suggests this is "This is rarely reported for disk drives."

From my review of xfs's mkfs is we essentially use the physical block
size as a default sector size if set, otherwise we use the device's logical block
size if set otherwise xfsprog's default and so 4096.

And so the above comment referring to DAX could be killed and replaced
with allowing userspace mkfs utils to use a more appropriate block size.
Or just kill the comment all together and we update the docs for
blk_queue_physical_block_size() instead.

> While you're at it, can you then also update Documentation/filesystems/dax.rst
> to remove the brd as an example driver with DAX support.
> 
> That leaves us with only a few block drivers with DAX:
> 
> - dcssblk: s390 dcss block device driver                                        
> - pmem: NVDIMM persistent memory driver  
> - some dm targets
> - fuse virtio_fs
> 
> Wonder which is the right example these days for DAX, now that
> persistant memory has End of Life'd with Optane dead, curious also of
> the value of the above and DAX in general other than support for
> whatever made it out.

Looking at this:

https://lore.kernel.org/all/62ef05515b085_1b3c29434@dwillia2-xfh.jf.intel.com.notmuch/

> Should we EOL DAX too?

I think the answer is no as a generic kernel thing, however, it probably
means we should really update the DAX documentation to reflect what our
real expecations are for its current and future uses. And that is to
highlight the non-PMEM use cases as *those* probably are EOL'd now.

  Luis

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 3/5] brd: make sector size configurable
  2023-03-20 22:52     ` Luis Chamberlain
@ 2023-03-20 23:40       ` Martin K. Petersen
  2023-03-21  0:14         ` Luis Chamberlain
  0 siblings, 1 reply; 26+ messages in thread
From: Martin K. Petersen @ 2023-03-20 23:40 UTC (permalink / raw)
  To: Luis Chamberlain
  Cc: Hannes Reinecke, Martin K. Petersen, Dan Williams, Jens Axboe,
	linux-block, Matthew Wilcox, Keith Busch, Davidlohr Bueso,
	Fan Ni, Pankaj Raghav, Daniel Gomez, Boaz Harrosh,
	Adam Manzanares


Luis,

> /sys/block/<disk>/queue/minimum_io_size
>
> The documentation suggests " For disk drives this is often the
> physical block size."
>
> /sys/block/<disk>/queue/optimal_io_size
>
> The documentation suggests this is "This is rarely reported for disk
> drives."

min_io and opt_io are used to key mkfs.xfs' sunit/swidth. So if you're
using a hardware RAID, MD, or DM, we'll attempt to align allocations on
stripe boundaries.

Back when that "rarely reported" blurb was written (2009), we did not
have any individual disk drives which reported min_io/opt_io. Reporting
those parameters was mostly a storage array thing. These days it's
fairly common for both disk drives and SSDs to fill out these fields.

> From my review of xfs's mkfs is we essentially use the physical block
> size as a default sector size if set, otherwise we use the device's
> logical block size if set otherwise xfsprog's default and so 4096.

Yep.

-- 
Martin K. Petersen	Oracle Linux Engineering

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 3/5] brd: make sector size configurable
  2023-03-20 23:40       ` Martin K. Petersen
@ 2023-03-21  0:14         ` Luis Chamberlain
  0 siblings, 0 replies; 26+ messages in thread
From: Luis Chamberlain @ 2023-03-21  0:14 UTC (permalink / raw)
  To: Martin K. Petersen, Darrick J. Wong, Bart Van Assche, Dan Helmick
  Cc: Hannes Reinecke, Dan Williams, Jens Axboe, linux-block,
	Matthew Wilcox, Keith Busch, Davidlohr Bueso, Fan Ni,
	Pankaj Raghav, Daniel Gomez, Boaz Harrosh, Adam Manzanares

On Mon, Mar 20, 2023 at 07:40:44PM -0400, Martin K. Petersen wrote:
> 
> Luis,
> 
> > /sys/block/<disk>/queue/minimum_io_size
> >
> > The documentation suggests " For disk drives this is often the
> > physical block size."
> >
> > /sys/block/<disk>/queue/optimal_io_size
> >
> > The documentation suggests this is "This is rarely reported for disk
> > drives."
> 
> min_io and opt_io are used to key mkfs.xfs' sunit/swidth. So if you're
> using a hardware RAID, MD, or DM, we'll attempt to align allocations on
> stripe boundaries.
> 
> Back when that "rarely reported" blurb was written (2009), we did not
> have any individual disk drives which reported min_io/opt_io. Reporting
> those parameters was mostly a storage array thing. These days it's
> fairly common for both disk drives and SSDs to fill out these fields.

Glad you mentioned this, I followed up in my review of these and I see
even the names, swidth, sunit, are all "stripe" indicative. Based on
what you are saying, it seems we may need to update docs to reflect
actual / new uses.

> > From my review of xfs's mkfs is we essentially use the physical block
> > size as a default sector size if set, otherwise we use the device's
> > logical block size if set otherwise xfsprog's default and so 4096.
> 
> Yep.

The use case for swidth / sunit on mkfs.xfs seemed pretty tied to
striping, and it was not obvious or clear / if using it for hints could be used
today as perhaps intended. At least all the naming and validation stuff
seems to make it very "stripy" still.

  Luis

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 1/5] brd: convert to folios
       [not found]         ` <a4489f7b-912c-e68f-4a4c-c14d96026bd6@suse.de>
@ 2023-03-21 15:00           ` Matthew Wilcox
  2023-03-21 15:26             ` Hannes Reinecke
  0 siblings, 1 reply; 26+ messages in thread
From: Matthew Wilcox @ 2023-03-21 15:00 UTC (permalink / raw)
  To: Hannes Reinecke; +Cc: linux-block, linux-fsdevel, linux-mm

On Tue, Mar 07, 2023 at 09:14:27AM +0100, Hannes Reinecke wrote:
> On 3/7/23 08:30, Matthew Wilcox wrote:
> > On Tue, Mar 07, 2023 at 07:55:32AM +0100, Hannes Reinecke wrote:
> > > On 3/6/23 18:37, Matthew Wilcox wrote:
> > > > On Mon, Mar 06, 2023 at 01:01:23PM +0100, Hannes Reinecke wrote:
> > > > > -	page = alloc_page(gfp | __GFP_ZERO | __GFP_HIGHMEM);
> > > > > -	if (!page)
> > > > > +	folio = folio_alloc(gfp | __GFP_ZERO, 0);
> > > > > +	if (!folio)
> > > > 
> > > > Did you drop HIGHMEM support on purpose?
> > > 
> > > No; I thought that folios would be doing that implicitely.
> > > Will be re-adding.
> > 
> > We can't ... not all filesystems want to allocate every folio from
> > HIGHMEM.  eg for superblocks, it often makes more sense to allocate the
> > folio from lowmem than allocate it from highmem and keep it kmapped.
> > The only GFP flag that folios force-set is __GFP_COMP because folios by
> > definition are compound pages.
> 
> Oh well.
> 
> However, when playing with the modified brd and setting the logical&physical
> blocksize to 16k the whole thing crashes
> (not unexpectedly).
> It does crash, however, in block_read_full_folio(), which rather
> surprisingly (at least for me) is using create_page_buffers();
> I would have expected something like create_folio_buffers().
> Is this work in progress or what is the plan here?

Supporting folios > PAGE_SIZE in blockdev is definitely still WIP.
I know of at least one bug, which is:

#define bh_offset(bh)           ((unsigned long)(bh)->b_data & ~PAGE_MASK)

That needs to be something like

static size_t bh_offset(const struct buffer_head *bh)
{
	return (unsigned long)bh->b_data & (folio_size(bh->b_folio) - 1);
}

I haven't done a thorough scan for folio-size problems in the block
layer; I've just been fixing up things as I notice them.

Yes, create_page_buffers() should now be create_folio_buffers().  Just
didn't get round to it yet.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 1/5] brd: convert to folios
  2023-03-21 15:00           ` Matthew Wilcox
@ 2023-03-21 15:26             ` Hannes Reinecke
  0 siblings, 0 replies; 26+ messages in thread
From: Hannes Reinecke @ 2023-03-21 15:26 UTC (permalink / raw)
  To: Matthew Wilcox; +Cc: linux-block, linux-fsdevel, linux-mm

On 3/21/23 16:00, Matthew Wilcox wrote:
> On Tue, Mar 07, 2023 at 09:14:27AM +0100, Hannes Reinecke wrote:
>> On 3/7/23 08:30, Matthew Wilcox wrote:
>>> On Tue, Mar 07, 2023 at 07:55:32AM +0100, Hannes Reinecke wrote:
>>>> On 3/6/23 18:37, Matthew Wilcox wrote:
>>>>> On Mon, Mar 06, 2023 at 01:01:23PM +0100, Hannes Reinecke wrote:
>>>>>> -	page = alloc_page(gfp | __GFP_ZERO | __GFP_HIGHMEM);
>>>>>> -	if (!page)
>>>>>> +	folio = folio_alloc(gfp | __GFP_ZERO, 0);
>>>>>> +	if (!folio)
>>>>>
>>>>> Did you drop HIGHMEM support on purpose?
>>>>
>>>> No; I thought that folios would be doing that implicitely.
>>>> Will be re-adding.
>>>
>>> We can't ... not all filesystems want to allocate every folio from
>>> HIGHMEM.  eg for superblocks, it often makes more sense to allocate the
>>> folio from lowmem than allocate it from highmem and keep it kmapped.
>>> The only GFP flag that folios force-set is __GFP_COMP because folios by
>>> definition are compound pages.
>>
>> Oh well.
>>
>> However, when playing with the modified brd and setting the logical&physical
>> blocksize to 16k the whole thing crashes
>> (not unexpectedly).
>> It does crash, however, in block_read_full_folio(), which rather
>> surprisingly (at least for me) is using create_page_buffers();
>> I would have expected something like create_folio_buffers().
>> Is this work in progress or what is the plan here?
> 
> Supporting folios > PAGE_SIZE in blockdev is definitely still WIP.
> I know of at least one bug, which is:
> 
> #define bh_offset(bh)           ((unsigned long)(bh)->b_data & ~PAGE_MASK)
> 
> That needs to be something like
> 
> static size_t bh_offset(const struct buffer_head *bh)
> {
> 	return (unsigned long)bh->b_data & (folio_size(bh->b_folio) - 1);
> }
> 
> I haven't done a thorough scan for folio-size problems in the block
> layer; I've just been fixing up things as I notice them.
> 
And not to mention the various instances of (PAGE_SHIFT - blkbits)
which will get happily into negative numbers for large block sizes,
resulting in interesting effects for a shift left operation ...

> Yes, create_page_buffers() should now be create_folio_buffers().  Just
> didn't get round to it yet.

Ah, so I was on the right track after all.

But I was wondering ... couldn't we use the physical block size as a 
hint on how many pages to allocate?
With that we can work on updating the stack even with existing hardware, 
and we wouldn't crash immediately if we miss the odd conversion ...

Hmm?

Cheers,

Hannes


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 5/5] brd: make logical sector size configurable
       [not found]   ` <CGME20230517093158eucas1p2831fd21a6f66ae39c887ad91e098aa74@eucas1p2.samsung.com>
@ 2023-05-17  9:31     ` Pankaj Raghav
  0 siblings, 0 replies; 26+ messages in thread
From: Pankaj Raghav @ 2023-05-17  9:31 UTC (permalink / raw)
  To: Hannes Reinecke
  Cc: Jens Axboe, linux-block, Matthew Wilcox, Luis Chamberlain,
	Keith Busch, p.raghav

[-- Attachment #1: Type: text/plain, Size: 1086 bytes --]

Hi Hannes,

> @@ -309,8 +311,8 @@ static void brd_submit_bio(struct bio *bio)
>  		int err;
I think you missed the conversion from "kernel" logical sectors to the
device logical sector here:
sector_t sector = bio->bi_iter.bi_sector >> 
                 (brd->brd_logical_sector_shift - SECTOR_SHIFT);

This fixes the issue when you try to do a fio with -verify on a 4k
logical and physical block size.
>  
>  		/* Don't support un-aligned buffer */
> -		WARN_ON_ONCE((iter.offset & (SECTOR_SIZE - 1)) ||
> -				(len & (SECTOR_SIZE - 1)));
> +		WARN_ON_ONCE((iter.offset & (brd->brd_logical_sector_size - 1)) ||
> +				(len & (brd->brd_logical_sector_size - 1)));
>  
>  		err = brd_do_folio(brd, iter.folio, len, iter.offset,
>  				   bio->bi_opf, sector);
> @@ -322,7 +324,7 @@ static void brd_submit_bio(struct bio *bio)
>  			bio_io_error(bio);
>  			return;
>  		}
> -		sector += len >> SECTOR_SHIFT;
> +		sector += len >> brd->brd_logical_sector_shift;
As you can see here, you divide the len with device logical block size
to go to the next sector.
>  	}
>  
>  	bio_endio(bio);

[-- Attachment #2: Type: text/plain, Size: 0 bytes --]



^ permalink raw reply	[flat|nested] 26+ messages in thread

end of thread, other threads:[~2023-05-17  9:32 UTC | newest]

Thread overview: 26+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-03-06 12:01 [PATCH 0/5] brd: Allow to change block sizes Hannes Reinecke
2023-03-06 12:01 ` [PATCH 1/5] brd: convert to folios Hannes Reinecke
2023-03-06 16:04   ` kernel test robot
2023-03-06 17:37   ` Matthew Wilcox
2023-03-07  6:55     ` Hannes Reinecke
2023-03-07  7:30       ` Matthew Wilcox
2023-03-09  3:28         ` Luis Chamberlain
     [not found]         ` <a4489f7b-912c-e68f-4a4c-c14d96026bd6@suse.de>
2023-03-21 15:00           ` Matthew Wilcox
2023-03-21 15:26             ` Hannes Reinecke
2023-03-09  2:29   ` Luis Chamberlain
2023-03-06 12:01 ` [PATCH 2/5] brd: abstract page_size conventions Hannes Reinecke
2023-03-06 12:01 ` [PATCH 3/5] brd: make sector size configurable Hannes Reinecke
2023-03-09  3:12   ` Luis Chamberlain
2023-03-20 22:52     ` Luis Chamberlain
2023-03-20 23:40       ` Martin K. Petersen
2023-03-21  0:14         ` Luis Chamberlain
2023-03-06 12:01 ` [PATCH 4/5] brd: limit maximal block size to 32M Hannes Reinecke
2023-03-06 17:40   ` Matthew Wilcox
2023-03-07  6:56     ` Hannes Reinecke
2023-03-06 18:01   ` Keith Busch
2023-03-07  6:57     ` Hannes Reinecke
2023-03-06 12:01 ` [PATCH 5/5] brd: make logical sector size configurable Hannes Reinecke
     [not found]   ` <CGME20230307090934eucas1p28d92f3fd8c13edcba8e5d3fa7de6bcab@eucas1p2.samsung.com>
2023-03-07  9:01     ` Pankaj Raghav
2023-03-07 11:06       ` Hannes Reinecke
     [not found]   ` <CGME20230517093158eucas1p2831fd21a6f66ae39c887ad91e098aa74@eucas1p2.samsung.com>
2023-05-17  9:31     ` Pankaj Raghav
     [not found] ` <CGME20230307114200eucas1p296a60514feb40c4a08f380cc28aeeb51@eucas1p2.samsung.com>
2023-03-07 11:33   ` [PATCH 0/5] brd: Allow to change block sizes Pankaj Raghav

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.