All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 0/4] Selectable checksum implementation
@ 2022-07-29 17:42 David Sterba
  2022-07-29 17:42 ` [PATCH v2 1/4] btrfs: prepare more slots for checksum shash David Sterba
                   ` (4 more replies)
  0 siblings, 5 replies; 16+ messages in thread
From: David Sterba @ 2022-07-29 17:42 UTC (permalink / raw)
  To: linux-btrfs; +Cc: David Sterba

Add a possibility to load accelerated checksum implementation after a
filesystem has been mounted. Detailed description in patch 3.

v2: rebased on misc-next

David Sterba (4):
  btrfs: prepare more slots for checksum shash
  btrfs: assign checksum shash slots on init
  btrfs: add checksum implementation selection after mount
  btrfs: sysfs: print all loaded csums implementations

 fs/btrfs/check-integrity.c |   4 +-
 fs/btrfs/ctree.h           |  13 ++++-
 fs/btrfs/disk-io.c         |  30 +++++++----
 fs/btrfs/file-item.c       |   4 +-
 fs/btrfs/inode.c           |   2 +-
 fs/btrfs/scrub.c           |  12 ++---
 fs/btrfs/super.c           |   2 -
 fs/btrfs/sysfs.c           | 101 +++++++++++++++++++++++++++++++++++--
 8 files changed, 141 insertions(+), 27 deletions(-)

-- 
2.36.1


^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH v2 1/4] btrfs: prepare more slots for checksum shash
  2022-07-29 17:42 [PATCH v2 0/4] Selectable checksum implementation David Sterba
@ 2022-07-29 17:42 ` David Sterba
  2022-07-29 17:42 ` [PATCH v2 2/4] btrfs: assign checksum shash slots on init David Sterba
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 16+ messages in thread
From: David Sterba @ 2022-07-29 17:42 UTC (permalink / raw)
  To: linux-btrfs; +Cc: David Sterba

There's now one checksum template fs_info::csum_shash that contains
whatever the crypto API decides is the best available implementation. To
allow loading other implementations after mount add more slots and
symbolic names. The slot 0 is always used when calculating checksum,
other slots may be set later or left empty.

Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/check-integrity.c |  4 ++--
 fs/btrfs/ctree.h           | 13 ++++++++++++-
 fs/btrfs/disk-io.c         | 19 ++++++++++---------
 fs/btrfs/file-item.c       |  4 ++--
 fs/btrfs/inode.c           |  2 +-
 fs/btrfs/scrub.c           | 12 ++++++------
 fs/btrfs/sysfs.c           |  2 +-
 7 files changed, 34 insertions(+), 22 deletions(-)

diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index 5d20137b7b67..dbabdd01ed66 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -1647,7 +1647,7 @@ static noinline_for_stack int btrfsic_test_for_metadata(
 		char **datav, unsigned int num_pages)
 {
 	struct btrfs_fs_info *fs_info = state->fs_info;
-	SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
+	SHASH_DESC_ON_STACK(shash, fs_info->csum_shash[CSUM_DEFAULT]);
 	struct btrfs_header *h;
 	u8 csum[BTRFS_CSUM_SIZE];
 	unsigned int i;
@@ -1660,7 +1660,7 @@ static noinline_for_stack int btrfsic_test_for_metadata(
 	if (memcmp(h->fsid, fs_info->fs_devices->fsid, BTRFS_FSID_SIZE))
 		return 1;
 
-	shash->tfm = fs_info->csum_shash;
+	shash->tfm = fs_info->csum_shash[CSUM_DEFAULT];
 	crypto_shash_init(shash);
 
 	for (i = 0; i < num_pages; i++) {
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index e6a084a9214c..1e55dfa49827 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -674,6 +674,13 @@ struct btrfs_commit_stats {
 	u64 total_commit_dur;
 };
 
+enum btrfs_csum_impl {
+	CSUM_DEFAULT,
+	CSUM_GENERIC,
+	CSUM_ACCEL,
+	CSUM_COUNT
+};
+
 struct btrfs_fs_info {
 	u8 chunk_tree_uuid[BTRFS_UUID_SIZE];
 	unsigned long flags;
@@ -1061,7 +1068,11 @@ struct btrfs_fs_info {
 	spinlock_t swapfile_pins_lock;
 	struct rb_root swapfile_pins;
 
-	struct crypto_shash *csum_shash;
+	/*
+	 * Templates for checksum calculation, slot 0 is for the currently used
+	 * one, other slots are optional for available implementations.
+	 */
+	struct crypto_shash *csum_shash[CSUM_COUNT];
 
 	/* Type of exclusive operation running, protected by super_lock */
 	enum btrfs_exclusive_operation exclusive_operation;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 6ac8d73d4b51..3c2251199f0c 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -65,8 +65,9 @@ static void btrfs_error_commit_super(struct btrfs_fs_info *fs_info);
 
 static void btrfs_free_csum_hash(struct btrfs_fs_info *fs_info)
 {
-	if (fs_info->csum_shash)
-		crypto_free_shash(fs_info->csum_shash);
+	for (int i = 0; i < CSUM_COUNT; i++)
+		if (fs_info->csum_shash[i])
+			crypto_free_shash(fs_info->csum_shash[i]);
 }
 
 /*
@@ -176,11 +177,11 @@ static void csum_tree_block(struct extent_buffer *buf, u8 *result)
 	struct btrfs_fs_info *fs_info = buf->fs_info;
 	const int num_pages = num_extent_pages(buf);
 	const int first_page_part = min_t(u32, PAGE_SIZE, fs_info->nodesize);
-	SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
+	SHASH_DESC_ON_STACK(shash, fs_info->csum_shash[CSUM_DEFAULT]);
 	char *kaddr;
 	int i;
 
-	shash->tfm = fs_info->csum_shash;
+	shash->tfm = fs_info->csum_shash[CSUM_DEFAULT];
 	crypto_shash_init(shash);
 	kaddr = page_address(buf->pages[0]) + offset_in_page(buf->start);
 	crypto_shash_update(shash, kaddr + BTRFS_CSUM_SIZE,
@@ -255,9 +256,9 @@ static int btrfs_check_super_csum(struct btrfs_fs_info *fs_info,
 	struct btrfs_super_block *disk_sb =
 		(struct btrfs_super_block *)raw_disk_sb;
 	char result[BTRFS_CSUM_SIZE];
-	SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
+	SHASH_DESC_ON_STACK(shash, fs_info->csum_shash[CSUM_DEFAULT]);
 
-	shash->tfm = fs_info->csum_shash;
+	shash->tfm = fs_info->csum_shash[CSUM_DEFAULT];
 
 	/*
 	 * The super_block structure does not span the whole
@@ -2429,7 +2430,7 @@ static int btrfs_init_csum_hash(struct btrfs_fs_info *fs_info, u16 csum_type)
 		return PTR_ERR(csum_shash);
 	}
 
-	fs_info->csum_shash = csum_shash;
+	fs_info->csum_shash[CSUM_DEFAULT] = csum_shash;
 
 	btrfs_info(fs_info, "using %s (%s) checksum algorithm",
 			btrfs_super_csum_name(csum_type),
@@ -4012,7 +4013,7 @@ static int write_dev_supers(struct btrfs_device *device,
 {
 	struct btrfs_fs_info *fs_info = device->fs_info;
 	struct address_space *mapping = device->bdev->bd_inode->i_mapping;
-	SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
+	SHASH_DESC_ON_STACK(shash, fs_info->csum_shash[CSUM_DEFAULT]);
 	int i;
 	int errors = 0;
 	int ret;
@@ -4021,7 +4022,7 @@ static int write_dev_supers(struct btrfs_device *device,
 	if (max_mirrors == 0)
 		max_mirrors = BTRFS_SUPER_MIRROR_MAX;
 
-	shash->tfm = fs_info->csum_shash;
+	shash->tfm = fs_info->csum_shash[CSUM_DEFAULT];
 
 	for (i = 0; i < max_mirrors; i++) {
 		struct page *page;
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index c828f971a346..6e416230733b 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -635,7 +635,7 @@ blk_status_t btrfs_csum_one_bio(struct btrfs_inode *inode, struct bio *bio,
 				u64 offset, bool one_ordered)
 {
 	struct btrfs_fs_info *fs_info = inode->root->fs_info;
-	SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
+	SHASH_DESC_ON_STACK(shash, fs_info->csum_shash[CSUM_DEFAULT]);
 	struct btrfs_ordered_sum *sums;
 	struct btrfs_ordered_extent *ordered = NULL;
 	const bool use_page_offsets = (offset == (u64)-1);
@@ -663,7 +663,7 @@ blk_status_t btrfs_csum_one_bio(struct btrfs_inode *inode, struct bio *bio,
 	sums->bytenr = bio->bi_iter.bi_sector << 9;
 	index = 0;
 
-	shash->tfm = fs_info->csum_shash;
+	shash->tfm = fs_info->csum_shash[CSUM_DEFAULT];
 
 	bio_for_each_segment(bvec, bio, iter) {
 		if (use_page_offsets)
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index ecc5fa3343fc..a16a7379c58d 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3429,7 +3429,7 @@ int btrfs_check_sector_csum(struct btrfs_fs_info *fs_info, struct page *page,
 
 	ASSERT(pgoff + fs_info->sectorsize <= PAGE_SIZE);
 
-	shash->tfm = fs_info->csum_shash;
+	shash->tfm = fs_info->csum_shash[CSUM_DEFAULT];
 
 	kaddr = kmap_local_page(page) + pgoff;
 	crypto_shash_digest(shash, kaddr, fs_info->sectorsize, csum);
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 3afe5fa50a63..f67a35cf0446 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -1775,7 +1775,7 @@ static int scrub_checksum_data(struct scrub_block *sblock)
 {
 	struct scrub_ctx *sctx = sblock->sctx;
 	struct btrfs_fs_info *fs_info = sctx->fs_info;
-	SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
+	SHASH_DESC_ON_STACK(shash, fs_info->csum_shash[CSUM_DEFAULT]);
 	u8 csum[BTRFS_CSUM_SIZE];
 	struct scrub_sector *sector;
 	char *kaddr;
@@ -1787,7 +1787,7 @@ static int scrub_checksum_data(struct scrub_block *sblock)
 
 	kaddr = page_address(sector->page);
 
-	shash->tfm = fs_info->csum_shash;
+	shash->tfm = fs_info->csum_shash[CSUM_DEFAULT];
 	crypto_shash_init(shash);
 
 	/*
@@ -1806,7 +1806,7 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock)
 	struct scrub_ctx *sctx = sblock->sctx;
 	struct btrfs_header *h;
 	struct btrfs_fs_info *fs_info = sctx->fs_info;
-	SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
+	SHASH_DESC_ON_STACK(shash, fs_info->csum_shash[CSUM_DEFAULT]);
 	u8 calculated_csum[BTRFS_CSUM_SIZE];
 	u8 on_disk_csum[BTRFS_CSUM_SIZE];
 	/*
@@ -1850,7 +1850,7 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock)
 		   BTRFS_UUID_SIZE))
 		sblock->header_error = 1;
 
-	shash->tfm = fs_info->csum_shash;
+	shash->tfm = fs_info->csum_shash[CSUM_DEFAULT];
 	crypto_shash_init(shash);
 	crypto_shash_update(shash, kaddr + BTRFS_CSUM_SIZE,
 			    sectorsize - BTRFS_CSUM_SIZE);
@@ -1872,7 +1872,7 @@ static int scrub_checksum_super(struct scrub_block *sblock)
 	struct btrfs_super_block *s;
 	struct scrub_ctx *sctx = sblock->sctx;
 	struct btrfs_fs_info *fs_info = sctx->fs_info;
-	SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
+	SHASH_DESC_ON_STACK(shash, fs_info->csum_shash[CSUM_DEFAULT]);
 	u8 calculated_csum[BTRFS_CSUM_SIZE];
 	struct scrub_sector *sector;
 	char *kaddr;
@@ -1893,7 +1893,7 @@ static int scrub_checksum_super(struct scrub_block *sblock)
 	if (!scrub_check_fsid(s->fsid, sector))
 		++fail_cor;
 
-	shash->tfm = fs_info->csum_shash;
+	shash->tfm = fs_info->csum_shash[CSUM_DEFAULT];
 	crypto_shash_init(shash);
 	crypto_shash_digest(shash, kaddr + BTRFS_CSUM_SIZE,
 			BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE, calculated_csum);
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index 32714ef8e22b..cc943b236c92 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -1097,7 +1097,7 @@ static ssize_t btrfs_checksum_show(struct kobject *kobj,
 
 	return sysfs_emit(buf, "%s (%s)\n",
 			  btrfs_super_csum_name(csum_type),
-			  crypto_shash_driver_name(fs_info->csum_shash));
+			  crypto_shash_driver_name(fs_info->csum_shash[CSUM_DEFAULT]));
 }
 
 BTRFS_ATTR(, checksum, btrfs_checksum_show);
-- 
2.36.1


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH v2 2/4] btrfs: assign checksum shash slots on init
  2022-07-29 17:42 [PATCH v2 0/4] Selectable checksum implementation David Sterba
  2022-07-29 17:42 ` [PATCH v2 1/4] btrfs: prepare more slots for checksum shash David Sterba
@ 2022-07-29 17:42 ` David Sterba
  2022-07-29 17:42 ` [PATCH v2 3/4] btrfs: add checksum implementation selection after mount David Sterba
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 16+ messages in thread
From: David Sterba @ 2022-07-29 17:42 UTC (permalink / raw)
  To: linux-btrfs; +Cc: David Sterba

When initializing checksum implementation on first mount, assign it to
the proper slot based on the driver name. If it contains "generic" it's
considered the non-accelerated one. Based on that properly set the
BTRFS_FS_CSUM_IMPL_FAST bit, previously it could be mistakenly set as
fast despite a different checksum (eg. sha256) with generic
implementation.

Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/disk-io.c | 13 ++++++++++++-
 fs/btrfs/super.c   |  2 --
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 3c2251199f0c..247187427760 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2430,7 +2430,18 @@ static int btrfs_init_csum_hash(struct btrfs_fs_info *fs_info, u16 csum_type)
 		return PTR_ERR(csum_shash);
 	}
 
-	fs_info->csum_shash[CSUM_DEFAULT] = csum_shash;
+	/*
+	 * Find the fastest implementation available, but keep the slots
+	 * matching the type.
+	 */
+	if (strstr(crypto_shash_driver_name(fs_info->csum_shash[CSUM_DEFAULT]),
+		   "generic") != NULL) {
+		fs_info->csum_shash[CSUM_GENERIC] = csum_shash;
+		clear_bit(BTRFS_FS_CSUM_IMPL_FAST, &fs_info->flags);
+	} else {
+		fs_info->csum_shash[CSUM_ACCEL] = csum_shash;
+		set_bit(BTRFS_FS_CSUM_IMPL_FAST, &fs_info->flags);
+	}
 
 	btrfs_info(fs_info, "using %s (%s) checksum algorithm",
 			btrfs_super_csum_name(csum_type),
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 1032aaa2c2f4..595b8d92ef86 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1819,8 +1819,6 @@ static struct dentry *btrfs_mount_root(struct file_system_type *fs_type,
 	} else {
 		snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev);
 		btrfs_sb(s)->bdev_holder = fs_type;
-		if (!strstr(crc32c_impl(), "generic"))
-			set_bit(BTRFS_FS_CSUM_IMPL_FAST, &fs_info->flags);
 		error = btrfs_fill_super(s, fs_devices, data);
 	}
 	if (!error)
-- 
2.36.1


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH v2 3/4] btrfs: add checksum implementation selection after mount
  2022-07-29 17:42 [PATCH v2 0/4] Selectable checksum implementation David Sterba
  2022-07-29 17:42 ` [PATCH v2 1/4] btrfs: prepare more slots for checksum shash David Sterba
  2022-07-29 17:42 ` [PATCH v2 2/4] btrfs: assign checksum shash slots on init David Sterba
@ 2022-07-29 17:42 ` David Sterba
  2022-08-01  8:31   ` Johannes Thumshirn
  2022-07-29 17:42 ` [PATCH v2 4/4] btrfs: sysfs: print all loaded csums implementations David Sterba
  2022-07-29 21:01 ` [PATCH v2 0/4] Selectable checksum implementation Boris Burkov
  4 siblings, 1 reply; 16+ messages in thread
From: David Sterba @ 2022-07-29 17:42 UTC (permalink / raw)
  To: linux-btrfs; +Cc: David Sterba

The sysfs file /sys/fs/btrfs/FSID/checksum shows the filesystem checksum
and the crypto module implementing it. In the scenario when there's only
the default generic implementation available during mount it's selected,
even if there's an unloaded module with accelerated version.

This happens with sha256 that's often built-in so the crypto API may not
trigger loading the modules and select the fastest implementation. Such
filesystem could be left using in the generic for the whole time.
Remount can't fix that and full umount/mount cycle may be impossible eg.
for a root filesystem.

Allow writing strings to the sysfs checksum file that will trigger
loading the crypto shash again and check if the found module is the
desired one.

Possible values:

- default - select whatever is considered default by crypto subsystem,
            either generic or accelerated
- accel   - try loading an accelerated implementation (must not contain
            "generic" in the name)
- generic - load and select the generic implementation

A typical scenario, assuming sha256 is built-in:

  $ mkfs.btrfs --csum sha256
  $ lsmod | grep sha256
  $ mount /dev /mnt
  $ cat ...FSID/checksum
  sha256 (sha256-generic)
  $ modprobe sha256
  $ lsmod | grep sha256
  sha256_ssse3
  $ echo accel > ...FSID/checksum
  sha256 (sha256-ni)

The crypto shash for all slots has the same lifetime as the mount, so
it's not possible to unload the crypto module.

Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/sysfs.c | 86 +++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 85 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index cc943b236c92..0044644056ed 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -1100,7 +1100,91 @@ static ssize_t btrfs_checksum_show(struct kobject *kobj,
 			  crypto_shash_driver_name(fs_info->csum_shash[CSUM_DEFAULT]));
 }
 
-BTRFS_ATTR(, checksum, btrfs_checksum_show);
+static const char csum_impl[][8] = {
+	[CSUM_DEFAULT]	= "default",
+	[CSUM_GENERIC]	= "generic",
+	[CSUM_ACCEL]	= "accel",
+};
+
+static int select_checksum(struct btrfs_fs_info *fs_info, enum btrfs_csum_impl type)
+{
+	/* Already set */
+	if (fs_info->csum_shash[CSUM_DEFAULT] == fs_info->csum_shash[type])
+		return 0;
+
+	/* Allocate new if needed */
+	if (!fs_info->csum_shash[type]) {
+		const u16 csum_type = btrfs_super_csum_type(fs_info->super_copy);
+		const char *csum_driver = btrfs_super_csum_driver(csum_type);
+		struct crypto_shash *shash1, *tmp;
+		char full_name[32];
+		u32 mask = 0;
+
+		/*
+		 * Generic must be requested explicitly, otherwise it could
+		 * be accelerated implementation with highest priority.
+		 */
+		scnprintf(full_name, sizeof(full_name), "%s%s", csum_driver,
+			  (type == CSUM_GENERIC ? "-generic" : ""));
+
+		shash1 = crypto_alloc_shash(full_name, 0, mask);
+		if (IS_ERR(shash1))
+			return PTR_ERR(shash1);
+
+		/* Accelerated requested but generic found */
+		if (type != CSUM_GENERIC &&
+		    strstr(crypto_shash_driver_name(shash1), "generic")) {
+			crypto_free_shash(shash1);
+			return -ENOENT;
+		}
+
+		tmp = cmpxchg(&fs_info->csum_shash[type], NULL, shash1);
+		if (tmp) {
+			/* Something raced in */
+			crypto_free_shash(shash1);
+		}
+	}
+
+	/* Select it */
+	fs_info->csum_shash[CSUM_DEFAULT] = fs_info->csum_shash[type];
+	return 0;
+}
+
+static bool strmatch(const char *buffer, const char *string);
+
+static ssize_t btrfs_checksum_store(struct kobject *kobj,
+				    struct kobj_attribute *a,
+				    const char *buf, size_t len)
+{
+	struct btrfs_fs_info *fs_info = to_fs_info(kobj);
+
+	if (!fs_info)
+		return -EPERM;
+
+	/* Pick the best available, generic or accelerated */
+	if (strmatch(buf, csum_impl[CSUM_DEFAULT])) {
+		if (fs_info->csum_shash[CSUM_ACCEL]) {
+			fs_info->csum_shash[CSUM_DEFAULT] =
+				fs_info->csum_shash[CSUM_ACCEL];
+			return len;
+		}
+		ASSERT(fs_info->csum_shash[CSUM_GENERIC]);
+		fs_info->csum_shash[CSUM_DEFAULT] = fs_info->csum_shash[CSUM_GENERIC];
+		return len;
+	}
+
+	for (int i = 1; i < CSUM_COUNT; i++) {
+		if (strmatch(buf, csum_impl[i])) {
+			int ret;
+
+			ret = select_checksum(fs_info, i);
+			return ret < 0 ? ret : len;
+		}
+	}
+
+	return -EINVAL;
+}
+BTRFS_ATTR_RW(, checksum, btrfs_checksum_show, btrfs_checksum_store);
 
 static ssize_t btrfs_exclusive_operation_show(struct kobject *kobj,
 		struct kobj_attribute *a, char *buf)
-- 
2.36.1


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH v2 4/4] btrfs: sysfs: print all loaded csums implementations
  2022-07-29 17:42 [PATCH v2 0/4] Selectable checksum implementation David Sterba
                   ` (2 preceding siblings ...)
  2022-07-29 17:42 ` [PATCH v2 3/4] btrfs: add checksum implementation selection after mount David Sterba
@ 2022-07-29 17:42 ` David Sterba
  2022-07-29 21:01 ` [PATCH v2 0/4] Selectable checksum implementation Boris Burkov
  4 siblings, 0 replies; 16+ messages in thread
From: David Sterba @ 2022-07-29 17:42 UTC (permalink / raw)
  To: linux-btrfs; +Cc: David Sterba

Extend the sysfs FSID/checksum file and append lines with the selector
strings and implementation if loaded, or 'none'.

Output may look like:

  crc32c (crc32c-generic)
  generic: crc32c-generic
  accel:   crc32c-intel

Scripts that rely on single line in the file need to be updated.

All available and loaded implementations can be found in /proc/crypto .

Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/sysfs.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index 0044644056ed..2f8bd75ce69d 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -1095,9 +1095,18 @@ static ssize_t btrfs_checksum_show(struct kobject *kobj,
 	struct btrfs_fs_info *fs_info = to_fs_info(kobj);
 	u16 csum_type = btrfs_super_csum_type(fs_info->super_copy);
 
-	return sysfs_emit(buf, "%s (%s)\n",
-			  btrfs_super_csum_name(csum_type),
-			  crypto_shash_driver_name(fs_info->csum_shash[CSUM_DEFAULT]));
+	return sysfs_emit(buf,
+			"%s (%s)\n"
+			"generic: %s\n"
+			"accel:   %s\n",
+			btrfs_super_csum_name(csum_type),
+			crypto_shash_driver_name(fs_info->csum_shash[CSUM_DEFAULT]),
+			fs_info->csum_shash[CSUM_GENERIC]
+			? crypto_shash_driver_name(fs_info->csum_shash[CSUM_GENERIC])
+			: "none",
+			fs_info->csum_shash[CSUM_ACCEL]
+			? crypto_shash_driver_name(fs_info->csum_shash[CSUM_ACCEL])
+			: "none");
 }
 
 static const char csum_impl[][8] = {
-- 
2.36.1


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH v2 0/4] Selectable checksum implementation
  2022-07-29 17:42 [PATCH v2 0/4] Selectable checksum implementation David Sterba
                   ` (3 preceding siblings ...)
  2022-07-29 17:42 ` [PATCH v2 4/4] btrfs: sysfs: print all loaded csums implementations David Sterba
@ 2022-07-29 21:01 ` Boris Burkov
  2022-08-01 18:55   ` David Sterba
  4 siblings, 1 reply; 16+ messages in thread
From: Boris Burkov @ 2022-07-29 21:01 UTC (permalink / raw)
  To: David Sterba; +Cc: linux-btrfs

On Fri, Jul 29, 2022 at 07:42:42PM +0200, David Sterba wrote:
> Add a possibility to load accelerated checksum implementation after a
> filesystem has been mounted. Detailed description in patch 3.

I naively attempted to use this on my test VM and it blew up in mount.

What I ran:
mkfs.btrfs -f $dev --csum sha256
mount $dev $mnt

I got this oops:
https://pastebin.com/DAbSem7K

which indicates a null pointer access in this code:
(gdb) list *open_ctree+0x3c9
0xffffffff8210634e is in open_ctree (fs/btrfs/disk-io.c:2437).
2432
2433            /*
2434             * Find the fastest implementation available, but keep the slots
2435             * matching the type.
2436             */
2437            if (strstr(crypto_shash_driver_name(fs_info->csum_shash[CSUM_DEFAULT]),
2438                       "generic") != NULL) {
2439                    fs_info->csum_shash[CSUM_GENERIC] = csum_shash;
2440                    clear_bit(BTRFS_FS_CSUM_IMPL_FAST, &fs_info->flags);
2441            } else {

So I suspect the problem is in btrfs_init_csum_hash. Looking at it, two
things come to mind:
1. the old code used the name from the allocated hash, maybe we need to
check that against "generic" instead of the DEFAULT
2. we never set the DEFAULT entry to anything, so it isn't clear to me
how the use of the checksum would work after this (which it does seem to
do, to csum the super block).

Running misc-next, it mounted and reported it was using sha256-generic:
$ cat /sys/fs/btrfs/8ffa8559-f6c4-41d3-a806-c12738367d72/checksum
sha256 (sha256-generic)

I have not yet figured out how to get the virtio accel stuff to actually
work in the VM, so I haven't tested the happy case yet, but I figured
this report would be interesting on its own.

For further info, lsmod is empty on this VM. And here are the contents
of /proc/crypto:
https://pastebin.com/mZW6tq29

Hope that's helpful, and apologies if I didn't use the API correctly...

> 
> v2: rebased on misc-next
> 
> David Sterba (4):
>   btrfs: prepare more slots for checksum shash
>   btrfs: assign checksum shash slots on init
>   btrfs: add checksum implementation selection after mount
>   btrfs: sysfs: print all loaded csums implementations
> 
>  fs/btrfs/check-integrity.c |   4 +-
>  fs/btrfs/ctree.h           |  13 ++++-
>  fs/btrfs/disk-io.c         |  30 +++++++----
>  fs/btrfs/file-item.c       |   4 +-
>  fs/btrfs/inode.c           |   2 +-
>  fs/btrfs/scrub.c           |  12 ++---
>  fs/btrfs/super.c           |   2 -
>  fs/btrfs/sysfs.c           | 101 +++++++++++++++++++++++++++++++++++--
>  8 files changed, 141 insertions(+), 27 deletions(-)
> 
> -- 
> 2.36.1
> 

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v2 3/4] btrfs: add checksum implementation selection after mount
  2022-07-29 17:42 ` [PATCH v2 3/4] btrfs: add checksum implementation selection after mount David Sterba
@ 2022-08-01  8:31   ` Johannes Thumshirn
  0 siblings, 0 replies; 16+ messages in thread
From: Johannes Thumshirn @ 2022-08-01  8:31 UTC (permalink / raw)
  To: David Sterba, linux-btrfs

On 29.07.22 19:48, David Sterba wrote:
> static bool strmatch(const char *buffer, const char *string);
> +
> +static ssize_t btrfs_checksum_store(struct kobject *kobj,
> +				    struct kobj_attribute *a,
> +				    const char *buf, size_t len)
> +{
> +	struct btrfs_fs_info *fs_info = to_fs_info(kobj);
> +
> +	if (!fs_info)
> +		return -EPERM;
> +
> +	/* Pick the best available, generic or accelerated */
> +	if (strmatch(buf, csum_impl[CSUM_DEFAULT])) {

Can't you use sysfs_streq() here?

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v2 0/4] Selectable checksum implementation
  2022-07-29 21:01 ` [PATCH v2 0/4] Selectable checksum implementation Boris Burkov
@ 2022-08-01 18:55   ` David Sterba
  0 siblings, 0 replies; 16+ messages in thread
From: David Sterba @ 2022-08-01 18:55 UTC (permalink / raw)
  To: Boris Burkov; +Cc: David Sterba, linux-btrfs

On Fri, Jul 29, 2022 at 02:01:41PM -0700, Boris Burkov wrote:
> On Fri, Jul 29, 2022 at 07:42:42PM +0200, David Sterba wrote:
> > Add a possibility to load accelerated checksum implementation after a
> > filesystem has been mounted. Detailed description in patch 3.
> 
> I naively attempted to use this on my test VM and it blew up in mount.
> 
> What I ran:
> mkfs.btrfs -f $dev --csum sha256
> mount $dev $mnt
> 
> I got this oops:
> https://pastebin.com/DAbSem7K
> 
> which indicates a null pointer access in this code:
> (gdb) list *open_ctree+0x3c9
> 0xffffffff8210634e is in open_ctree (fs/btrfs/disk-io.c:2437).
> 2432
> 2433            /*
> 2434             * Find the fastest implementation available, but keep the slots
> 2435             * matching the type.
> 2436             */
> 2437            if (strstr(crypto_shash_driver_name(fs_info->csum_shash[CSUM_DEFAULT]),
> 2438                       "generic") != NULL) {
> 2439                    fs_info->csum_shash[CSUM_GENERIC] = csum_shash;
> 2440                    clear_bit(BTRFS_FS_CSUM_IMPL_FAST, &fs_info->flags);
> 2441            } else {
> 
> So I suspect the problem is in btrfs_init_csum_hash. Looking at it, two
> things come to mind:
> 1. the old code used the name from the allocated hash, maybe we need to
> check that against "generic" instead of the DEFAULT
> 2. we never set the DEFAULT entry to anything, so it isn't clear to me
> how the use of the checksum would work after this (which it does seem to
> do, to csum the super block).

That DEFAULT was not set is the problem, in patch 2 there's removed line
setting it, this is a rebase error on my side, I was splitting the
patches from longer series. Thanks for catching it, I'll resend.

> Running misc-next, it mounted and reported it was using sha256-generic:
> $ cat /sys/fs/btrfs/8ffa8559-f6c4-41d3-a806-c12738367d72/checksum
> sha256 (sha256-generic)
> 
> I have not yet figured out how to get the virtio accel stuff to actually
> work in the VM, so I haven't tested the happy case yet, but I figured
> this report would be interesting on its own.

I think you need to enable in .config the accelerated version,
CONFIG_CRYPTO_SHA256_SSSE3=m, and run KVM guest that has the CPU
features available. I'm running it with '-host', virtio only for random
number generator and block devices.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v2 3/4] btrfs: add checksum implementation selection after mount
  2022-08-03 14:00     ` David Sterba
@ 2022-08-04 11:12       ` Anand Jain
  0 siblings, 0 replies; 16+ messages in thread
From: Anand Jain @ 2022-08-04 11:12 UTC (permalink / raw)
  To: dsterba, David Sterba, linux-btrfs



On 03/08/2022 22:00, David Sterba wrote:
> On Wed, Aug 03, 2022 at 08:06:46AM +0800, Anand Jain wrote:
>> On 02/08/2022 20:28, David Sterba wrote:
>>> The sysfs file /sys/fs/btrfs/FSID/checksum shows the filesystem checksum
>>> and the crypto module implementing it. In the scenario when there's only
>>> the default generic implementation available during mount it's selected,
>>> even if there's an unloaded module with accelerated version.
>>>
>>> This happens with sha256 that's often built-in so the crypto API may not
>>> trigger loading the modules and select the fastest implementation. Such
>>> filesystem could be left using in the generic for the whole time.
>>> Remount can't fix that and full umount/mount cycle may be impossible eg.
>>> for a root filesystem.
>>>
>>> Allow writing strings to the sysfs checksum file that will trigger
>>> loading the crypto shash again and check if the found module is the
>>> desired one.
>>>
>>> Possible values:
>>>
>>> - default - select whatever is considered default by crypto subsystem,
>>>               either generic or accelerated
>>> - accel   - try loading an accelerated implementation (must not contain
>>>               "generic" in the name)
>>> - generic - load and select the generic implementation
>>>
>>> A typical scenario, assuming sha256 is built-in:
>>>
>>>     $ mkfs.btrfs --csum sha256
>>>     $ lsmod | grep sha256
>>>     $ mount /dev /mnt
>>>     $ cat ...FSID/checksum
>>>     sha256 (sha256-generic)
>>>     $ modprobe sha256
>>>     $ lsmod | grep sha256
>>>     sha256_ssse3
>>>     $ echo accel > ...FSID/checksum
>>>     sha256 (sha256-ni)
>>
>> I am not sure if I understand the use of slots 1 and 2 correctly.
>> As each checksum type can be either generic or accelerated, slot 0 is
>> the default which tells the preferred method. So slot0 is either
>> CSUM_GENERIC or CSUM_ACCEL. And by default, we prefer accelerated when
>> available or user requests. So I am still not getting the purpose of
>> slot1 and 2. Instead, overwriting slot0 will still do the job without
>> slot1 and 2.
> 
> We need to keep track of all allocated hashes so they don't leak, which
> would happen if slot 0 is overwritten after the switch. Also we must not
> free the hash from sysfs because it could be in use. That's why there
> are slots that keep track of the allocated structures and slot 0 that's
> provided to use it for checksumming, merely as an alias.
> 

> The pointer switch is atomic (regarding the pointer value, not the
> atomic_t semantics) so once some function uses it for shash it'll stay
> there even if it won't be the default anymore.

How would that help? The read part depends on the CSUM_DEFAULT, which 
can change per user request. As below.

+	/* Select it */
+	fs_info->csum_shash[CSUM_DEFAULT] = fs_info->csum_shash[type];


> 
> I could add READ_ONCE/WRITE_ONCE for the slots so it's clear the access
> is done in some unusal way, as we do with other sysfs variables.
> 

IMO it is a must as we do read twice in all functions. For example:

disk-io.c:	SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
disk-io.c:	shash->tfm = fs_info->csum_shash;


>>> The crypto shash for all slots has the same lifetime as the mount, so
>>> it's not possible to unload the crypto module.
>>
>> How does the update of the accelerated module work if a btrfs is a
>> non-root filesystem? Does it need a reboot?
> 
> By update you mean rmmod/insmod with eventual update of the .ko file?
Yes.

> That won't work due to reference counts, so reloading the module would
> require unmounting the filesystem, or reboot.

ok.

> Freeing the allocated hash structure could be implemented by usage
> counters when there's some checksumming in progress, the sysfs write
> would wait until there are no users left and then free it.

Looks like we need something that.


A basic question

Isn't that a module load at the boot will solve the problem?

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v2 3/4] btrfs: add checksum implementation selection after mount
  2022-08-03  0:06   ` Anand Jain
@ 2022-08-03 14:00     ` David Sterba
  2022-08-04 11:12       ` Anand Jain
  0 siblings, 1 reply; 16+ messages in thread
From: David Sterba @ 2022-08-03 14:00 UTC (permalink / raw)
  To: Anand Jain; +Cc: David Sterba, linux-btrfs

On Wed, Aug 03, 2022 at 08:06:46AM +0800, Anand Jain wrote:
> On 02/08/2022 20:28, David Sterba wrote:
> > The sysfs file /sys/fs/btrfs/FSID/checksum shows the filesystem checksum
> > and the crypto module implementing it. In the scenario when there's only
> > the default generic implementation available during mount it's selected,
> > even if there's an unloaded module with accelerated version.
> > 
> > This happens with sha256 that's often built-in so the crypto API may not
> > trigger loading the modules and select the fastest implementation. Such
> > filesystem could be left using in the generic for the whole time.
> > Remount can't fix that and full umount/mount cycle may be impossible eg.
> > for a root filesystem.
> > 
> > Allow writing strings to the sysfs checksum file that will trigger
> > loading the crypto shash again and check if the found module is the
> > desired one.
> > 
> > Possible values:
> > 
> > - default - select whatever is considered default by crypto subsystem,
> >              either generic or accelerated
> > - accel   - try loading an accelerated implementation (must not contain
> >              "generic" in the name)
> > - generic - load and select the generic implementation
> > 
> > A typical scenario, assuming sha256 is built-in:
> > 
> >    $ mkfs.btrfs --csum sha256
> >    $ lsmod | grep sha256
> >    $ mount /dev /mnt
> >    $ cat ...FSID/checksum
> >    sha256 (sha256-generic)
> >    $ modprobe sha256
> >    $ lsmod | grep sha256
> >    sha256_ssse3
> >    $ echo accel > ...FSID/checksum
> >    sha256 (sha256-ni)
> 
> I am not sure if I understand the use of slots 1 and 2 correctly.
> As each checksum type can be either generic or accelerated, slot 0 is
> the default which tells the preferred method. So slot0 is either
> CSUM_GENERIC or CSUM_ACCEL. And by default, we prefer accelerated when
> available or user requests. So I am still not getting the purpose of
> slot1 and 2. Instead, overwriting slot0 will still do the job without
> slot1 and 2.

We need to keep track of all allocated hashes so they don't leak, which
would happen if slot 0 is overwritten after the switch. Also we must not
free the hash from sysfs because it could be in use. That's why there
are slots that keep track of the allocated structures and slot 0 that's
provided to use it for checksumming, merely as an alias.

The pointer switch is atomic (regarding the pointer value, not the
atomic_t semantics) so once some function uses it for shash it'll stay
there even if it won't be the default anymore.

I could add READ_ONCE/WRITE_ONCE for the slots so it's clear the access
is done in some unusal way, as we do with other sysfs variables.

> > The crypto shash for all slots has the same lifetime as the mount, so
> > it's not possible to unload the crypto module.
> 
> How does the update of the accelerated module work if a btrfs is a
> non-root filesystem? Does it need a reboot?

By update you mean rmmod/insmod with eventual update of the .ko file?
That won't work due to reference counts, so reloading the module would
require unmounting the filesystem, or reboot.

Freeing the allocated hash structure could be implemented by usage
counters when there's some checksumming in progress, the sysfs write
would wait until there are no users left and then free it.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v2 3/4] btrfs: add checksum implementation selection after mount
  2022-08-03  0:22   ` Anand Jain
@ 2022-08-03 13:24     ` David Sterba
  0 siblings, 0 replies; 16+ messages in thread
From: David Sterba @ 2022-08-03 13:24 UTC (permalink / raw)
  To: Anand Jain; +Cc: David Sterba, linux-btrfs

On Wed, Aug 03, 2022 at 08:22:05AM +0800, Anand Jain wrote:
> On 02/08/2022 20:28, David Sterba wrote:
> > The sysfs file /sys/fs/btrfs/FSID/checksum shows the filesystem checksum
> > and the crypto module implementing it.
> 
> 
> > In the scenario when there's only
> > the default generic implementation available during mount it's selected,
> > even if there's an unloaded module with accelerated version.
> 
> > This happens with sha256 that's often built-in so the crypto API may not
> > trigger loading the modules and select the fastest implementation.
> 
> Hmm ok.
> What happens in the scenario if the accelerated module is loaded? Will 
> the crypto API use the accelerated module?

Yes, each implementation of the hash has a priority and the loading
mechanism goes through all registered implementations and picks the one
with highest priority. Generic is 100, sha256-ni is 250, there are 3
more implementations based on CPU flags, you can see it in
arch/x86_64/crypto/sha256_ssse3_glue.c .

There are some tricks for crc32c that trigger loading the accelerated
module even if there's the generic available, and since sha256 has
become mandatory for some security features (signing) the generic one is
always available. Similar mechanism as for crc32c is not done.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v2 3/4] btrfs: add checksum implementation selection after mount
  2022-08-02 12:28 ` [PATCH v2 3/4] btrfs: add checksum implementation selection after mount David Sterba
  2022-08-02 13:00   ` Johannes Thumshirn
  2022-08-03  0:06   ` Anand Jain
@ 2022-08-03  0:22   ` Anand Jain
  2022-08-03 13:24     ` David Sterba
  2 siblings, 1 reply; 16+ messages in thread
From: Anand Jain @ 2022-08-03  0:22 UTC (permalink / raw)
  To: David Sterba, linux-btrfs

On 02/08/2022 20:28, David Sterba wrote:
> The sysfs file /sys/fs/btrfs/FSID/checksum shows the filesystem checksum
> and the crypto module implementing it.


> In the scenario when there's only
> the default generic implementation available during mount it's selected,
> even if there's an unloaded module with accelerated version.

> This happens with sha256 that's often built-in so the crypto API may not
> trigger loading the modules and select the fastest implementation.

Hmm ok.
What happens in the scenario if the accelerated module is loaded? Will 
the crypto API use the accelerated module?

- Anand

> Such
> filesystem could be left using in the generic for the whole time.
> Remount can't fix that and full umount/mount cycle may be impossible eg.
> for a root filesystem.
> 
> Allow writing strings to the sysfs checksum file that will trigger
> loading the crypto shash again and check if the found module is the
> desired one.
> 
> Possible values:
> 
> - default - select whatever is considered default by crypto subsystem,
>              either generic or accelerated
> - accel   - try loading an accelerated implementation (must not contain
>              "generic" in the name)
> - generic - load and select the generic implementation
> 
> A typical scenario, assuming sha256 is built-in:
> 
>    $ mkfs.btrfs --csum sha256
>    $ lsmod | grep sha256
>    $ mount /dev /mnt
>    $ cat ...FSID/checksum
>    sha256 (sha256-generic)
>    $ modprobe sha256
>    $ lsmod | grep sha256
>    sha256_ssse3
>    $ echo accel > ...FSID/checksum
>    sha256 (sha256-ni)
> 
> The crypto shash for all slots has the same lifetime as the mount, so
> it's not possible to unload the crypto module.
> 
> Signed-off-by: David Sterba <dsterba@suse.com>
> ---
>   fs/btrfs/sysfs.c | 86 +++++++++++++++++++++++++++++++++++++++++++++++-
>   1 file changed, 85 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
> index cc943b236c92..0044644056ed 100644
> --- a/fs/btrfs/sysfs.c
> +++ b/fs/btrfs/sysfs.c
> @@ -1100,7 +1100,91 @@ static ssize_t btrfs_checksum_show(struct kobject *kobj,
>   			  crypto_shash_driver_name(fs_info->csum_shash[CSUM_DEFAULT]));
>   }
>   
> -BTRFS_ATTR(, checksum, btrfs_checksum_show);
> +static const char csum_impl[][8] = {
> +	[CSUM_DEFAULT]	= "default",
> +	[CSUM_GENERIC]	= "generic",
> +	[CSUM_ACCEL]	= "accel",
> +};
> +
> +static int select_checksum(struct btrfs_fs_info *fs_info, enum btrfs_csum_impl type)
> +{
> +	/* Already set */
> +	if (fs_info->csum_shash[CSUM_DEFAULT] == fs_info->csum_shash[type])
> +		return 0;
> +
> +	/* Allocate new if needed */
> +	if (!fs_info->csum_shash[type]) {
> +		const u16 csum_type = btrfs_super_csum_type(fs_info->super_copy);
> +		const char *csum_driver = btrfs_super_csum_driver(csum_type);
> +		struct crypto_shash *shash1, *tmp;
> +		char full_name[32];
> +		u32 mask = 0;
> +
> +		/*
> +		 * Generic must be requested explicitly, otherwise it could
> +		 * be accelerated implementation with highest priority.
> +		 */
> +		scnprintf(full_name, sizeof(full_name), "%s%s", csum_driver,
> +			  (type == CSUM_GENERIC ? "-generic" : ""));
> +
> +		shash1 = crypto_alloc_shash(full_name, 0, mask);
> +		if (IS_ERR(shash1))
> +			return PTR_ERR(shash1);
> +
> +		/* Accelerated requested but generic found */
> +		if (type != CSUM_GENERIC &&
> +		    strstr(crypto_shash_driver_name(shash1), "generic")) {
> +			crypto_free_shash(shash1);
> +			return -ENOENT;
> +		}
> +
> +		tmp = cmpxchg(&fs_info->csum_shash[type], NULL, shash1);
> +		if (tmp) {
> +			/* Something raced in */
> +			crypto_free_shash(shash1);
> +		}
> +	}
> +
> +	/* Select it */
> +	fs_info->csum_shash[CSUM_DEFAULT] = fs_info->csum_shash[type];
> +	return 0;
> +}
> +
> +static bool strmatch(const char *buffer, const char *string);
> +
> +static ssize_t btrfs_checksum_store(struct kobject *kobj,
> +				    struct kobj_attribute *a,
> +				    const char *buf, size_t len)
> +{
> +	struct btrfs_fs_info *fs_info = to_fs_info(kobj);
> +
> +	if (!fs_info)
> +		return -EPERM;
> +
> +	/* Pick the best available, generic or accelerated */
> +	if (strmatch(buf, csum_impl[CSUM_DEFAULT])) {
> +		if (fs_info->csum_shash[CSUM_ACCEL]) {
> +			fs_info->csum_shash[CSUM_DEFAULT] =
> +				fs_info->csum_shash[CSUM_ACCEL];
> +			return len;
> +		}
> +		ASSERT(fs_info->csum_shash[CSUM_GENERIC]);
> +		fs_info->csum_shash[CSUM_DEFAULT] = fs_info->csum_shash[CSUM_GENERIC];
> +		return len;
> +	}
> +
> +	for (int i = 1; i < CSUM_COUNT; i++) {
> +		if (strmatch(buf, csum_impl[i])) {
> +			int ret;
> +
> +			ret = select_checksum(fs_info, i);
> +			return ret < 0 ? ret : len;
> +		}
> +	}
> +
> +	return -EINVAL;
> +}
> +BTRFS_ATTR_RW(, checksum, btrfs_checksum_show, btrfs_checksum_store);
>   
>   static ssize_t btrfs_exclusive_operation_show(struct kobject *kobj,
>   		struct kobj_attribute *a, char *buf)


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v2 3/4] btrfs: add checksum implementation selection after mount
  2022-08-02 12:28 ` [PATCH v2 3/4] btrfs: add checksum implementation selection after mount David Sterba
  2022-08-02 13:00   ` Johannes Thumshirn
@ 2022-08-03  0:06   ` Anand Jain
  2022-08-03 14:00     ` David Sterba
  2022-08-03  0:22   ` Anand Jain
  2 siblings, 1 reply; 16+ messages in thread
From: Anand Jain @ 2022-08-03  0:06 UTC (permalink / raw)
  To: David Sterba, linux-btrfs

On 02/08/2022 20:28, David Sterba wrote:
> The sysfs file /sys/fs/btrfs/FSID/checksum shows the filesystem checksum
> and the crypto module implementing it. In the scenario when there's only
> the default generic implementation available during mount it's selected,
> even if there's an unloaded module with accelerated version.
> 
> This happens with sha256 that's often built-in so the crypto API may not
> trigger loading the modules and select the fastest implementation. Such
> filesystem could be left using in the generic for the whole time.
> Remount can't fix that and full umount/mount cycle may be impossible eg.
> for a root filesystem.
> 
> Allow writing strings to the sysfs checksum file that will trigger
> loading the crypto shash again and check if the found module is the
> desired one.
> 
> Possible values:
> 
> - default - select whatever is considered default by crypto subsystem,
>              either generic or accelerated
> - accel   - try loading an accelerated implementation (must not contain
>              "generic" in the name)
> - generic - load and select the generic implementation
> 
> A typical scenario, assuming sha256 is built-in:
> 
>    $ mkfs.btrfs --csum sha256
>    $ lsmod | grep sha256
>    $ mount /dev /mnt
>    $ cat ...FSID/checksum
>    sha256 (sha256-generic)
>    $ modprobe sha256
>    $ lsmod | grep sha256
>    sha256_ssse3
>    $ echo accel > ...FSID/checksum
>    sha256 (sha256-ni)

I am not sure if I understand the use of slots 1 and 2 correctly.
As each checksum type can be either generic or accelerated, slot 0 is
the default which tells the preferred method. So slot0 is either
CSUM_GENERIC or CSUM_ACCEL. And by default, we prefer accelerated when
available or user requests. So I am still not getting the purpose of
slot1 and 2. Instead, overwriting slot0 will still do the job without
slot1 and 2.

> The crypto shash for all slots has the same lifetime as the mount, so
> it's not possible to unload the crypto module.

How does the update of the accelerated module work if a btrfs is a
non-root filesystem? Does it need a reboot?

-Anand


> Signed-off-by: David Sterba <dsterba@suse.com>
> ---
>   fs/btrfs/sysfs.c | 86 +++++++++++++++++++++++++++++++++++++++++++++++-
>   1 file changed, 85 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
> index cc943b236c92..0044644056ed 100644
> --- a/fs/btrfs/sysfs.c
> +++ b/fs/btrfs/sysfs.c
> @@ -1100,7 +1100,91 @@ static ssize_t btrfs_checksum_show(struct kobject *kobj,
>   			  crypto_shash_driver_name(fs_info->csum_shash[CSUM_DEFAULT]));
>   }
>   
> -BTRFS_ATTR(, checksum, btrfs_checksum_show);
> +static const char csum_impl[][8] = {
> +	[CSUM_DEFAULT]	= "default",
> +	[CSUM_GENERIC]	= "generic",
> +	[CSUM_ACCEL]	= "accel",
> +};
> +
> +static int select_checksum(struct btrfs_fs_info *fs_info, enum btrfs_csum_impl type)
> +{
> +	/* Already set */
> +	if (fs_info->csum_shash[CSUM_DEFAULT] == fs_info->csum_shash[type])
> +		return 0;
> +
> +	/* Allocate new if needed */
> +	if (!fs_info->csum_shash[type]) {
> +		const u16 csum_type = btrfs_super_csum_type(fs_info->super_copy);
> +		const char *csum_driver = btrfs_super_csum_driver(csum_type);
> +		struct crypto_shash *shash1, *tmp;
> +		char full_name[32];
> +		u32 mask = 0;
> +
> +		/*
> +		 * Generic must be requested explicitly, otherwise it could
> +		 * be accelerated implementation with highest priority.
> +		 */
> +		scnprintf(full_name, sizeof(full_name), "%s%s", csum_driver,
> +			  (type == CSUM_GENERIC ? "-generic" : ""));
> +
> +		shash1 = crypto_alloc_shash(full_name, 0, mask);
> +		if (IS_ERR(shash1))
> +			return PTR_ERR(shash1);
> +
> +		/* Accelerated requested but generic found */
> +		if (type != CSUM_GENERIC &&
> +		    strstr(crypto_shash_driver_name(shash1), "generic")) {
> +			crypto_free_shash(shash1);
> +			return -ENOENT;
> +		}
> +
> +		tmp = cmpxchg(&fs_info->csum_shash[type], NULL, shash1);
> +		if (tmp) {
> +			/* Something raced in */
> +			crypto_free_shash(shash1);
> +		}
> +	}
> +
> +	/* Select it */
> +	fs_info->csum_shash[CSUM_DEFAULT] = fs_info->csum_shash[type];
> +	return 0;
> +}
> +
> +static bool strmatch(const char *buffer, const char *string);
> +
> +static ssize_t btrfs_checksum_store(struct kobject *kobj,
> +				    struct kobj_attribute *a,
> +				    const char *buf, size_t len)
> +{
> +	struct btrfs_fs_info *fs_info = to_fs_info(kobj);
> +
> +	if (!fs_info)
> +		return -EPERM;
> +
> +	/* Pick the best available, generic or accelerated */
> +	if (strmatch(buf, csum_impl[CSUM_DEFAULT])) {
> +		if (fs_info->csum_shash[CSUM_ACCEL]) {
> +			fs_info->csum_shash[CSUM_DEFAULT] =
> +				fs_info->csum_shash[CSUM_ACCEL];
> +			return len;
> +		}
> +		ASSERT(fs_info->csum_shash[CSUM_GENERIC]);
> +		fs_info->csum_shash[CSUM_DEFAULT] = fs_info->csum_shash[CSUM_GENERIC];
> +		return len;
> +	}
> +
> +	for (int i = 1; i < CSUM_COUNT; i++) {
> +		if (strmatch(buf, csum_impl[i])) {
> +			int ret;
> +
> +			ret = select_checksum(fs_info, i);
> +			return ret < 0 ? ret : len;
> +		}
> +	}
> +
> +	return -EINVAL;
> +}
> +BTRFS_ATTR_RW(, checksum, btrfs_checksum_show, btrfs_checksum_store);
>   
>   static ssize_t btrfs_exclusive_operation_show(struct kobject *kobj,
>   		struct kobj_attribute *a, char *buf)


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v2 3/4] btrfs: add checksum implementation selection after mount
  2022-08-02 13:00   ` Johannes Thumshirn
@ 2022-08-02 13:21     ` David Sterba
  0 siblings, 0 replies; 16+ messages in thread
From: David Sterba @ 2022-08-02 13:21 UTC (permalink / raw)
  To: Johannes Thumshirn; +Cc: David Sterba, linux-btrfs

On Tue, Aug 02, 2022 at 01:00:28PM +0000, Johannes Thumshirn wrote:
> On 02.08.22 14:33, David Sterba wrote:
> > +static bool strmatch(const char *buffer, const char *string);
> > +
> > +static ssize_t btrfs_checksum_store(struct kobject *kobj,
> > +				    struct kobj_attribute *a,
> > +				    const char *buf, size_t len)
> > +{
> > +	struct btrfs_fs_info *fs_info = to_fs_info(kobj);
> > +
> > +	if (!fs_info)
> > +		return -EPERM;
> > +
> > +	/* Pick the best available, generic or accelerated */
> > +	if (strmatch(buf, csum_impl[CSUM_DEFAULT])) {
> 
> Same question as with v1, why not sysfs_streq()?

The semantics a bit different with our strmatch that skips initial
whitespace, but otherwise should be same. Nevertheless we should
probablly drop strmatch and use sysfs_streq. I'll change it in this
patch and send another to delete the other one.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v2 3/4] btrfs: add checksum implementation selection after mount
  2022-08-02 12:28 ` [PATCH v2 3/4] btrfs: add checksum implementation selection after mount David Sterba
@ 2022-08-02 13:00   ` Johannes Thumshirn
  2022-08-02 13:21     ` David Sterba
  2022-08-03  0:06   ` Anand Jain
  2022-08-03  0:22   ` Anand Jain
  2 siblings, 1 reply; 16+ messages in thread
From: Johannes Thumshirn @ 2022-08-02 13:00 UTC (permalink / raw)
  To: David Sterba, linux-btrfs

On 02.08.22 14:33, David Sterba wrote:
> +static bool strmatch(const char *buffer, const char *string);
> +
> +static ssize_t btrfs_checksum_store(struct kobject *kobj,
> +				    struct kobj_attribute *a,
> +				    const char *buf, size_t len)
> +{
> +	struct btrfs_fs_info *fs_info = to_fs_info(kobj);
> +
> +	if (!fs_info)
> +		return -EPERM;
> +
> +	/* Pick the best available, generic or accelerated */
> +	if (strmatch(buf, csum_impl[CSUM_DEFAULT])) {

Same question as with v1, why not sysfs_streq()?

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH v2 3/4] btrfs: add checksum implementation selection after mount
  2022-08-02 12:28 David Sterba
@ 2022-08-02 12:28 ` David Sterba
  2022-08-02 13:00   ` Johannes Thumshirn
                     ` (2 more replies)
  0 siblings, 3 replies; 16+ messages in thread
From: David Sterba @ 2022-08-02 12:28 UTC (permalink / raw)
  To: linux-btrfs; +Cc: David Sterba

The sysfs file /sys/fs/btrfs/FSID/checksum shows the filesystem checksum
and the crypto module implementing it. In the scenario when there's only
the default generic implementation available during mount it's selected,
even if there's an unloaded module with accelerated version.

This happens with sha256 that's often built-in so the crypto API may not
trigger loading the modules and select the fastest implementation. Such
filesystem could be left using in the generic for the whole time.
Remount can't fix that and full umount/mount cycle may be impossible eg.
for a root filesystem.

Allow writing strings to the sysfs checksum file that will trigger
loading the crypto shash again and check if the found module is the
desired one.

Possible values:

- default - select whatever is considered default by crypto subsystem,
            either generic or accelerated
- accel   - try loading an accelerated implementation (must not contain
            "generic" in the name)
- generic - load and select the generic implementation

A typical scenario, assuming sha256 is built-in:

  $ mkfs.btrfs --csum sha256
  $ lsmod | grep sha256
  $ mount /dev /mnt
  $ cat ...FSID/checksum
  sha256 (sha256-generic)
  $ modprobe sha256
  $ lsmod | grep sha256
  sha256_ssse3
  $ echo accel > ...FSID/checksum
  sha256 (sha256-ni)

The crypto shash for all slots has the same lifetime as the mount, so
it's not possible to unload the crypto module.

Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/sysfs.c | 86 +++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 85 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index cc943b236c92..0044644056ed 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -1100,7 +1100,91 @@ static ssize_t btrfs_checksum_show(struct kobject *kobj,
 			  crypto_shash_driver_name(fs_info->csum_shash[CSUM_DEFAULT]));
 }
 
-BTRFS_ATTR(, checksum, btrfs_checksum_show);
+static const char csum_impl[][8] = {
+	[CSUM_DEFAULT]	= "default",
+	[CSUM_GENERIC]	= "generic",
+	[CSUM_ACCEL]	= "accel",
+};
+
+static int select_checksum(struct btrfs_fs_info *fs_info, enum btrfs_csum_impl type)
+{
+	/* Already set */
+	if (fs_info->csum_shash[CSUM_DEFAULT] == fs_info->csum_shash[type])
+		return 0;
+
+	/* Allocate new if needed */
+	if (!fs_info->csum_shash[type]) {
+		const u16 csum_type = btrfs_super_csum_type(fs_info->super_copy);
+		const char *csum_driver = btrfs_super_csum_driver(csum_type);
+		struct crypto_shash *shash1, *tmp;
+		char full_name[32];
+		u32 mask = 0;
+
+		/*
+		 * Generic must be requested explicitly, otherwise it could
+		 * be accelerated implementation with highest priority.
+		 */
+		scnprintf(full_name, sizeof(full_name), "%s%s", csum_driver,
+			  (type == CSUM_GENERIC ? "-generic" : ""));
+
+		shash1 = crypto_alloc_shash(full_name, 0, mask);
+		if (IS_ERR(shash1))
+			return PTR_ERR(shash1);
+
+		/* Accelerated requested but generic found */
+		if (type != CSUM_GENERIC &&
+		    strstr(crypto_shash_driver_name(shash1), "generic")) {
+			crypto_free_shash(shash1);
+			return -ENOENT;
+		}
+
+		tmp = cmpxchg(&fs_info->csum_shash[type], NULL, shash1);
+		if (tmp) {
+			/* Something raced in */
+			crypto_free_shash(shash1);
+		}
+	}
+
+	/* Select it */
+	fs_info->csum_shash[CSUM_DEFAULT] = fs_info->csum_shash[type];
+	return 0;
+}
+
+static bool strmatch(const char *buffer, const char *string);
+
+static ssize_t btrfs_checksum_store(struct kobject *kobj,
+				    struct kobj_attribute *a,
+				    const char *buf, size_t len)
+{
+	struct btrfs_fs_info *fs_info = to_fs_info(kobj);
+
+	if (!fs_info)
+		return -EPERM;
+
+	/* Pick the best available, generic or accelerated */
+	if (strmatch(buf, csum_impl[CSUM_DEFAULT])) {
+		if (fs_info->csum_shash[CSUM_ACCEL]) {
+			fs_info->csum_shash[CSUM_DEFAULT] =
+				fs_info->csum_shash[CSUM_ACCEL];
+			return len;
+		}
+		ASSERT(fs_info->csum_shash[CSUM_GENERIC]);
+		fs_info->csum_shash[CSUM_DEFAULT] = fs_info->csum_shash[CSUM_GENERIC];
+		return len;
+	}
+
+	for (int i = 1; i < CSUM_COUNT; i++) {
+		if (strmatch(buf, csum_impl[i])) {
+			int ret;
+
+			ret = select_checksum(fs_info, i);
+			return ret < 0 ? ret : len;
+		}
+	}
+
+	return -EINVAL;
+}
+BTRFS_ATTR_RW(, checksum, btrfs_checksum_show, btrfs_checksum_store);
 
 static ssize_t btrfs_exclusive_operation_show(struct kobject *kobj,
 		struct kobj_attribute *a, char *buf)
-- 
2.36.1


^ permalink raw reply related	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2022-08-04 11:13 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-07-29 17:42 [PATCH v2 0/4] Selectable checksum implementation David Sterba
2022-07-29 17:42 ` [PATCH v2 1/4] btrfs: prepare more slots for checksum shash David Sterba
2022-07-29 17:42 ` [PATCH v2 2/4] btrfs: assign checksum shash slots on init David Sterba
2022-07-29 17:42 ` [PATCH v2 3/4] btrfs: add checksum implementation selection after mount David Sterba
2022-08-01  8:31   ` Johannes Thumshirn
2022-07-29 17:42 ` [PATCH v2 4/4] btrfs: sysfs: print all loaded csums implementations David Sterba
2022-07-29 21:01 ` [PATCH v2 0/4] Selectable checksum implementation Boris Burkov
2022-08-01 18:55   ` David Sterba
2022-08-02 12:28 David Sterba
2022-08-02 12:28 ` [PATCH v2 3/4] btrfs: add checksum implementation selection after mount David Sterba
2022-08-02 13:00   ` Johannes Thumshirn
2022-08-02 13:21     ` David Sterba
2022-08-03  0:06   ` Anand Jain
2022-08-03 14:00     ` David Sterba
2022-08-04 11:12       ` Anand Jain
2022-08-03  0:22   ` Anand Jain
2022-08-03 13:24     ` David Sterba

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.