Linux-BTRFS Archive on lore.kernel.org
 help / color / Atom feed
From: Vyacheslav Dubeyko <slava@dubeyko.com>
To: Naohiro Aota <naohiro.aota@wdc.com>,
	linux-btrfs@vger.kernel.org, David Sterba <dsterba@suse.com>
Cc: Chris Mason <clm@fb.com>, Josef Bacik <josef@toxicpanda.com>,
	Nikolay Borisov <nborisov@suse.com>,
	Damien Le Moal <damien.lemoal@wdc.com>,
	Johannes Thumshirn <jthumshirn@suse.de>,
	Hannes Reinecke <hare@suse.com>,
	Anand Jain <anand.jain@oracle.com>,
	linux-fsdevel@vger.kernel.org
Subject: Re: [PATCH] libblkid: implement zone-aware probing for HMZONED btrfs
Date: Wed, 04 Dec 2019 15:15:32 +0300
Message-ID: <5eb099b6886358f3a478658e25a26a42ab674e7f.camel@dubeyko.com> (raw)
In-Reply-To: <20191204083023.861495-1-naohiro.aota@wdc.com>

On Wed, 2019-12-04 at 17:30 +0900, Naohiro Aota wrote:
> This is a proof-of-concept patch to make libblkid zone-aware. It can
> probe the magic located at some offset from the beginning of some
> specific zone of a device.
> 
> Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
> ---
>  libblkid/src/blkidP.h            |   4 +
>  libblkid/src/probe.c             |  25 +++++-
>  libblkid/src/superblocks/btrfs.c | 132
> ++++++++++++++++++++++++++++++-
>  3 files changed, 157 insertions(+), 4 deletions(-)
> 
> diff --git a/libblkid/src/blkidP.h b/libblkid/src/blkidP.h
> index f9bbe008406f..5bb6771ee9c6 100644
> --- a/libblkid/src/blkidP.h
> +++ b/libblkid/src/blkidP.h
> @@ -148,6 +148,10 @@ struct blkid_idmag
>  
>  	long		kboff;		/* kilobyte offset of
> superblock */
>  	unsigned int	sboff;		/* byte offset within
> superblock */
> +
> +	int		is_zone;
> +	long		zonenum;
> +	long		kboff_inzone;
>  };

Maybe it makes sense to add comments for the added fields? What do you
think?

>  
>  /*
> diff --git a/libblkid/src/probe.c b/libblkid/src/probe.c
> index f6dd5573d5dd..56e42ac28559 100644
> --- a/libblkid/src/probe.c
> +++ b/libblkid/src/probe.c
> @@ -94,6 +94,7 @@
>  #ifdef HAVE_LINUX_CDROM_H
>  #include <linux/cdrom.h>
>  #endif
> +#include <linux/blkzoned.h>
>  #ifdef HAVE_SYS_STAT_H
>  #include <sys/stat.h>
>  #endif
> @@ -1009,8 +1010,25 @@ int blkid_probe_get_idmag(blkid_probe pr,
> const struct blkid_idinfo *id,
>  	/* try to detect by magic string */
>  	while(mag && mag->magic) {
>  		unsigned char *buf;
> -
> -		off = (mag->kboff + (mag->sboff >> 10)) << 10;
> +		uint64_t kboff;
> +
> +		if (!mag->is_zone)
> +			kboff = mag->kboff;
> +		else {
> +			uint32_t zone_size_sector;
> +			int ret;
> +
> +			ret = ioctl(pr->fd, BLKGETZONESZ,
> &zone_size_sector);
> +			if (ret == EOPNOTSUPP)

Shouldn't this be -EOPNOTSUPP? Or is this a libblkid peculiarity?

> +				goto next;
> +			if (ret)
> +				return -errno;
> +			if (zone_size_sector == 0)
> +				goto next;
> +			kboff = (mag->zonenum * (zone_size_sector <<
> 9)) >> 10;
> +			kboff += mag->kboff_inzone;
> +		}
> +		off = (kboff + (mag->sboff >> 10)) << 10;
>  		buf = blkid_probe_get_buffer(pr, off, 1024);
>  
>  		if (!buf && errno)
> @@ -1020,13 +1038,14 @@ int blkid_probe_get_idmag(blkid_probe pr,
> const struct blkid_idinfo *id,
>  				buf + (mag->sboff & 0x3ff), mag->len))
> {
>  
>  			DBG(LOWPROBE, ul_debug("\tmagic sboff=%u,
> kboff=%ld",
> -				mag->sboff, mag->kboff));
> +				mag->sboff, kboff));
>  			if (offset)
>  				*offset = off + (mag->sboff & 0x3ff);
>  			if (res)
>  				*res = mag;
>  			return BLKID_PROBE_OK;
>  		}
> +next:
>  		mag++;
>  	}
>  
> diff --git a/libblkid/src/superblocks/btrfs.c
> b/libblkid/src/superblocks/btrfs.c
> index f0fde700d896..4254220ef423 100644
> --- a/libblkid/src/superblocks/btrfs.c
> +++ b/libblkid/src/superblocks/btrfs.c
> @@ -9,6 +9,9 @@
>  #include <unistd.h>
>  #include <string.h>
>  #include <stdint.h>
> +#include <stdbool.h>
> +
> +#include <linux/blkzoned.h>
>  
>  #include "superblocks.h"
>  
> @@ -59,11 +62,131 @@ struct btrfs_super_block {
>  	uint8_t label[256];
>  } __attribute__ ((__packed__));
>  
> +#define BTRFS_SUPER_INFO_SIZE 4096

I believe that 4K is a very widely used constant.
Are you sure that it is necessary to introduce an
additional constant? In particular, it looks slightly
strange to see a BTRFS-specific constant here.
Maybe the constant should be generalized?

> +#define SECTOR_SHIFT 9

Are you sure that libblkid doesn't already have such a constant?

> +
> +#define READ 0
> +#define WRITE 1
> +
> +typedef uint64_t u64;
> +typedef uint64_t sector_t;

I see the point of introducing the sector_t type.
But is it really necessary to introduce the u64 type?

> +
> +static int sb_write_pointer(struct blk_zone *zones, u64 *wp_ret)
> +{
> +	bool empty[2];
> +	bool full[2];
> +	sector_t sector;
> +
> +	if (zones[0].type == BLK_ZONE_TYPE_CONVENTIONAL) {
> +		*wp_ret = zones[0].start << SECTOR_SHIFT;
> +		return -ENOENT;
> +	}
> +
> +	empty[0] = zones[0].cond == BLK_ZONE_COND_EMPTY;
> +	empty[1] = zones[1].cond == BLK_ZONE_COND_EMPTY;
> +	full[0] = zones[0].cond == BLK_ZONE_COND_FULL;
> +	full[1] = zones[1].cond == BLK_ZONE_COND_FULL;
> +
> +	/*
> +	 * Possible state of log buffer zones
> +	 *
> +	 *   E I F
> +	 * E * x 0
> +	 * I 0 x 0
> +	 * F 1 1 x
> +	 *
> +	 * Row: zones[0]
> +	 * Col: zones[1]
> +	 * State:
> +	 *   E: Empty, I: In-Use, F: Full
> +	 * Log position:
> +	 *   *: Special case, no superblock is written
> +	 *   0: Use write pointer of zones[0]
> +	 *   1: Use write pointer of zones[1]
> +	 *   x: Invalid state
> +	 */
> +
> +	if (empty[0] && empty[1]) {
> +		/* special case to distinguish no superblock to read */
> +		*wp_ret = zones[0].start << SECTOR_SHIFT;


So, even if we return an error, somebody will still check
the *wp_ret value? That looks slightly unexpected.

> +		return -ENOENT;
> +	} else if (full[0] && full[1]) {
> +		/* cannot determine which zone has the newer superblock
> */
> +		return -EUCLEAN;
> +	} else if (!full[0] && (empty[1] || full[1])) {
> +		sector = zones[0].wp;
> +	} else if (full[0]) {
> +		sector = zones[1].wp;
> +	} else {
> +		return -EUCLEAN;
> +	}
> +	*wp_ret = sector << SECTOR_SHIFT;
> +	return 0;
> +}
> +
> +static int sb_log_offset(uint32_t zone_size_sector, blkid_probe pr,
> +			 uint64_t *offset_ret)
> +{
> +	uint32_t zone_num = 0;
> +	struct blk_zone_report *rep;
> +	struct blk_zone *zones;
> +	size_t rep_size;
> +	int ret;
> +	uint64_t wp;
> +
> +	rep_size = sizeof(struct blk_zone_report) + sizeof(struct
> blk_zone) * 2;
> +	rep = malloc(rep_size);
> +	if (!rep)
> +		return -errno;
> +
> +	memset(rep, 0, rep_size);
> +	rep->sector = zone_num * zone_size_sector;
> +	rep->nr_zones = 2;
> +
> +	ret = ioctl(pr->fd, BLKREPORTZONE, rep);
> +	if (ret)
> +		return -errno;

So, the valid case is when ioctl returns 0? Am I correct?


> +	if (rep->nr_zones != 2) {
> +		free(rep);
> +		return 1;
> +	}
> +
> +	zones = (struct blk_zone *)(rep + 1);
> +
> +	ret = sb_write_pointer(zones, &wp);
> +	if (ret != -ENOENT && ret)
> +		return -EIO;


If ret is positive then we could return the error. Am I correct?


> +	if (ret != -ENOENT) {
> +		if (wp == zones[0].start << SECTOR_SHIFT)
> +			wp = (zones[1].start + zones[1].len) <<
> SECTOR_SHIFT;
> +		wp -= BTRFS_SUPER_INFO_SIZE;
> +	}
> +	*offset_ret = wp;
> +
> +	return 0;
> +}
> +
>  static int probe_btrfs(blkid_probe pr, const struct blkid_idmag
> *mag)
>  {
>  	struct btrfs_super_block *bfs;
> +	uint32_t zone_size_sector;
> +	int ret;
> +
> +	ret = ioctl(pr->fd, BLKGETZONESZ, &zone_size_sector);
> +	if (ret)
> +		return errno;

You returned -errno for the other ioctls above. Is everything correct
here?

> +	if (zone_size_sector != 0) {
> +		uint64_t offset = 0;
>  
> -	bfs = blkid_probe_get_sb(pr, mag, struct btrfs_super_block);
> +		ret = sb_log_offset(zone_size_sector, pr, &offset);
> +		if (ret)
> +			return ret;

What about a positive value of ret? I suppose we need to return ret
only if we have an error. Am I correct?

Thanks,
Viacheslav Dubeyko.

> +		bfs = (struct btrfs_super_block*)
> +			blkid_probe_get_buffer(pr, offset,
> +					       sizeof(struct
> btrfs_super_block));
> +	} else {
> +		bfs = blkid_probe_get_sb(pr, mag, struct
> btrfs_super_block);
> +	}
>  	if (!bfs)
>  		return errno ? -errno : 1;
>  
> @@ -88,6 +211,13 @@ const struct blkid_idinfo btrfs_idinfo =
>  	.magics		=
>  	{
>  	  { .magic = "_BHRfS_M", .len = 8, .sboff = 0x40, .kboff = 64
> },
> +	  /* for HMZONED btrfs */
> +	  { .magic = "!BHRfS_M", .len = 8, .sboff = 0x40,
> +	    .is_zone = 1, .zonenum = 0, .kboff_inzone = 0 },
> +	  { .magic = "_BHRfS_M", .len = 8, .sboff = 0x40,
> +	    .is_zone = 1, .zonenum = 0, .kboff_inzone = 0 },
> +	  { .magic = "_BHRfS_M", .len = 8, .sboff = 0x40,
> +	    .is_zone = 1, .zonenum = 1, .kboff_inzone = 0 },
>  	  { NULL }
>  	}
>  };


  reply index

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-12-04  8:24 [PATCH v5 00/15] btrfs-progs: zoned block device support Naohiro Aota
2019-12-04  8:24 ` [PATCH v5 01/15] btrfs-progs: utils: Introduce queue_param helper function Naohiro Aota
2019-12-04  8:25 ` [PATCH v5 02/15] btrfs-progs: introduce raid parameters variables Naohiro Aota
2019-12-04  8:25 ` [PATCH v5 03/15] btrfs-progs: build: Check zoned block device support Naohiro Aota
2019-12-04  8:25 ` [PATCH v5 04/15] btrfs-progs: add new HMZONED feature flag Naohiro Aota
2019-12-04  8:25 ` [PATCH v5 05/15] btrfs-progs: Introduce zone block device helper functions Naohiro Aota
2019-12-04  8:25 ` [PATCH v5 06/15] btrfs-progs: load and check zone information Naohiro Aota
2019-12-04  8:25 ` [PATCH v5 07/15] btrfs-progs: support discarding zoned device Naohiro Aota
2019-12-04  8:25 ` [PATCH v5 08/15] btrfs-progs: support zero out on zoned block device Naohiro Aota
2019-12-04  8:25 ` [PATCH v5 09/15] btrfs-progs: implement log-structured superblock for HMZONED mode Naohiro Aota
2019-12-04  8:25 ` [PATCH v5 10/15] btrfs-progs: align device extent allocation to zone boundary Naohiro Aota
2019-12-04  8:25 ` [PATCH v5 11/15] btrfs-progs: do sequential allocation in HMZONED mode Naohiro Aota
2019-12-04  8:25 ` [PATCH v5 12/15] btrfs-progs: redirty clean extent buffers in seq Naohiro Aota
2019-12-04  8:25 ` [PATCH v5 13/15] btrfs-progs: mkfs: Zoned block device support Naohiro Aota
2019-12-04  8:25 ` [PATCH v5 14/15] btrfs-progs: device-add: support HMZONED device Naohiro Aota
2019-12-04  8:25 ` [PATCH v5 15/15] btrfs-progs: introduce support for device replace " Naohiro Aota
2019-12-04  8:30 ` [PATCH] libblkid: implement zone-aware probing for HMZONED btrfs Naohiro Aota
2019-12-04 12:15   ` Vyacheslav Dubeyko [this message]
2019-12-06  7:03     ` Naohiro Aota
2019-12-06 15:22       ` David Sterba
2019-12-05 14:51   ` Karel Zak
2019-12-06  7:06     ` Naohiro Aota

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=5eb099b6886358f3a478658e25a26a42ab674e7f.camel@dubeyko.com \
    --to=slava@dubeyko.com \
    --cc=anand.jain@oracle.com \
    --cc=clm@fb.com \
    --cc=damien.lemoal@wdc.com \
    --cc=dsterba@suse.com \
    --cc=hare@suse.com \
    --cc=josef@toxicpanda.com \
    --cc=jthumshirn@suse.de \
    --cc=linux-btrfs@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=naohiro.aota@wdc.com \
    --cc=nborisov@suse.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Linux-BTRFS Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-btrfs/0 linux-btrfs/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-btrfs linux-btrfs/ https://lore.kernel.org/linux-btrfs \
		linux-btrfs@vger.kernel.org
	public-inbox-index linux-btrfs

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-btrfs


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git