From: Nikolay Borisov <nborisov@suse.com>
To: Qu Wenruo <wqu@suse.com>, linux-btrfs@vger.kernel.org
Cc: Josef Bacik <josef@toxicpanda.com>
Subject: Re: [PATCH v7 1/5] btrfs: Introduce per-profile available space facility
Date: Thu, 20 Feb 2020 14:49:34 +0200	[thread overview]
Message-ID: <329cf703-f3f2-4583-73bf-c90c9e001956@suse.com> (raw)
In-Reply-To: <20200211051153.19466-2-wqu@suse.com>

<snip>

> 
> Suggested-by: Josef Bacik <josef@toxicpanda.com>
> Signed-off-by: Qu Wenruo <wqu@suse.com>
> ---
>  fs/btrfs/volumes.c | 216 ++++++++++++++++++++++++++++++++++++++++-----
>  fs/btrfs/volumes.h |  11 +++
>  2 files changed, 207 insertions(+), 20 deletions(-)
> 

<snip>

> +
> +/*
> + * Return 0 if we allocated any virtual(*) chunk, and restore the size to
> + * @allocated_size
> + * Return -ENOSPC if we have no more space to allocate virtual chunk
> + *
> + * *: virtual chunk is a space holder for per-profile available space
> + *    calculator.
> + *    Such virtual chunks won't take on-disk space, thus called virtual, and
> + *    only affects per-profile available space calulation.
> + */

Document that the last parameter is an output value which contains the
size of the allocated virtual chunk.
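
E.g. something along these lines in the comment block above (exact
wording is only a suggestion):

 * @allocated:	output parameter, set to the size of the virtual chunk
 *		that was allocated, in bytes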

> +static int alloc_virtual_chunk(struct btrfs_fs_info *fs_info,
> +			       struct btrfs_device_info *devices_info,
> +			       enum btrfs_raid_types type,
> +			       u64 *allocated)
> +{
> +	const struct btrfs_raid_attr *raid_attr = &btrfs_raid_array[type];
> +	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
> +	struct btrfs_device *device;
> +	u64 stripe_size;
> +	int i;
> +	int ndevs = 0;
> +
> +	lockdep_assert_held(&fs_info->chunk_mutex);
> +
> +	/* Go through devices to collect their unallocated space */
> +	list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) {
> +		u64 avail;
> +		if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA,
> +					&device->dev_state) ||
> +		    test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state))
> +			continue;
> +
> +		if (device->total_bytes > device->bytes_used +
> +				device->virtual_allocated)
> +			avail = device->total_bytes - device->bytes_used -
> +				device->virtual_allocated;
> +		else
> +			avail = 0;
> +
> +		/* And exclude the [0, 1M) reserved space */
> +		if (avail > SZ_1M)
> +			avail -= SZ_1M;
> +		else
> +			avail = 0;
> +
> +		if (avail < fs_info->sectorsize)
> +			continue;
> +		/*
> +		 * Unlike chunk allocator, we don't care about stripe or hole
> +		 * size, so here we use @avail directly
> +		 */
> +		devices_info[ndevs].dev_offset = 0;
> +		devices_info[ndevs].total_avail = avail;
> +		devices_info[ndevs].max_avail = avail;
> +		devices_info[ndevs].dev = device;
> +		++ndevs;
> +	}
> +	sort(devices_info, ndevs, sizeof(struct btrfs_device_info),
> +	     btrfs_cmp_device_info, NULL);
> +	ndevs -= ndevs % raid_attr->devs_increment;

nit: ndevs = rounddown(ndevs, raid_attr->devs_increment);
makes it clearer what's going on. Since you are working with at most an
int, this is not a problem on 32-bit either.


> +	if (ndevs < raid_attr->devs_min)
> +		return -ENOSPC;
> +	if (raid_attr->devs_max)
> +		ndevs = min(ndevs, (int)raid_attr->devs_max);
> +	else
> +		ndevs = min(ndevs, (int)BTRFS_MAX_DEVS(fs_info));

Instead of casting, simply use min_t(int, ndevs, BTRFS_MAX_DEVS...)
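
I.e. (untested):

	if (raid_attr->devs_max)
		ndevs = min_t(int, ndevs, raid_attr->devs_max);
	else
		ndevs = min_t(int, ndevs, BTRFS_MAX_DEVS(fs_info));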

> +
> +	/*
> +	 * Now allocate a virtual chunk using the unallocate space of the

nit: missing d after 'unallocate'

> +	 * device with the least unallocated space.
> +	 */
> +	stripe_size = round_down(devices_info[ndevs - 1].total_avail,
> +				 fs_info->sectorsize);
> +	if (stripe_size == 0)
> +		return -ENOSPC;

Isn't this check redundant? In the loop where you iterate the devices
you always ensure total_avail is at least a sector size, which
guarantees that stripe_size cannot be 0 at this point.
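
For reference, the check in the loop I mean:

	if (avail < fs_info->sectorsize)
		continue;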

> +
> +	for (i = 0; i < ndevs; i++)
> +		devices_info[i].dev->virtual_allocated += stripe_size;
> +	*allocated = stripe_size * (ndevs - raid_attr->nparity) /
> +		     raid_attr->ncopies;
> +	return 0;
> +}
> +
> +static int calc_one_profile_avail(struct btrfs_fs_info *fs_info,
> +				  enum btrfs_raid_types type,
> +				  u64 *result_ret)
> +{
> +	struct btrfs_device_info *devices_info = NULL;
> +	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
> +	struct btrfs_device *device;
> +	u64 allocated;
> +	u64 result = 0;
> +	int ret = 0;
> +

Add a lockdep assertion that chunk_mutex is held, since you access
alloc_list here.
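
I.e. the same assertion alloc_virtual_chunk() already has:

	lockdep_assert_held(&fs_info->chunk_mutex);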

> +	ASSERT(type >= 0 && type < BTRFS_NR_RAID_TYPES);
> +
> +	/* Not enough devices, quick exit, just update the result */
> +	if (fs_devices->rw_devices < btrfs_raid_array[type].devs_min)
> +		goto out;

You can directly return.
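
E.g. (untested, keeping the current semantics of the 'out' label for
this path):

	if (fs_devices->rw_devices < btrfs_raid_array[type].devs_min) {
		*result_ret = 0;
		return 0;
	}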

> +
> +	devices_info = kcalloc(fs_devices->rw_devices, sizeof(*devices_info),
> +			       GFP_NOFS);
> +	if (!devices_info) {
> +		ret = -ENOMEM;
> +		goto out;

Same here.

> +	}
> +	/* Clear virtual chunk used space for each device */
> +	list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list)
> +		device->virtual_allocated = 0;
> +	while (ret == 0) {
> +		ret = alloc_virtual_chunk(fs_info, devices_info, type,
> +					  &allocated);

The 'allocated' variable is used only in this loop, so declare it inside
the loop (see the snippet after the quoted lines below). Ideally we want
variables to have the shortest possible lifetime.

> +		if (ret == 0)
> +			result += allocated;
> +	}
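
For the 'allocated' scoping mentioned above, e.g. (untested):

	while (ret == 0) {
		u64 allocated;

		ret = alloc_virtual_chunk(fs_info, devices_info, type,
					  &allocated);
		if (ret == 0)
			result += allocated;
	}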

Why do you need to call this in a loop? Calling alloc_virtual_chunk()
once should be enough to calculate the available space for the given
raid type.

> +	list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list)
> +		device->virtual_allocated = 0;
> +out:
> +	kfree(devices_info);
> +	if (ret < 0 && ret != -ENOSPC)
> +		return ret;
> +	*result_ret = result;
> +	return 0;
> +}

<snip>

> @@ -259,6 +266,10 @@ struct btrfs_fs_devices {
>  	struct kobject fsid_kobj;
>  	struct kobject *devices_kobj;
>  	struct completion kobj_unregister;
> +
> +	/* Records per-type available space */
> +	spinlock_t per_profile_lock;
> +	u64 per_profile_avail[BTRFS_NR_RAID_TYPES];

Since every avail quantity is independent of any other, can you turn
this into an array of atomic64 values and get rid of the spinlock? My
head spins at how many locks we have in btrfs.
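
Roughly (untested, just to sketch the idea; 'avail' stands for whatever
value the calculator produced):

	/* in struct btrfs_fs_devices */
	atomic64_t per_profile_avail[BTRFS_NR_RAID_TYPES];

	/* update side */
	atomic64_set(&fs_devices->per_profile_avail[type], avail);

	/* read side */
	avail = atomic64_read(&fs_devices->per_profile_avail[type]);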

>  };
>  
>  #define BTRFS_BIO_INLINE_CSUM_SIZE	64
> 

Thread overview: 11+ messages
2020-02-11  5:11 [PATCH v7 0/5] Introduce per-profile available space array to avoid over-confident can_overcommit() Qu Wenruo
2020-02-11  5:11 ` [PATCH v7 1/5] btrfs: Introduce per-profile available space facility Qu Wenruo
2020-02-13 17:12   ` kbuild test robot
2020-02-20 12:49   ` Nikolay Borisov [this message]
2020-02-20 12:59     ` Qu Wenruo
2020-02-20 13:19       ` Nikolay Borisov
2020-02-21  5:16     ` Qu Wenruo
2020-02-11  5:11 ` [PATCH v7 2/5] btrfs: space-info: Use per-profile available space in can_overcommit() Qu Wenruo
2020-02-11  5:11 ` [PATCH v7 3/5] btrfs: statfs: Use pre-calculated per-profile available space Qu Wenruo
2020-02-11  5:11 ` [PATCH v7 4/5] btrfs: Reset device size when btrfs_update_device() failed in btrfs_grow_device() Qu Wenruo
2020-02-11  5:11 ` [PATCH v7 5/5] btrfs: volumes: Revert device used bytes when calc_per_profile_avail() failed Qu Wenruo
