All of lore.kernel.org
 help / color / mirror / Atom feed
From: Anand Jain <anand.jain@oracle.com>
To: fdmanana@kernel.org, linux-btrfs@vger.kernel.org, dsterba@suse.cz
Subject: Re: [PATCH v2] Btrfs: avoid deadlock with memory reclaim due to allocation of devices
Date: Fri, 25 Jan 2019 11:40:13 +0800	[thread overview]
Message-ID: <e99ed6ff-7ade-d690-6c90-9c2bba024c46@oracle.com> (raw)
In-Reply-To: <20190111171759.19920-1-fdmanana@kernel.org>



On 01/12/2019 01:17 AM, fdmanana@kernel.org wrote:
> From: Filipe Manana <fdmanana@suse.com>
> 
> In a few places we are allocating a device using the GFP_KERNEL flag when
> it is not safe to do so, because if reclaim is triggered it can cause a
> transaction commit while we are holding the device list mutex. This mutex
> is required in the transaction commit path (at write_all_supers() and
> btrfs_update_commit_device_size()).
> 
> So fix this by setting up a nofs memory allocation context in those cases.
> 
> Fixes: 78f2c9e6dbb14 ("btrfs: device add and remove: use GFP_KERNEL")
> Fixes: e0ae999414238 ("btrfs: preallocate device flush bio")
> Signed-off-by: Filipe Manana <fdmanana@suse.com>
> ---
> 
> V2: Change the approach to fix the problem by setting up nofs contextes
>      where needed.

  So remaining functions which still does GFP_KERNEL are..

     btrfs_init_dev_replace_tgtdev()
     btrfs_init_new_device()
     add_missing_dev()

  Normally most of the device allocations are already done at
  device_list_add(), except for the special three functions above,
  so can we simplify the fix to [1] as there isn't much toll if
  the above three were also under GFP_NOFS.

[1]
--------------------------------------------
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index b2c1d26f577e..3c350ab6535b 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -387,7 +387,7 @@ static struct btrfs_device *__alloc_device(void)
  {
         struct btrfs_device *dev;

-       dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+       dev = kzalloc(sizeof(*dev), GFP_NOFS);
         if (!dev)
                 return ERR_PTR(-ENOMEM);

@@ -395,7 +395,7 @@ static struct btrfs_device *__alloc_device(void)
          * Preallocate a bio that's always going to be used for 
flushing device
          * barriers and matches the device lifespan
          */
-       dev->flush_bio = bio_alloc_bioset(GFP_KERNEL, 0, NULL);
+       dev->flush_bio = bio_alloc_bioset(GFP_NOFS, 0, NULL);
         if (!dev->flush_bio) {
                 kfree(dev);
                 return ERR_PTR(-ENOMEM);
--------------------------------------------

Thanks.



>   fs/btrfs/volumes.c | 33 ++++++++++++++++++++++++++++++---
>   1 file changed, 30 insertions(+), 3 deletions(-)
> 
> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> index 2576b1a379c9..663566baae78 100644
> --- a/fs/btrfs/volumes.c
> +++ b/fs/btrfs/volumes.c
> @@ -14,6 +14,7 @@
>   #include <linux/semaphore.h>
>   #include <linux/uuid.h>
>   #include <linux/list_sort.h>
> +#include <linux/sched/mm.h>
>   #include "ctree.h"
>   #include "extent_map.h"
>   #include "disk-io.h"
> @@ -988,20 +989,29 @@ static noinline struct btrfs_device *device_list_add(const char *path,
>   	}
>   
>   	if (!device) {
> +		unsigned int nofs_flag;
> +
>   		if (fs_devices->opened) {
>   			mutex_unlock(&fs_devices->device_list_mutex);
>   			return ERR_PTR(-EBUSY);
>   		}
>   
> +		/*
> +		 * Setup nofs context because we are holding the device list
> +		 * mutex, which is required for a transaction commit.
> +		 */
> +		nofs_flag = memalloc_nofs_save();
>   		device = btrfs_alloc_device(NULL, &devid,
>   					    disk_super->dev_item.uuid);
>   		if (IS_ERR(device)) {
> +			memalloc_nofs_restore(nofs_flag);
>   			mutex_unlock(&fs_devices->device_list_mutex);
>   			/* we can safely leave the fs_devices entry around */
>   			return device;
>   		}
>   
> -		name = rcu_string_strdup(path, GFP_NOFS);
> +		name = rcu_string_strdup(path, GFP_KERNEL);
> +		memalloc_nofs_restore(nofs_flag);
>   		if (!name) {
>   			btrfs_free_device(device);
>   			mutex_unlock(&fs_devices->device_list_mutex);
> @@ -1137,11 +1147,19 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
>   	/* We have held the volume lock, it is safe to get the devices. */
>   	list_for_each_entry(orig_dev, &orig->devices, dev_list) {
>   		struct rcu_string *name;
> +		unsigned int nofs_flag;
>   
> +		/*
> +		 * Setup nofs context because we are holding the device list
> +		 * mutex, which is required for a transaction commit.
> +		 */
> +		nofs_flag = memalloc_nofs_save();
>   		device = btrfs_alloc_device(NULL, &orig_dev->devid,
>   					    orig_dev->uuid);
> -		if (IS_ERR(device))
> +		if (IS_ERR(device)) {
> +			memalloc_nofs_restore(nofs_flag);
>   			goto error;
> +		}
>   
>   		/*
>   		 * This is ok to do without rcu read locked because we hold the
> @@ -1151,12 +1169,14 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
>   			name = rcu_string_strdup(orig_dev->name->str,
>   					GFP_KERNEL);
>   			if (!name) {
> +				memalloc_nofs_restore(nofs_flag);
>   				btrfs_free_device(device);
>   				goto error;
>   			}
>   			rcu_assign_pointer(device->name, name);
>   		}
>   
> +		memalloc_nofs_restore(nofs_flag);
>   		list_add(&device->dev_list, &fs_devices->devices);
>   		device->fs_devices = fs_devices;
>   		fs_devices->num_devices++;
> @@ -1262,6 +1282,7 @@ static void btrfs_close_one_device(struct btrfs_device *device)
>   	struct btrfs_fs_devices *fs_devices = device->fs_devices;
>   	struct btrfs_device *new_device;
>   	struct rcu_string *name;
> +	unsigned int nofs_flag;
>   
>   	if (device->bdev)
>   		fs_devices->open_devices--;
> @@ -1277,17 +1298,23 @@ static void btrfs_close_one_device(struct btrfs_device *device)
>   
>   	btrfs_close_bdev(device);
>   
> +	/*
> +	 * Setup nofs context because we are holding the device list
> +	 * mutex, which is required for a transaction commit.
> +	 */
> +	nofs_flag = memalloc_nofs_save();
>   	new_device = btrfs_alloc_device(NULL, &device->devid,
>   					device->uuid);
>   	BUG_ON(IS_ERR(new_device)); /* -ENOMEM */
>   
>   	/* Safe because we are under uuid_mutex */
>   	if (device->name) {
> -		name = rcu_string_strdup(device->name->str, GFP_NOFS);
> +		name = rcu_string_strdup(device->name->str, GFP_KERNEL);
>   		BUG_ON(!name); /* -ENOMEM */
>   		rcu_assign_pointer(new_device->name, name);
>   	}
>   
> +	memalloc_nofs_restore(nofs_flag);
>   	list_replace_rcu(&device->dev_list, &new_device->dev_list);
>   	new_device->fs_devices = device->fs_devices;
>   
> 

      parent reply	other threads:[~2019-01-25  3:40 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-12-13 21:17 [PATCH] Btrfs: avoid deadlock with memory reclaim due to allocation of devices fdmanana
2018-12-14  7:27 ` Nikolay Borisov
2019-01-08 11:51 ` Filipe Manana
2019-01-09 18:26 ` David Sterba
2019-01-09 19:48   ` Filipe Manana
2019-01-10  7:32     ` Anand Jain
2019-01-10  7:03   ` Nikolay Borisov
2019-01-11 17:17 ` [PATCH v2] " fdmanana
2019-01-14  8:21   ` Anand Jain
2019-01-18 18:07     ` David Sterba
2019-01-25  2:56       ` Anand Jain
2019-01-25  3:40   ` Anand Jain [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=e99ed6ff-7ade-d690-6c90-9c2bba024c46@oracle.com \
    --to=anand.jain@oracle.com \
    --cc=dsterba@suse.cz \
    --cc=fdmanana@kernel.org \
    --cc=linux-btrfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.