linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Pranay Srivastava <pranjas@gmail.com>
To: Markus Pargmann <mpa@pengutronix.de>
Cc: nbd-general@lists.sourceforge.net, linux-kernel@vger.kernel.org
Subject: Re: [PATCH v4 3/5]nbd: make nbd device wait for its users
Date: Sun, 10 Jul 2016 21:32:07 +0530	[thread overview]
Message-ID: <CA+aCy1Fg7fzU302r-KCePLevHzjGzRu-=sOV99fe4T==BSdDJw@mail.gmail.com> (raw)
In-Reply-To: <6092424.rvLJmOdVvL@galactica.lan>

On Sun, Jul 10, 2016 at 6:32 PM, Markus Pargmann <mpa@pengutronix.de> wrote:
> On 2016 M06 30, Thu 14:02:03 CEST Pranay Kr. Srivastava wrote:
>> When a timeout occurs or a recv fails, then
>> instead of abruplty killing nbd block device
>> wait for its users to finish.
>>
>> This is more required when filesystem(s) like
>> ext2 or ext3 don't expect their buffer heads to
>> disappear while the filesystem is mounted.
>>
>> Each open of a nbd device is refcounted, while
>> the userland program [nbd-client] doing the
>> NBD_DO_IT ioctl would now wait for any other users
>> of this device before invalidating the nbd device.
>>
>> A timedout or a disconnected device, if in use, can't
>> be used until it has been resetted. The reset happens
>> when all tasks having this bdev open closes this bdev.
>>
>> Signed-off-by: Pranay Kr. Srivastava <pranjas@gmail.com>
>> ---
>>  drivers/block/nbd.c | 106
>> ++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 87
>> insertions(+), 19 deletions(-)
>>
>> diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
>> index e362d44..fb56dd2 100644
>> --- a/drivers/block/nbd.c
>> +++ b/drivers/block/nbd.c
>> @@ -72,6 +72,8 @@ struct nbd_device {
>>  #endif
>>       /* This is specifically for calling sock_shutdown, for now. */
>>       struct work_struct ws_shutdown;
>> +     struct kref users;
>> +     struct completion user_completion;
>>  };
>>
>>  #if IS_ENABLED(CONFIG_DEBUG_FS)
>> @@ -99,6 +101,8 @@ static int max_part;
>>  static DEFINE_SPINLOCK(nbd_lock);
>>
>>  static void nbd_ws_func_shutdown(struct work_struct *);
>> +static void nbd_kref_release(struct kref *);
>> +static int nbd_size_clear(struct nbd_device *, struct block_device *);
>
> More function signatures. Why?

To avoid moving code around. But do let me know why function signature(s)
like these are bad; I'm just asking so that I can avoid such things.

>
>>
>>  static inline struct device *nbd_to_dev(struct nbd_device *nbd)
>>  {
>> @@ -145,11 +149,9 @@ static int nbd_size_set(struct nbd_device *nbd, struct
>> block_device *bdev, int blocksize, int nr_blocks)
>>  {
>>       int ret;
>> -
>>       ret = set_blocksize(bdev, blocksize);
>>       if (ret)
>>               return ret;
>> -
>
> Unrelated.
>
>>       nbd->blksize = blocksize;
>>       nbd->bytesize = (loff_t)blocksize * (loff_t)nr_blocks;
>>
>> @@ -197,6 +199,9 @@ static void nbd_xmit_timeout(unsigned long arg)
>>  {
>>       struct nbd_device *nbd = (struct nbd_device *)arg;
>>
>> +     if (nbd->timedout)
>> +             return;
>> +
>
> What does this have to do with the patch?

To avoid re-scheduling the work function. Apparently that did
cause some trouble with ext4 and 10K dd processes.

>
>>       if (list_empty(&nbd->queue_head))
>>               return;
>>
>> @@ -472,8 +477,6 @@ static int nbd_thread_recv(struct nbd_device *nbd,
>> struct block_device *bdev) nbd_end_request(nbd, req);
>>       }
>>
>> -     nbd_size_clear(nbd, bdev);
>> -
>>       device_remove_file(disk_to_dev(nbd->disk), &dev_attr_pid);
>>
>>       nbd->task_recv = NULL;
>> @@ -650,12 +653,13 @@ static int nbd_set_socket(struct nbd_device *nbd,
>> struct socket *sock) int ret = 0;
>>
>>       spin_lock(&nbd->sock_lock);
>> -     if (nbd->sock)
>> +
>> +     if (nbd->sock || nbd->timedout)
>>               ret = -EBUSY;
>
> nbd->timedout is already checked in __nbd_ioctl(), no need to check it twice.
>
>>       else
>>               nbd->sock = sock;
>> -     spin_unlock(&nbd->sock_lock);
>>
>> +     spin_unlock(&nbd->sock_lock);
>
> random modification.
>
>>       return ret;
>>  }
>>
>> @@ -670,6 +674,7 @@ static void nbd_reset(struct nbd_device *nbd)
>>       nbd->flags = 0;
>>       nbd->xmit_timeout = 0;
>>       INIT_WORK(&nbd->ws_shutdown, nbd_ws_func_shutdown);
>> +     init_completion(&nbd->user_completion);
>>       queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue);
>>       del_timer_sync(&nbd->timeout_timer);
>>  }
>> @@ -704,6 +709,9 @@ static void nbd_dev_dbg_close(struct nbd_device *nbd);
>>  static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
>>                      unsigned int cmd, unsigned long arg)
>>  {
>> +     if (nbd->timedout || nbd->disconnect)
>> +             return -EBUSY;
>> +
>>       switch (cmd) {
>>       case NBD_DISCONNECT: {
>>               struct request sreq;
>> @@ -733,7 +741,6 @@ static int __nbd_ioctl(struct block_device *bdev, struct
>> nbd_device *nbd, nbd_clear_que(nbd);
>>               BUG_ON(!list_empty(&nbd->queue_head));
>>               BUG_ON(!list_empty(&nbd->waiting_queue));
>> -             kill_bdev(bdev);
>>               return 0;
>>
>>       case NBD_SET_SOCK: {
>> @@ -752,7 +759,6 @@ static int __nbd_ioctl(struct block_device *bdev, struct
>> nbd_device *nbd,
>>
>>       case NBD_SET_BLKSIZE: {
>>               loff_t bsize = div_s64(nbd->bytesize, arg);
>> -
>
> random modification.
>
>>               return nbd_size_set(nbd, bdev, arg, bsize);
>>       }
>>
>> @@ -804,22 +810,29 @@ static int __nbd_ioctl(struct block_device *bdev,
>> struct nbd_device *nbd, error = nbd_thread_recv(nbd, bdev);
>>               nbd_dev_dbg_close(nbd);
>>               kthread_stop(thread);
>> -             sock_shutdown(nbd);
>> -
>> -             mutex_lock(&nbd->tx_lock);
>> -             nbd->task_recv = NULL;
>>
>> -             nbd_clear_que(nbd);
>> -             kill_bdev(bdev);
>> -             nbd_bdev_reset(bdev);
>> +             sock_shutdown(nbd);
>>
>>               if (nbd->disconnect) /* user requested, ignore socket errors */
>>                       error = 0;
>>               if (nbd->timedout)
>>                       error = -ETIMEDOUT;
>>
>> -             nbd_reset(nbd);
>> +             mutex_lock(&nbd->tx_lock);
>> +             nbd_clear_que(nbd);
>> +             nbd->disconnect = true; /* To kill bdev*/
>> +             mutex_unlock(&nbd->tx_lock);
>> +             cancel_work_sync(&nbd->ws_shutdown);
>> +             kref_put(&nbd->users, nbd_kref_release);
>> +             wait_for_completion(&nbd->user_completion);
>>
>> +             mutex_lock(&bdev->bd_mutex);
>> +             if (!kref_get_unless_zero(&nbd->users))
>> +                     kref_init(&nbd->users);
>
> This kref usage simply looks wrong and confusing. I commented last time
> already
> that I think atomics will work better. Please discuss with me what you think
> before sending out a new version. Otherwise this patch series will increase in
> version forever.

Alright, let's go with atomics.
But why does this look wrong? Are you referring to a partitioned device?

>
>> +             mutex_unlock(&bdev->bd_mutex);
>> +
>> +             mutex_lock(&nbd->tx_lock);
>> +             nbd_reset(nbd);
>>               return error;
>>       }
>>
>> @@ -857,19 +870,74 @@ static int nbd_ioctl(struct block_device *bdev,
>> fmode_t mode,
>>
>>       return error;
>>  }
>> +static void nbd_kref_release(struct kref *kref_users)
>> +{
>> +     struct nbd_device *nbd = container_of(kref_users, struct nbd_device,
>> +                                             users
>> +                                             );
>> +     schedule_work(&nbd->ws_shutdown);
>
> Do we need to schedule work here?

Yes this is for the kill_bdev part. This is the final kick to bdev which happens
after the wait in NBD_DO_IT.

>
>> +}
>> +
>> +static int nbd_open(struct block_device *bdev, fmode_t mode)
>> +{
>> +     struct nbd_device *nbd_dev = bdev->bd_disk->private_data;
>> +
>> +     if (!kref_get_unless_zero(&nbd_dev->users))
>> +             kref_init(&nbd_dev->users);
>> +
>> +     pr_debug("Opening nbd_dev %s. Active users = %u\n",
>> +                     bdev->bd_disk->disk_name,
>> +                     atomic_read(&nbd_dev->users.refcount)
>> +             );
>> +     return 0;
>> +}
>> +
>> +static void nbd_release(struct gendisk *disk, fmode_t mode)
>> +{
>> +     struct nbd_device *nbd_dev = disk->private_data;
>> +
>> +     kref_put(&nbd_dev->users,  nbd_kref_release);
>> +
>> +     pr_debug("Closing nbd_dev %s. Active users = %u\n",
>> +                     disk->disk_name,
>> +                     atomic_read(&nbd_dev->users.refcount)
>> +             );
>> +}
>>
>>  static const struct block_device_operations nbd_fops = {
>>       .owner =        THIS_MODULE,
>>       .ioctl =        nbd_ioctl,
>>       .compat_ioctl = nbd_ioctl,
>> +     .open =         nbd_open,
>> +     .release =      nbd_release
>>  };
>>
>> +
>
> random modification
>
>>  static void nbd_ws_func_shutdown(struct work_struct *ws_nbd)
>>  {
>>       struct nbd_device *nbd_dev = container_of(ws_nbd, struct nbd_device,
>> -                     ws_shutdown);
>> -
>> -     sock_shutdown(nbd_dev);
>> +                                                     ws_shutdown
>> +                                             );
>
> ...???

Tried to match the brackets... that's what you meant earlier?

>
>> +
>> +     struct block_device *bdev = bdget(part_devt(
>> +                                             dev_to_part(nbd_to_dev(nbd_dev))
>> +                                             )
>> +                                     );
>> +     BUG_ON(!bdev);
>
> A simple check would be enough. Or a warning.

Ok, but that's really a bug.

>
>> +     if (nbd_dev->timedout)
>> +             sock_shutdown(nbd_dev);
>
> This timeout check seems unnecessary. If we do not timeout and the socket was
> already closed, the sock_shutdown() will do nothing.
>
>
> So if I understand you correctly you are trying to block all ioctls while you
> are shutting down which is a well a behaviour change of the ioctl interface.
> Why do you think it is better not to allow any changes until everyone closed
> the blockdevice? Shouldn't there be some control left for the user, for
> example
> CLEAR_SOCK?

Ah... Yes, that's indeed what I'm trying to do. Now, say this block device
is mounted and another nbd-client is trying to disconnect it
[CLEAR + DISCONNECT]; then the clear does a kill_bdev. The socket has
already been disconnected, but the device is just not usable in this case.

If, however, we are trying to provide for error recovery, e.g. a live
mounted device that had a timeout with all connections torn down, and then
someone does a set socket on it: is this supported currently?

A change in CLEAR, like not actually killing the bdev, would also not be good. So
it is better to avoid such ioctls while the device is in use, no?

>
> Regards,
>
> Markus
>
>> +
>> +     if (nbd_dev->disconnect) {
>> +             mutex_lock(&nbd_dev->tx_lock);
>> +             nbd_dev->task_recv = NULL;
>> +             nbd_clear_que(nbd_dev);
>> +             kill_bdev(bdev);
>> +             nbd_bdev_reset(bdev);
>> +             mutex_unlock(&nbd_dev->tx_lock);
>> +             nbd_size_clear(nbd_dev, bdev);
>> +             complete(&nbd_dev->user_completion);
>> +     }
>> +     bdput(bdev);
>>  }
>>
>>  #if IS_ENABLED(CONFIG_DEBUG_FS)
>
>



-- 
        ---P.K.S

  reply	other threads:[~2016-07-10 16:02 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-06-30 11:02 [PATCH v4 0/4] nbd: nbd fixes Pranay Kr. Srivastava
2016-06-30 11:02 ` [PATCH v4 1/5]nbd: cleanup nbd_set_socket Pranay Kr. Srivastava
2016-07-07 14:56   ` Pranay Srivastava
2016-07-09  7:36     ` Pranay Srivastava
2016-06-30 11:02 ` [PATCH v4 2/5]nbd: fix might_sleep warning on socket shutdown Pranay Kr. Srivastava
2016-07-04  7:06   ` Pranay Srivastava
2016-07-10 12:25   ` Markus Pargmann
     [not found]     ` <CA+aCy1GZo6Vk9Yy1KXWgyVhcGmVETyuPuhQT=pSVDVxi5qr8ww@mail.gmail.com>
2016-07-13  7:13       ` Markus Pargmann
2016-07-14  5:59         ` Pranay Srivastava
2016-07-16  9:22           ` Pranay Kr Srivastava
2016-07-16  9:22             ` [PATCH v5 2/4] nbd: fix might_sleep warning on socket shutdown Pranay Kr Srivastava
2016-07-16 10:14               ` Pranay Srivastava
2016-06-30 11:02 ` [PATCH v4 3/5]nbd: make nbd device wait for its users Pranay Kr. Srivastava
2016-07-10 13:02   ` Markus Pargmann
2016-07-10 16:02     ` Pranay Srivastava [this message]
2016-07-13  7:54       ` Markus Pargmann
2016-07-14  5:47         ` Pranay Srivastava
2016-07-16 10:36           ` [PATCH v5 3/4] " Pranay Kr Srivastava
2016-07-16 10:42             ` Pranay Srivastava
2016-07-20  7:47             ` Markus Pargmann
2016-06-30 11:02 ` [PATCH v4 4/5]nbd: use i_size_write to assign nbd device size Pranay Kr. Srivastava

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to='CA+aCy1Fg7fzU302r-KCePLevHzjGzRu-=sOV99fe4T==BSdDJw@mail.gmail.com' \
    --to=pranjas@gmail.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mpa@pengutronix.de \
    --cc=nbd-general@lists.sourceforge.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).