All of lore.kernel.org
 help / color / mirror / Atom feed
From: Ming Lei <ming.lei@redhat.com>
To: Evan Green <evgreen@chromium.org>
Cc: axboe@kernel.dk, Gwendal Grignou <gwendal@chromium.org>,
	asavery@chromium.org, linux-block@vger.kernel.org,
	linux-kernel@vger.kernel.org
Subject: Re: [PATCH 2/2] loop: Better discard support for block devices
Date: Wed, 5 Dec 2018 09:10:59 +0800	[thread overview]
Message-ID: <20181205011059.GA17845@ming.t460p> (raw)
In-Reply-To: <CAE=gft7Fyq+Jb3ZG3PSTR=C0Wjt3uu57+H2bshM6Gu=_0W0YQw@mail.gmail.com>

On Tue, Dec 04, 2018 at 02:19:46PM -0800, Evan Green wrote:
> Hi Ming,
> 
> On Tue, Nov 27, 2018 at 5:26 PM Ming Lei <ming.lei@redhat.com> wrote:
> >
> > On Tue, Oct 30, 2018 at 04:06:24PM -0700, Evan Green wrote:
> > > If the backing device for a loop device is a block device,
> > > then mirror the discard properties of the underlying block
> > > device into the loop device. While in there, differentiate
> > > between REQ_OP_DISCARD and REQ_OP_WRITE_ZEROES, which are
> > > different for block devices, but which the loop device had
> > > just been lumping together.
> > >
> > > Signed-off-by: Evan Green <evgreen@chromium.org>
> > > ---
> > >
> > >  drivers/block/loop.c | 61 +++++++++++++++++++++++++++++++++++-----------------
> > >  1 file changed, 41 insertions(+), 20 deletions(-)
> > >
> > > diff --git a/drivers/block/loop.c b/drivers/block/loop.c
> > > index 28990fc94841a..176e65101c4ef 100644
> > > --- a/drivers/block/loop.c
> > > +++ b/drivers/block/loop.c
> > > @@ -417,19 +417,14 @@ static int lo_read_transfer(struct loop_device *lo, struct request *rq,
> > >       return ret;
> > >  }
> > >
> > > -static int lo_discard(struct loop_device *lo, struct request *rq, loff_t pos)
> > > +static int lo_discard(struct loop_device *lo, struct request *rq,
> > > +             int mode, loff_t pos)
> > >  {
> > > -     /*
> > > -      * We use punch hole to reclaim the free space used by the
> > > -      * image a.k.a. discard. However we do not support discard if
> > > -      * encryption is enabled, because it may give an attacker
> > > -      * useful information.
> > > -      */
> > >       struct file *file = lo->lo_backing_file;
> > > -     int mode = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE;
> > > +     struct request_queue *q = lo->lo_queue;
> > >       int ret;
> > >
> > > -     if ((!file->f_op->fallocate) || lo->lo_encrypt_key_size) {
> > > +     if (!blk_queue_discard(q)) {
> > >               ret = -EOPNOTSUPP;
> > >               goto out;
> > >       }
> > > @@ -603,8 +598,13 @@ static int do_req_filebacked(struct loop_device *lo, struct request *rq)
> > >       case REQ_OP_FLUSH:
> > >               return lo_req_flush(lo, rq);
> > >       case REQ_OP_DISCARD:
> > > +             return lo_discard(lo, rq,
> > > +                     FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, pos);
> > > +
> > >       case REQ_OP_WRITE_ZEROES:
> > > -             return lo_discard(lo, rq, pos);
> > > +             return lo_discard(lo, rq,
> > > +                     FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE, pos);
> > > +
> > >       case REQ_OP_WRITE:
> > >               if (lo->transfer)
> > >                       return lo_write_transfer(lo, rq, pos);
> > > @@ -859,6 +859,25 @@ static void loop_config_discard(struct loop_device *lo)
> > >       struct file *file = lo->lo_backing_file;
> > >       struct inode *inode = file->f_mapping->host;
> > >       struct request_queue *q = lo->lo_queue;
> > > +     struct request_queue *backingq;
> > > +
> > > +     /*
> > > +      * If the backing device is a block device, mirror its discard
> > > +      * capabilities.
> > > +      */
> > > +     if (S_ISBLK(inode->i_mode)) {
> > > +             backingq = bdev_get_queue(inode->i_bdev);
> > > +             blk_queue_max_discard_sectors(q,
> > > +                     backingq->limits.max_discard_sectors);
> > > +
> > > +             blk_queue_max_write_zeroes_sectors(q,
> > > +                     backingq->limits.max_write_zeroes_sectors);
> > > +
> > > +             q->limits.discard_granularity =
> > > +                     backingq->limits.discard_granularity;
> > > +
> > > +             q->limits.discard_alignment =
> > > +                     backingq->limits.discard_alignment;
> >
> > I think it isn't necessary to mirror backing queue's discard/write_zeros
> > capabilities, given either fs of the underlying queue can deal with well.
> >
> > >
> > >       /*
> > >        * We use punch hole to reclaim the free space used by the
> > > @@ -866,22 +885,24 @@ static void loop_config_discard(struct loop_device *lo)
> > >        * encryption is enabled, because it may give an attacker
> > >        * useful information.
> > >        */
> > > -     if ((!file->f_op->fallocate) ||
> > > -         lo->lo_encrypt_key_size) {
> > > +     } else if ((!file->f_op->fallocate) || lo->lo_encrypt_key_size) {
> > >               q->limits.discard_granularity = 0;
> > >               q->limits.discard_alignment = 0;
> > >               blk_queue_max_discard_sectors(q, 0);
> > >               blk_queue_max_write_zeroes_sectors(q, 0);
> > > -             blk_queue_flag_clear(QUEUE_FLAG_DISCARD, q);
> > > -             return;
> > > -     }
> > >
> > > -     q->limits.discard_granularity = inode->i_sb->s_blocksize;
> > > -     q->limits.discard_alignment = 0;
> > > +     } else {
> > > +             q->limits.discard_granularity = inode->i_sb->s_blocksize;
> > > +             q->limits.discard_alignment = 0;
> > > +
> > > +             blk_queue_max_discard_sectors(q, UINT_MAX >> 9);
> > > +             blk_queue_max_write_zeroes_sectors(q, UINT_MAX >> 9);
> > > +     }
> > >
> > > -     blk_queue_max_discard_sectors(q, UINT_MAX >> 9);
> > > -     blk_queue_max_write_zeroes_sectors(q, UINT_MAX >> 9);
> > > -     blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
> > > +     if (q->limits.max_discard_sectors || q->limits.max_write_zeroes_sectors)
> > > +             blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
> > > +     else
> > > +             blk_queue_flag_clear(QUEUE_FLAG_DISCARD, q);
> > >  }
> >
> > Looks it should work just by mirroring backing queue's discard
> > capability to loop queue in case that the loop is backed by
> > block device, doesn't it? Meantime the unified discard limits &
> > write_zeros limits can be kept.
> 
> I tested this out, and you're right that I could just flip the
> QUEUE_FLAG_DISCARD based on whether its a block device, and leave

What I meant actually is to do the following discard config:

	bool discard;
	if (S_ISBLK(inode->i_mode)) {
		struct request_queue *backingq = bdev_get_queue(inode->i_bdev);
		discard = blk_queue_discard(backingq);
	} else if ((!file->f_op->fallocate) || lo->lo_encrypt_key_size)
		discard = false;
	else
		discard = true;

	if (discard) {
		blk_queue_max_discard_sectors(q, UINT_MAX >> 9);
		blk_queue_max_write_zeroes_sectors(q, UINT_MAX >> 9);
		blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
	} else {
		blk_queue_flag_clear(QUEUE_FLAG_DISCARD, q);
	}

> everything else alone, to completely disable discard support for loop
> devices backed by block devices. This seems to work for programs like
> mkfs.ext4, but still leaves problems for coreutils cp.
> 
> But is that really the right call? With this change, we're not only
> able to use loop devices in this way, but we're able to use the
> discard and zero functionality of the underlying block device by
> simply passing it through. To me that seemed better than disabling all
> discard support for block devices, which would severely slow us down
> on some devices.

I guess the above approach can do the same job with yours, but simpler.

thanks,
Ming

  reply	other threads:[~2018-12-05  1:11 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-10-30 23:06 [PATCH 0/2] loop: Better discard for block devices Evan Green
2018-10-30 23:06 ` [PATCH 1/2] loop: Report EOPNOTSUPP properly Evan Green
2018-11-28  1:06   ` Ming Lei
2018-10-30 23:06 ` [PATCH 2/2] loop: Better discard support for block devices Evan Green
2018-11-26 18:53   ` Evan Green
2018-11-27  2:55     ` Ming Lei
2018-11-27 23:34       ` Evan Green
2018-11-28  1:28         ` Ming Lei
2018-11-28  1:26   ` Ming Lei
2018-12-04 22:19     ` Evan Green
2018-12-05  1:10       ` Ming Lei [this message]
2018-12-05 19:35         ` Evan Green
2018-12-06  0:22           ` Ming Lei
2018-12-06  3:15           ` Martin K. Petersen
2018-12-10 17:31             ` Evan Green
2018-12-18 23:48               ` Evan Green
2018-10-30 23:50 ` [PATCH 0/2] loop: Better discard " Bart Van Assche
2018-10-30 23:50   ` Bart Van Assche
2018-11-01 18:15   ` Evan Green
2018-11-01 22:41     ` Gwendal Grignou
2018-11-01 22:44     ` Gwendal Grignou
2018-11-02 16:02       ` Bart Van Assche
2018-11-05 20:35         ` Evan Green
2020-03-17 15:11 [PATCH 0/2] loop: Better discard support " Andrzej Pietrasiewicz
2020-03-17 15:11 ` [PATCH 2/2] " Andrzej Pietrasiewicz

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20181205011059.GA17845@ming.t460p \
    --to=ming.lei@redhat.com \
    --cc=asavery@chromium.org \
    --cc=axboe@kernel.dk \
    --cc=evgreen@chromium.org \
    --cc=gwendal@chromium.org \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.