Linux-Block Archive on lore.kernel.org
 help / color / Atom feed
From: Ming Lei <ming.lei@redhat.com>
To: Evan Green <evgreen@chromium.org>
Cc: axboe@kernel.dk, Gwendal Grignou <gwendal@chromium.org>,
	asavery@chromium.org, linux-block@vger.kernel.org,
	linux-kernel@vger.kernel.org
Subject: Re: [PATCH 2/2] loop: Better discard support for block devices
Date: Wed, 5 Dec 2018 09:10:59 +0800
Message-ID: <20181205011059.GA17845@ming.t460p> (raw)
In-Reply-To: <CAE=gft7Fyq+Jb3ZG3PSTR=C0Wjt3uu57+H2bshM6Gu=_0W0YQw@mail.gmail.com>

On Tue, Dec 04, 2018 at 02:19:46PM -0800, Evan Green wrote:
> Hi Ming,
> 
> On Tue, Nov 27, 2018 at 5:26 PM Ming Lei <ming.lei@redhat.com> wrote:
> >
> > On Tue, Oct 30, 2018 at 04:06:24PM -0700, Evan Green wrote:
> > > If the backing device for a loop device is a block device,
> > > then mirror the discard properties of the underlying block
> > > device into the loop device. While in there, differentiate
> > > between REQ_OP_DISCARD and REQ_OP_WRITE_ZEROES, which are
> > > different for block devices, but which the loop device had
> > > just been lumping together.
> > >
> > > Signed-off-by: Evan Green <evgreen@chromium.org>
> > > ---
> > >
> > >  drivers/block/loop.c | 61 +++++++++++++++++++++++++++++++++++-----------------
> > >  1 file changed, 41 insertions(+), 20 deletions(-)
> > >
> > > diff --git a/drivers/block/loop.c b/drivers/block/loop.c
> > > index 28990fc94841a..176e65101c4ef 100644
> > > --- a/drivers/block/loop.c
> > > +++ b/drivers/block/loop.c
> > > @@ -417,19 +417,14 @@ static int lo_read_transfer(struct loop_device *lo, struct request *rq,
> > >       return ret;
> > >  }
> > >
> > > -static int lo_discard(struct loop_device *lo, struct request *rq, loff_t pos)
> > > +static int lo_discard(struct loop_device *lo, struct request *rq,
> > > +             int mode, loff_t pos)
> > >  {
> > > -     /*
> > > -      * We use punch hole to reclaim the free space used by the
> > > -      * image a.k.a. discard. However we do not support discard if
> > > -      * encryption is enabled, because it may give an attacker
> > > -      * useful information.
> > > -      */
> > >       struct file *file = lo->lo_backing_file;
> > > -     int mode = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE;
> > > +     struct request_queue *q = lo->lo_queue;
> > >       int ret;
> > >
> > > -     if ((!file->f_op->fallocate) || lo->lo_encrypt_key_size) {
> > > +     if (!blk_queue_discard(q)) {
> > >               ret = -EOPNOTSUPP;
> > >               goto out;
> > >       }
> > > @@ -603,8 +598,13 @@ static int do_req_filebacked(struct loop_device *lo, struct request *rq)
> > >       case REQ_OP_FLUSH:
> > >               return lo_req_flush(lo, rq);
> > >       case REQ_OP_DISCARD:
> > > +             return lo_discard(lo, rq,
> > > +                     FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, pos);
> > > +
> > >       case REQ_OP_WRITE_ZEROES:
> > > -             return lo_discard(lo, rq, pos);
> > > +             return lo_discard(lo, rq,
> > > +                     FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE, pos);
> > > +
> > >       case REQ_OP_WRITE:
> > >               if (lo->transfer)
> > >                       return lo_write_transfer(lo, rq, pos);
> > > @@ -859,6 +859,25 @@ static void loop_config_discard(struct loop_device *lo)
> > >       struct file *file = lo->lo_backing_file;
> > >       struct inode *inode = file->f_mapping->host;
> > >       struct request_queue *q = lo->lo_queue;
> > > +     struct request_queue *backingq;
> > > +
> > > +     /*
> > > +      * If the backing device is a block device, mirror its discard
> > > +      * capabilities.
> > > +      */
> > > +     if (S_ISBLK(inode->i_mode)) {
> > > +             backingq = bdev_get_queue(inode->i_bdev);
> > > +             blk_queue_max_discard_sectors(q,
> > > +                     backingq->limits.max_discard_sectors);
> > > +
> > > +             blk_queue_max_write_zeroes_sectors(q,
> > > +                     backingq->limits.max_write_zeroes_sectors);
> > > +
> > > +             q->limits.discard_granularity =
> > > +                     backingq->limits.discard_granularity;
> > > +
> > > +             q->limits.discard_alignment =
> > > +                     backingq->limits.discard_alignment;
> >
> > > I think it isn't necessary to mirror the backing queue's discard/write_zeroes
> > > capabilities, given that either the fs or the underlying queue can deal with them well.
> >
> > >
> > >       /*
> > >        * We use punch hole to reclaim the free space used by the
> > > @@ -866,22 +885,24 @@ static void loop_config_discard(struct loop_device *lo)
> > >        * encryption is enabled, because it may give an attacker
> > >        * useful information.
> > >        */
> > > -     if ((!file->f_op->fallocate) ||
> > > -         lo->lo_encrypt_key_size) {
> > > +     } else if ((!file->f_op->fallocate) || lo->lo_encrypt_key_size) {
> > >               q->limits.discard_granularity = 0;
> > >               q->limits.discard_alignment = 0;
> > >               blk_queue_max_discard_sectors(q, 0);
> > >               blk_queue_max_write_zeroes_sectors(q, 0);
> > > -             blk_queue_flag_clear(QUEUE_FLAG_DISCARD, q);
> > > -             return;
> > > -     }
> > >
> > > -     q->limits.discard_granularity = inode->i_sb->s_blocksize;
> > > -     q->limits.discard_alignment = 0;
> > > +     } else {
> > > +             q->limits.discard_granularity = inode->i_sb->s_blocksize;
> > > +             q->limits.discard_alignment = 0;
> > > +
> > > +             blk_queue_max_discard_sectors(q, UINT_MAX >> 9);
> > > +             blk_queue_max_write_zeroes_sectors(q, UINT_MAX >> 9);
> > > +     }
> > >
> > > -     blk_queue_max_discard_sectors(q, UINT_MAX >> 9);
> > > -     blk_queue_max_write_zeroes_sectors(q, UINT_MAX >> 9);
> > > -     blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
> > > +     if (q->limits.max_discard_sectors || q->limits.max_write_zeroes_sectors)
> > > +             blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
> > > +     else
> > > +             blk_queue_flag_clear(QUEUE_FLAG_DISCARD, q);
> > >  }
> >
> > Looks like it should work just by mirroring the backing queue's discard
> > capability to the loop queue in case the loop is backed by a
> > block device, doesn't it? Meanwhile the unified discard limits &
> > write_zeroes limits can be kept.
> 
> I tested this out, and you're right that I could just flip the
> QUEUE_FLAG_DISCARD based on whether it's a block device, and leave

What I meant actually is to do the following discard config:

	bool discard;
	if (S_ISBLK(inode->i_mode)) {
		struct request_queue *backingq = bdev_get_queue(inode->i_bdev);
		discard = blk_queue_discard(backingq);
	} else if ((!file->f_op->fallocate) || lo->lo_encrypt_key_size)
		discard = false;
	else
		discard = true;

	if (discard) {
		blk_queue_max_discard_sectors(q, UINT_MAX >> 9);
		blk_queue_max_write_zeroes_sectors(q, UINT_MAX >> 9);
		blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
	} else {
		blk_queue_flag_clear(QUEUE_FLAG_DISCARD, q);
	}

> everything else alone, to completely disable discard support for loop
> devices backed by block devices. This seems to work for programs like
> mkfs.ext4, but still leaves problems for coreutils cp.
> 
> But is that really the right call? With this change, we're not only
> able to use loop devices in this way, but we're able to use the
> discard and zero functionality of the underlying block device by
> simply passing it through. To me that seemed better than disabling all
> discard support for block devices, which would severely slow us down
> on some devices.

I guess the above approach can do the same job as yours, but is simpler.

thanks,
Ming

  reply index

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-10-30 23:06 [PATCH 0/2] loop: Better discard " Evan Green
2018-10-30 23:06 ` [PATCH 1/2] loop: Report EOPNOTSUPP properly Evan Green
2018-11-28  1:06   ` Ming Lei
2018-10-30 23:06 ` [PATCH 2/2] loop: Better discard support for block devices Evan Green
2018-11-26 18:53   ` Evan Green
2018-11-27  2:55     ` Ming Lei
2018-11-27 23:34       ` Evan Green
2018-11-28  1:28         ` Ming Lei
2018-11-28  1:26   ` Ming Lei
2018-12-04 22:19     ` Evan Green
2018-12-05  1:10       ` Ming Lei [this message]
2018-12-05 19:35         ` Evan Green
2018-12-06  0:22           ` Ming Lei
2018-12-06  3:15           ` Martin K. Petersen
2018-12-10 17:31             ` Evan Green
2018-12-18 23:48               ` Evan Green
2018-10-30 23:50 ` [PATCH 0/2] loop: Better discard " Bart Van Assche
2018-11-01 18:15   ` Evan Green
2018-11-01 22:41     ` Gwendal Grignou
2018-11-01 22:44     ` Gwendal Grignou
2018-11-02 16:02       ` Bart Van Assche
2018-11-05 20:35         ` Evan Green
2020-03-17 15:11 [PATCH 0/2] loop: Better discard support " Andrzej Pietrasiewicz
2020-03-17 15:11 ` [PATCH 2/2] " Andrzej Pietrasiewicz

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20181205011059.GA17845@ming.t460p \
    --to=ming.lei@redhat.com \
    --cc=asavery@chromium.org \
    --cc=axboe@kernel.dk \
    --cc=evgreen@chromium.org \
    --cc=gwendal@chromium.org \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Linux-Block Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-block/0 linux-block/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-block linux-block/ https://lore.kernel.org/linux-block \
		linux-block@vger.kernel.org
	public-inbox-index linux-block

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-block


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git