All of lore.kernel.org
 help / color / mirror / Atom feed
From: Shaohua Li <shli@kernel.org>
To: NeilBrown <neilb@suse.com>
Cc: linux-raid@vger.kernel.org
Subject: Re: [md PATCH 3/5] md: move suspend_hi/lo handling into core md code
Date: Tue, 17 Oct 2017 23:16:20 -0700	[thread overview]
Message-ID: <20171018061620.alka3npi6pyd76py@kernel.org> (raw)
In-Reply-To: <150820840349.1646.9916811766142195995.stgit@noble>

On Tue, Oct 17, 2017 at 01:46:43PM +1100, Neil Brown wrote:
> responding to ->suspend_lo and ->suspend_hi is similar
> to responding to ->suspended.  It is best to wait in
> the common core code without incrementing ->active_io.
> This allows mddev_suspend()/mddev_resume() to work while
> requests are waiting for suspend_lo/hi to change.
> This is will be important after a subsequent patch
> which uses mddev_suspend() to synchronize updating for
> suspend_lo/hi.
> 
> So move the code for testing suspend_lo/hi out of raid1.c
> and raid5.c, and place it in md.c
> 
> Signed-off-by: NeilBrown <neilb@suse.com>
> ---
>  drivers/md/md.c    |   29 +++++++++++++++++++++++------
>  drivers/md/raid1.c |   12 ++++--------
>  drivers/md/raid5.c |   22 ----------------------
>  3 files changed, 27 insertions(+), 36 deletions(-)
> 
> diff --git a/drivers/md/md.c b/drivers/md/md.c
> index 308456842d3e..f8d0e41120cb 100644
> --- a/drivers/md/md.c
> +++ b/drivers/md/md.c
> @@ -266,16 +266,31 @@ static DEFINE_SPINLOCK(all_mddevs_lock);
>   * call has finished, the bio has been linked into some internal structure
>   * and so is visible to ->quiesce(), so we don't need the refcount any more.
>   */
> +static bool is_suspended(struct mddev *mddev, struct bio *bio)
> +{
> +	if (mddev->suspended)
> +		return true;
> +	if (bio_data_dir(bio) != WRITE)
> +		return false;
> +	if (mddev->suspend_lo >= mddev->suspend_hi)
> +		return false;
> +	if (bio->bi_iter.bi_sector >= mddev->suspend_hi)
> +		return false;
> +	if (bio_end_sector(bio) < mddev->suspend_lo)
> +		return false;
> +	return true;
> +}
> +
>  void md_handle_request(struct mddev *mddev, struct bio *bio)
>  {
>  check_suspended:
>  	rcu_read_lock();
> -	if (mddev->suspended) {
> +	if (is_suspended(mddev, bio)) {
>  		DEFINE_WAIT(__wait);
>  		for (;;) {
>  			prepare_to_wait(&mddev->sb_wait, &__wait,
>  					TASK_UNINTERRUPTIBLE);
> -			if (!mddev->suspended)
> +			if (!is_suspended(mddev, bio))
>  				break;
>  			rcu_read_unlock();
>  			schedule();
> @@ -4843,10 +4858,11 @@ suspend_lo_store(struct mddev *mddev, const char *buf, size_t len)
>  		goto unlock;
>  	old = mddev->suspend_lo;
>  	mddev->suspend_lo = new;
> -	if (new >= old)
> +	if (new >= old) {
>  		/* Shrinking suspended region */
> +		wake_up(&mddev->sb_wait);
>  		mddev->pers->quiesce(mddev, 2);

Do we still need the quiesce(mddev, 2)? sounds not to me.

> -	else {
> +	} else {
>  		/* Expanding suspended region - need to wait */
>  		mddev->pers->quiesce(mddev, 1);
>  		mddev->pers->quiesce(mddev, 0);
> @@ -4886,10 +4902,11 @@ suspend_hi_store(struct mddev *mddev, const char *buf, size_t len)
>  		goto unlock;
>  	old = mddev->suspend_hi;
>  	mddev->suspend_hi = new;
> -	if (new <= old)
> +	if (new <= old) {
>  		/* Shrinking suspended region */
> +		wake_up(&mddev->sb_wait);
>  		mddev->pers->quiesce(mddev, 2);
> -	else {
> +	} else {
>  		/* Expanding suspended region - need to wait */
>  		mddev->pers->quiesce(mddev, 1);
>  		mddev->pers->quiesce(mddev, 0);
> diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
> index f3f3e40dc9d8..277a145b3ff5 100644
> --- a/drivers/md/raid1.c
> +++ b/drivers/md/raid1.c
> @@ -1310,11 +1310,9 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
>  	 */
>  
>  
> -	if ((bio_end_sector(bio) > mddev->suspend_lo &&
> -	    bio->bi_iter.bi_sector < mddev->suspend_hi) ||
> -	    (mddev_is_clustered(mddev) &&
> +	if (mddev_is_clustered(mddev) &&
>  	     md_cluster_ops->area_resyncing(mddev, WRITE,
> -		     bio->bi_iter.bi_sector, bio_end_sector(bio)))) {
> +		     bio->bi_iter.bi_sector, bio_end_sector(bio))) {
>  
>  		/*
>  		 * As the suspend_* range is controlled by userspace, we want
> @@ -1325,12 +1323,10 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
>  			sigset_t full, old;
>  			prepare_to_wait(&conf->wait_barrier,
>  					&w, TASK_INTERRUPTIBLE);
> -			if (bio_end_sector(bio) <= mddev->suspend_lo ||
> -			    bio->bi_iter.bi_sector >= mddev->suspend_hi ||
> -			    (mddev_is_clustered(mddev) &&
> +			if (mddev_is_clustered(mddev) &&
>  			     !md_cluster_ops->area_resyncing(mddev, WRITE,
>  				     bio->bi_iter.bi_sector,
> -				     bio_end_sector(bio))))
> +				     bio_end_sector(bio)))
>  				break;
>  			sigfillset(&full);
>  			sigprocmask(SIG_BLOCK, &full, &old);
> diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
> index 928e24a07133..4f40ccd21cbb 100644
> --- a/drivers/md/raid5.c
> +++ b/drivers/md/raid5.c
> @@ -5682,28 +5682,6 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
>  				goto retry;
>  			}
>  
> -			if (rw == WRITE &&
> -			    logical_sector >= mddev->suspend_lo &&
> -			    logical_sector < mddev->suspend_hi) {
> -				raid5_release_stripe(sh);
> -				/* As the suspend_* range is controlled by
> -				 * userspace, we want an interruptible
> -				 * wait.
> -				 */
> -				prepare_to_wait(&conf->wait_for_overlap,
> -						&w, TASK_INTERRUPTIBLE);
> -				if (logical_sector >= mddev->suspend_lo &&
> -				    logical_sector < mddev->suspend_hi) {
> -					sigset_t full, old;
> -					sigfillset(&full);
> -					sigprocmask(SIG_BLOCK, &full, &old);
> -					schedule();
> -					sigprocmask(SIG_SETMASK, &old, NULL);
> -					do_prepare = true;
> -				}
> -				goto retry;
> -			}
> -
>  			if (test_bit(STRIPE_EXPANDING, &sh->state) ||
>  			    !add_stripe_bio(sh, bi, dd_idx, rw, previous)) {
>  				/* Stripe is busy expanding or

I think we can remove the state == 2 branch in raid5_quiesce().

This leaves only raid1 and md-cluster needs the quiesce(2) calls. In the
future, we probably should abstract that md-cluster logic to separate API and
let raid1 use it for waitting. The quiesce(2) is a strange name for the wait
purpose.

Thanks,
Shaohua


  reply	other threads:[~2017-10-18  6:16 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-10-17  2:46 [md PATCH 0/5] Address deadlock associated with setting suspend_lo NeilBrown
2017-10-17  2:46 ` [md PATCH 1/5] md: always hold reconfig_mutex when calling mddev_suspend() NeilBrown
2017-10-18  6:11   ` Shaohua Li
2017-10-18  7:35     ` NeilBrown
2017-10-19  1:17       ` [md PATCH 1/5 v2] " NeilBrown
2017-10-19  3:45         ` Shaohua Li
2017-10-19  6:29           ` NeilBrown
2017-10-20  4:37             ` Shaohua Li
2017-10-23  0:02               ` NeilBrown
2017-10-23  1:48                 ` Shaohua Li
2017-10-17  2:46 ` [md PATCH 5/5] md: allow metadata update while suspending NeilBrown
2017-10-17  2:46 ` [md PATCH 4/5] md: use mddev_suspend/resume instead of ->quiesce() NeilBrown
2017-10-17  2:46 ` [md PATCH 3/5] md: move suspend_hi/lo handling into core md code NeilBrown
2017-10-18  6:16   ` Shaohua Li [this message]
2017-10-18  7:40     ` NeilBrown
2017-10-19  1:49       ` [md PATCH 6/5] md: remove special meaning of ->quiesce(.., 2) NeilBrown
2017-10-17  2:46 ` [md PATCH 2/5] md: don't call bitmap_create() while array is quiesced NeilBrown

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20171018061620.alka3npi6pyd76py@kernel.org \
    --to=shli@kernel.org \
    --cc=linux-raid@vger.kernel.org \
    --cc=neilb@suse.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.