All of lore.kernel.org
 help / color / mirror / Atom feed
From: Dan Williams <dan.j.williams@intel.com>
To: neilb@suse.de, christopher.leech@intel.com,
	linux-raid@vger.kernel.org, linux-kernel@vger.kernel.org
Cc: akpm@linux-foundation.org, torvalds@linux-foundation.org,
	yur@emcraft.com, wd@denx.de, arjan@linux.intel.com,
	rmk+kernel@arm.linux.org.uk
Subject: [PATCH 2.6.21-rc4 06/15] md: move write operations to raid5_run_ops
Date: Thu, 22 Mar 2007 23:52:07 -0700	[thread overview]
Message-ID: <20070323065207.15570.15540.stgit@dwillia2-linux.ch.intel.com> (raw)
In-Reply-To: <20070323064856.15570.45052.stgit@dwillia2-linux.ch.intel.com>

handle_stripe sets STRIPE_OP_PREXOR, STRIPE_OP_BIODRAIN, STRIPE_OP_POSTXOR
to request a write to the stripe cache.  raid5_run_ops is triggerred to run
and executes the request outside the stripe lock.

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---

 drivers/md/raid5.c |  152 +++++++++++++++++++++++++++++++++++++++++++++-------
 1 files changed, 131 insertions(+), 21 deletions(-)

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 0397e33..4d1adb5 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -1788,7 +1788,75 @@ static void compute_block_2(struct stripe_head *sh, int dd_idx1, int dd_idx2)
 	}
 }
 
+static int handle_write_operations5(struct stripe_head *sh, int rcw, int expand)
+{
+	int i, pd_idx = sh->pd_idx, disks = sh->disks;
+	int locked=0;
+
+	if (rcw == 0) {
+		/* skip the drain operation on an expand */
+		if (!expand) {
+			BUG_ON(test_and_set_bit(STRIPE_OP_BIODRAIN,
+				&sh->ops.pending));
+			sh->ops.count++;
+		}
+
+		BUG_ON(test_and_set_bit(STRIPE_OP_POSTXOR, &sh->ops.pending));
+		sh->ops.count++;
+
+		for (i=disks ; i-- ;) {
+			struct r5dev *dev = &sh->dev[i];
+
+			if (dev->towrite) {
+				set_bit(R5_LOCKED, &dev->flags);
+				if (!expand)
+					clear_bit(R5_UPTODATE, &dev->flags);
+				locked++;
+			}
+		}
+	} else {
+		BUG_ON(!(test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags) ||
+			test_bit(R5_Wantcompute, &sh->dev[pd_idx].flags)));
+
+		BUG_ON(test_and_set_bit(STRIPE_OP_PREXOR, &sh->ops.pending) ||
+			test_and_set_bit(STRIPE_OP_BIODRAIN, &sh->ops.pending) ||
+			test_and_set_bit(STRIPE_OP_POSTXOR, &sh->ops.pending));
+
+		sh->ops.count += 3;
+
+		for (i=disks ; i-- ;) {
+			struct r5dev *dev = &sh->dev[i];
+			if (i==pd_idx)
+				continue;
 
+			/* For a read-modify write there may be blocks that are
+			 * locked for reading while others are ready to be written
+			 * so we distinguish these blocks by the R5_Wantprexor bit
+			 */
+			if (dev->towrite &&
+			    (test_bit(R5_UPTODATE, &dev->flags) ||
+			    test_bit(R5_Wantcompute, &dev->flags))) {
+				set_bit(R5_Wantprexor, &dev->flags);
+				set_bit(R5_LOCKED, &dev->flags);
+				clear_bit(R5_UPTODATE, &dev->flags);
+				locked++;
+			}
+		}
+	}
+
+	/* keep the parity disk locked while asynchronous operations
+	 * are in flight
+	 */
+	set_bit(R5_LOCKED, &sh->dev[pd_idx].flags);
+	clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
+	locked++;
+
+	PRINTK("%s: stripe %llu locked: %d pending: %lx\n",
+		__FUNCTION__, (unsigned long long)sh->sector,
+		locked, sh->ops.pending);
+
+	return locked;
+}
 
 /*
  * Each stripe/dev can have one or more bion attached.
@@ -2151,8 +2219,67 @@ static void handle_stripe5(struct stripe_head *sh)
 		set_bit(STRIPE_HANDLE, &sh->state);
 	}
 
-	/* now to consider writing and what else, if anything should be read */
-	if (to_write) {
+	/* Now we check to see if any write operations have recently
+	 * completed
+	 */
+
+	/* leave prexor set until postxor is done, allows us to distinguish
+	 * a rmw from a rcw during biodrain
+	 */
+	if (test_bit(STRIPE_OP_PREXOR, &sh->ops.complete) &&
+		test_bit(STRIPE_OP_POSTXOR, &sh->ops.complete)) {
+
+		clear_bit(STRIPE_OP_PREXOR, &sh->ops.complete);
+		clear_bit(STRIPE_OP_PREXOR, &sh->ops.ack);
+		clear_bit(STRIPE_OP_PREXOR, &sh->ops.pending);
+
+		for (i=disks; i--;)
+			clear_bit(R5_Wantprexor, &sh->dev[i].flags);
+	}
+
+	/* if only POSTXOR is set then this is an 'expand' postxor */
+	if (test_bit(STRIPE_OP_BIODRAIN, &sh->ops.complete) &&
+		test_bit(STRIPE_OP_POSTXOR, &sh->ops.complete)) {
+
+		clear_bit(STRIPE_OP_BIODRAIN, &sh->ops.complete);
+		clear_bit(STRIPE_OP_BIODRAIN, &sh->ops.ack);
+		clear_bit(STRIPE_OP_BIODRAIN, &sh->ops.pending);
+
+		clear_bit(STRIPE_OP_POSTXOR, &sh->ops.complete);
+		clear_bit(STRIPE_OP_POSTXOR, &sh->ops.ack);
+		clear_bit(STRIPE_OP_POSTXOR, &sh->ops.pending);
+
+		/* All the 'written' buffers and the parity block are ready to be
+		 * written back to disk
+		 */
+		BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags));
+		for (i=disks; i--;) {
+			dev = &sh->dev[i];
+			if (test_bit(R5_LOCKED, &dev->flags) &&
+				(i == sh->pd_idx || dev->written)) {
+				PRINTK("Writing block %d\n", i);
+				set_bit(R5_Wantwrite, &dev->flags);
+				if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending))
+					sh->ops.count++;
+				if (!test_bit(R5_Insync, &dev->flags)
+				    || (i==sh->pd_idx && failed == 0))
+					set_bit(STRIPE_INSYNC, &sh->state);
+			}
+		}
+		if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
+			atomic_dec(&conf->preread_active_stripes);
+			if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD)
+				md_wakeup_thread(conf->mddev->thread);
+		}
+	}
+
+	/* 1/ Now to consider new write requests and what else, if anything should be read
+	 * 2/ Check operations clobber the parity block so do not start new writes while
+	 *    a check is in flight
+	 * 3/ Write operations do not stack
+	 */
+	if (to_write && !test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending) &&
+		!test_bit(STRIPE_OP_CHECK, &sh->ops.pending)) {
 		int rmw=0, rcw=0;
 		for (i=disks ; i--;) {
 			/* would I have to read this buffer for read_modify_write */
@@ -2219,25 +2346,8 @@ static void handle_stripe5(struct stripe_head *sh)
 			}
 		/* now if nothing is locked, and if we have enough data, we can start a write request */
 		if (locked == 0 && (rcw == 0 ||rmw == 0) &&
-		    !test_bit(STRIPE_BIT_DELAY, &sh->state)) {
-			PRINTK("Computing parity...\n");
-			compute_parity5(sh, rcw==0 ? RECONSTRUCT_WRITE : READ_MODIFY_WRITE);
-			/* now every locked buffer is ready to be written */
-			for (i=disks; i--;)
-				if (test_bit(R5_LOCKED, &sh->dev[i].flags)) {
-					PRINTK("Writing block %d\n", i);
-					locked++;
-					set_bit(R5_Wantwrite, &sh->dev[i].flags);
-					if (!test_bit(R5_Insync, &sh->dev[i].flags)
-					    || (i==sh->pd_idx && failed == 0))
-						set_bit(STRIPE_INSYNC, &sh->state);
-				}
-			if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
-				atomic_dec(&conf->preread_active_stripes);
-				if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD)
-					md_wakeup_thread(conf->mddev->thread);
-			}
-		}
+		    !test_bit(STRIPE_BIT_DELAY, &sh->state))
+			locked += handle_write_operations5(sh, rcw, 0);
 	}
 
 	/* maybe we need to check and possibly fix the parity for this stripe

  parent reply	other threads:[~2007-03-23  6:52 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-03-23  6:51 [PATCH 2.6.21-rc4 00/15] md raid5 acceleration and async_tx Dan Williams
2007-03-23  6:51 ` [PATCH 2.6.21-rc4 01/15] dmaengine: add base support for the async_tx api Dan Williams
2007-03-23 14:18   ` Yuri Tikhonov
2007-03-26 16:48     ` Dan Williams
2007-03-23  6:51 ` [PATCH 2.6.21-rc4 02/15] ARM: Add drivers/dma to arch/arm/Kconfig Dan Williams
2007-03-23  6:51 ` [PATCH 2.6.21-rc4 03/15] dmaengine: add the async_tx api Dan Williams
2007-03-23  6:51 ` [PATCH 2.6.21-rc4 04/15] md: add raid5_run_ops and support routines Dan Williams
2007-03-23  6:52 ` [PATCH 2.6.21-rc4 05/15] md: use raid5_run_ops for stripe cache operations Dan Williams
2007-03-23  6:52 ` Dan Williams [this message]
2007-03-23  6:52 ` [PATCH 2.6.21-rc4 07/15] md: move raid5 compute block operations to raid5_run_ops Dan Williams
2007-03-23  6:52 ` [PATCH 2.6.21-rc4 08/15] md: move raid5 parity checks " Dan Williams
2007-03-23  6:52 ` [PATCH 2.6.21-rc4 09/15] md: satisfy raid5 read requests via raid5_run_ops Dan Williams
2007-03-23  6:52 ` [PATCH 2.6.21-rc4 10/15] md: use async_tx and raid5_run_ops for raid5 expansion operations Dan Williams
2007-03-23  6:52 ` [PATCH 2.6.21-rc4 11/15] md: move raid5 io requests to raid5_run_ops Dan Williams
2007-03-23  6:52 ` [PATCH 2.6.21-rc4 12/15] md: remove raid5 compute_block and compute_parity5 Dan Williams
2007-03-23  6:52 ` [PATCH 2.6.21-rc4 13/15] dmaengine: driver for the iop32x, iop33x, and iop13xx raid engines Dan Williams
2007-03-23  6:52 ` [PATCH 2.6.21-rc4 14/15] iop13xx: Surface the iop13xx adma units to the iop-adma driver Dan Williams
2007-03-23  6:52 ` [PATCH 2.6.21-rc4 15/15] iop3xx: Surface the iop3xx DMA and AAU " Dan Williams

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20070323065207.15570.15540.stgit@dwillia2-linux.ch.intel.com \
    --to=dan.j.williams@intel.com \
    --cc=akpm@linux-foundation.org \
    --cc=arjan@linux.intel.com \
    --cc=christopher.leech@intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-raid@vger.kernel.org \
    --cc=neilb@suse.de \
    --cc=rmk+kernel@arm.linux.org.uk \
    --cc=torvalds@linux-foundation.org \
    --cc=wd@denx.de \
    --cc=yur@emcraft.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.