All of lore.kernel.org
 help / color / mirror / Atom feed
From: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
To: linux-raid@vger.kernel.org
Cc: shli@fb.com, neilb@suse.com, jes.sorensen@gmail.com,
	Artur Paszkiewicz <artur.paszkiewicz@intel.com>
Subject: [PATCH v3 4/9] raid5: calculate partial parity for a stripe
Date: Mon, 30 Jan 2017 19:59:48 +0100	[thread overview]
Message-ID: <20170130185953.30428-5-artur.paszkiewicz@intel.com> (raw)
In-Reply-To: <20170130185953.30428-1-artur.paszkiewicz@intel.com>

Attach a page for holding the partial parity data to stripe_head.
Allocate it only if mddev has the MD_HAS_PPL flag set.

Partial parity is the xor of not modified data chunks of a stripe and is
calculated as follows:

- reconstruct-write case:
  xor data from all not updated disks in a stripe

- read-modify-write case:
  xor old data and parity from all updated disks in a stripe

Implement it using the async_tx API and integrate into raid_run_ops().
It must be called when we still have access to old data, so do it when
STRIPE_OP_BIODRAIN is set, but before ops_run_prexor5(). The result is
stored into sh->ppl_page.

Partial parity is not meaningful for full stripe write and is not stored
in the log or used for recovery, so don't attempt to calculate it when
stripe has STRIPE_FULL_WRITE.

Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
---
 drivers/md/raid5.c | 98 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 drivers/md/raid5.h |  2 ++
 2 files changed, 100 insertions(+)

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index d1cba941951e..e1e238da32ba 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -466,6 +466,11 @@ static void shrink_buffers(struct stripe_head *sh)
 		sh->dev[i].page = NULL;
 		put_page(p);
 	}
+
+	if (sh->ppl_page) {
+		put_page(sh->ppl_page);
+		sh->ppl_page = NULL;
+	}
 }
 
 static int grow_buffers(struct stripe_head *sh, gfp_t gfp)
@@ -482,6 +487,13 @@ static int grow_buffers(struct stripe_head *sh, gfp_t gfp)
 		sh->dev[i].page = page;
 		sh->dev[i].orig_page = page;
 	}
+
+	if (test_bit(MD_HAS_PPL, &sh->raid_conf->mddev->flags)) {
+		sh->ppl_page = alloc_page(gfp);
+		if (!sh->ppl_page)
+			return 1;
+	}
+
 	return 0;
 }
 
@@ -1977,6 +1989,55 @@ static void ops_run_check_pq(struct stripe_head *sh, struct raid5_percpu *percpu
 			   &sh->ops.zero_sum_result, percpu->spare_page, &submit);
 }
 
+static struct dma_async_tx_descriptor *
+ops_run_partial_parity(struct stripe_head *sh, struct raid5_percpu *percpu,
+		       struct dma_async_tx_descriptor *tx)
+{
+	int disks = sh->disks;
+	struct page **xor_srcs = flex_array_get(percpu->scribble, 0);
+	int count = 0, pd_idx = sh->pd_idx, i;
+	struct async_submit_ctl submit;
+
+	pr_debug("%s: stripe %llu\n", __func__, (unsigned long long)sh->sector);
+
+	/*
+	 * Partial parity is the XOR of stripe data chunks that are not changed
+	 * during the write request. Depending on available data
+	 * (read-modify-write vs. reconstruct-write case) we calculate it
+	 * differently.
+	 */
+	if (sh->reconstruct_state == reconstruct_state_prexor_drain_run) {
+		/* rmw: xor old data and parity from updated disks */
+		for (i = disks; i--;) {
+			struct r5dev *dev = &sh->dev[i];
+			if (test_bit(R5_Wantdrain, &dev->flags) || i == pd_idx)
+				xor_srcs[count++] = dev->page;
+		}
+	} else if (sh->reconstruct_state == reconstruct_state_drain_run) {
+		/* rcw: xor data from all not updated disks */
+		for (i = disks; i--;) {
+			struct r5dev *dev = &sh->dev[i];
+			if (test_bit(R5_UPTODATE, &dev->flags))
+				xor_srcs[count++] = dev->page;
+		}
+	} else {
+		return tx;
+	}
+
+	init_async_submit(&submit, ASYNC_TX_XOR_ZERO_DST, tx, NULL, sh,
+			  flex_array_get(percpu->scribble, 0)
+			  + sizeof(struct page *) * (sh->disks + 2));
+
+	if (count == 1)
+		tx = async_memcpy(sh->ppl_page, xor_srcs[0], 0, 0, PAGE_SIZE,
+				  &submit);
+	else
+		tx = async_xor(sh->ppl_page, xor_srcs, 0, count, PAGE_SIZE,
+			       &submit);
+
+	return tx;
+}
+
 static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
 {
 	int overlap_clear = 0, i, disks = sh->disks;
@@ -2007,6 +2068,9 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
 			async_tx_ack(tx);
 	}
 
+	if (test_bit(STRIPE_OP_PARTIAL_PARITY, &ops_request))
+		tx = ops_run_partial_parity(sh, percpu, tx);
+
 	if (test_bit(STRIPE_OP_PREXOR, &ops_request)) {
 		if (level < 6)
 			tx = ops_run_prexor5(sh, percpu, tx);
@@ -3058,6 +3122,12 @@ schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s,
 		s->locked++;
 	}
 
+	if (level == 5 && test_bit(MD_HAS_PPL, &conf->mddev->flags) &&
+	    test_bit(STRIPE_OP_BIODRAIN, &s->ops_request) &&
+	    !test_bit(STRIPE_FULL_WRITE, &sh->state) &&
+	    test_bit(R5_Insync, &sh->dev[pd_idx].flags))
+		set_bit(STRIPE_OP_PARTIAL_PARITY, &s->ops_request);
+
 	pr_debug("%s: stripe %llu locked: %d ops_request: %lx\n",
 		__func__, (unsigned long long)sh->sector,
 		s->locked, s->ops_request);
@@ -3105,6 +3175,34 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx,
 	if (*bip && (*bip)->bi_iter.bi_sector < bio_end_sector(bi))
 		goto overlap;
 
+	if (forwrite && test_bit(MD_HAS_PPL, &conf->mddev->flags)) {
+		/*
+		 * With PPL only writes to consecutive data chunks within a
+		 * stripe are allowed. Not really an overlap, but
+		 * wait_for_overlap can be used to handle this.
+		 */
+		sector_t sector;
+		sector_t first = 0;
+		sector_t last = 0;
+		int count = 0;
+		int i;
+
+		for (i = 0; i < sh->disks; i++) {
+			if (i != sh->pd_idx &&
+			    (i == dd_idx || sh->dev[i].towrite)) {
+				sector = sh->dev[i].sector;
+				if (count == 0 || sector < first)
+					first = sector;
+				if (sector > last)
+					last = sector;
+				count++;
+			}
+		}
+
+		if (first + conf->chunk_sectors * (count - 1) != last)
+			goto overlap;
+	}
+
 	if (!forwrite || previous)
 		clear_bit(STRIPE_BATCH_READY, &sh->state);
 
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index 0f64a58873de..88f1e52d9daf 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -228,6 +228,7 @@ struct stripe_head {
 	struct list_head	log_list;
 	sector_t		log_start; /* first meta block on the journal */
 	struct list_head	r5c; /* for r5c_cache->stripe_in_journal */
+	struct page		*ppl_page; /* partial parity of this stripe */
 	/**
 	 * struct stripe_operations
 	 * @target - STRIPE_OP_COMPUTE_BLK target
@@ -400,6 +401,7 @@ enum {
 	STRIPE_OP_BIODRAIN,
 	STRIPE_OP_RECONSTRUCT,
 	STRIPE_OP_CHECK,
+	STRIPE_OP_PARTIAL_PARITY,
 };
 
 /*
-- 
2.11.0


  parent reply	other threads:[~2017-01-30 18:59 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-01-30 18:59 [PATCH v3 0/9] Partial Parity Log for MD RAID 5 Artur Paszkiewicz
2017-01-30 18:59 ` [PATCH v3 1/9] raid5-cache: move declarations to separate header Artur Paszkiewicz
2017-01-30 18:59 ` [PATCH v3 2/9] raid5-cache: add policy logic Artur Paszkiewicz
2017-01-30 18:59 ` [PATCH v3 3/9] md: superblock changes for PPL Artur Paszkiewicz
2017-02-07 21:20   ` Shaohua Li
2017-02-08 11:58     ` Artur Paszkiewicz
2017-01-30 18:59 ` Artur Paszkiewicz [this message]
2017-02-07 21:25   ` [PATCH v3 4/9] raid5: calculate partial parity for a stripe Shaohua Li
2017-02-08 11:58     ` Artur Paszkiewicz
2017-01-30 18:59 ` [PATCH v3 5/9] raid5-ppl: Partial Parity Log write logging implementation Artur Paszkiewicz
2017-02-07 21:42   ` Shaohua Li
2017-02-08 11:58     ` Artur Paszkiewicz
2017-02-08  5:34       ` Shaohua Li
2017-02-09 15:35         ` Artur Paszkiewicz
2017-02-09 17:09           ` Shaohua Li
2017-02-09 16:06         ` Wols Lists
2017-02-09 17:13           ` Shaohua Li
2017-01-30 18:59 ` [PATCH v3 6/9] md: add sysfs entries for PPL Artur Paszkiewicz
2017-02-07 21:49   ` Shaohua Li
2017-02-08 11:58     ` Artur Paszkiewicz
2017-02-08 18:14       ` Shaohua Li
2017-01-30 18:59 ` [PATCH v3 7/9] raid5-ppl: load and recover the log Artur Paszkiewicz
2017-01-30 18:59 ` [PATCH v3 8/9] raid5-ppl: support disk hot add/remove with PPL Artur Paszkiewicz
2017-01-30 18:59 ` [PATCH v3 9/9] raid5-ppl: runtime PPL enabling or disabling Artur Paszkiewicz
2017-03-27  5:08   ` NeilBrown

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170130185953.30428-5-artur.paszkiewicz@intel.com \
    --to=artur.paszkiewicz@intel.com \
    --cc=jes.sorensen@gmail.com \
    --cc=linux-raid@vger.kernel.org \
    --cc=neilb@suse.com \
    --cc=shli@fb.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.