All of lore.kernel.org
 help / color / mirror / Atom feed
From: Boaz Harrosh <bharrosh-C4P08NqkoRlBDgjK7y7TUQ@public.gmane.org>
To: open-osd <osd-dev-yNzVSZO3znNg9hUCZPvPmw@public.gmane.org>,
	NFS list <linux-nfs-u79uwXL29TY76Z2rM5mHXA@public.gmane.org>,
	Daniel Gryniewicz <dang-Jp3n8lUXroRWk0Htik3J/w@public.gmane.org>,
	Elizabeth Ellenbogen Ziph
	<elizabeth-Jp3n8lUXroRWk0Htik3J/w@public.gmane.org>,
	"Pathak,
	Santosh" <santoshp-C4P08NqkoRlBDgjK7y7TUQ@public.gmane.org>,
	linux-fsdevel
	<linux-fsdevel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org>
Cc: Tigran Mkrtchyan <tigran.mkrtchyan-T5F83Mi6MZE@public.gmane.org>,
	Benny Halevy <bhalevy-7I+n7zu2hftEKMMhf/gKZA@public.gmane.org>,
	Sachin bhamare
	<sachin.bhamare-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
Subject: [PATCH 3/3] ore: Support for raid 6
Date: Thu, 22 May 2014 16:11:34 +0300	[thread overview]
Message-ID: <537DF786.2070506@panasas.com> (raw)
In-Reply-To: <537DF5B4.4070001-C4P08NqkoRlBDgjK7y7TUQ@public.gmane.org>


This simple patch adds support for raid6 to the ORE.
Most operations and calculations where already for the general
case. Only things left:
* call async_gen_syndrome() in the case of raid6
  (NOTE that the raid6 math is the one supported by the Linux Kernel
   see: crypto/async_tx/async_pq.c)
* call _ore_add_parity_unit() twice with only last call generating
  the redundancy pages.

* Fix couple BUGS in old code
  a. In reads when parity==2 it can happen that per_dev->length=0
     but per_dev->offset was set and adjusted by _ore_add_sg_seg().
     Don't let it be overwritten.
  b. The all 'cur_comp > starting_dev' thing to determine if:
       "per_dev->offset is in the current stripe number or the
       next one."
     Was a complete raid5/4 accident. When parity==2 this is not
     at all true usually. All we need to do is increment si->ob_offset
     once we pass by the first parity device.
     (This also greatly simplifies the code, amen)
  c. Calculation of si->dev rotation can overflow when parity==2.

* Then last enable raid6 in ore_verify_layout()

I want to deeply thank Daniel Gryniewicz who found first all the
bugs in the old raid code, and inspired these patches:
	Inspired-by Daniel Gryniewicz <dang-Jp3n8lUXroRWk0Htik3J/w@public.gmane.org>

Signed-off-by: Boaz Harrosh <bharrosh-C4P08NqkoRlBDgjK7y7TUQ@public.gmane.org>
---
 fs/exofs/Kconfig.ore |  2 ++
 fs/exofs/ore.c       | 75 ++++++++++++++++++++++++++++++++++++----------------
 fs/exofs/ore_raid.c  | 37 +++++++++++++++++---------
 fs/exofs/ore_raid.h  |  3 ++-
 4 files changed, 80 insertions(+), 37 deletions(-)

diff --git a/fs/exofs/Kconfig.ore b/fs/exofs/Kconfig.ore
index 1ca7fb7..2daf232 100644
--- a/fs/exofs/Kconfig.ore
+++ b/fs/exofs/Kconfig.ore
@@ -9,4 +9,6 @@ config ORE
 	tristate
 	depends on EXOFS_FS || PNFS_OBJLAYOUT
 	select ASYNC_XOR
+	select RAID6_PQ
+	select ASYNC_PQ
 	default SCSI_OSD_ULD
diff --git a/fs/exofs/ore.c b/fs/exofs/ore.c
index 0e2a835..cfc0205 100644
--- a/fs/exofs/ore.c
+++ b/fs/exofs/ore.c
@@ -58,9 +58,12 @@ int ore_verify_layout(unsigned total_comps, struct ore_layout *layout)
 		layout->parity = 1;
 		break;
 	case PNFS_OSD_RAID_PQ:
+		layout->parity = 2;
+		break;
 	case PNFS_OSD_RAID_4:
 	default:
-		ORE_ERR("Only RAID_0/5 for now\n");
+		ORE_ERR("Only RAID_0/5/6 for now received-enum=%d\n",
+			layout->raid_algorithm);
 		return -EINVAL;
 	}
 	if (0 != (layout->stripe_unit & ~PAGE_MASK)) {
@@ -112,6 +115,8 @@ int ore_verify_layout(unsigned total_comps, struct ore_layout *layout)
 		layout->max_io_length /= stripe_length;
 		layout->max_io_length *= stripe_length;
 	}
+	ORE_DBGMSG("max_io_length=0x%lx\n", layout->max_io_length);
+
 	return 0;
 }
 EXPORT_SYMBOL(ore_verify_layout);
@@ -561,7 +566,8 @@ void ore_calc_stripe_info(struct ore_layout *layout, u64 file_offset,
 
 		si->par_dev = (group_width + group_width - parity - RxP) %
 			      group_width + first_dev;
-		si->dev = (group_width + C - RxP) % group_width + first_dev;
+		si->dev = (group_width + group_width + C - RxP) %
+			  group_width + first_dev;
 		si->bytes_in_stripe = U;
 		si->first_stripe_start = M * S + G * T + N * U;
 	} else {
@@ -651,6 +657,43 @@ out:	/* we fail the complete unit on an error eg don't advance
 	return ret;
 }
 
+static int _add_parity_units(struct ore_io_state *ios,
+			     struct ore_striping_info *si,
+			     unsigned dev, unsigned first_dev,
+			     unsigned mirrors_p1, unsigned devs_in_group,
+			     unsigned cur_len)
+{
+	unsigned do_parity;
+	int ret = 0;
+
+	for (do_parity = ios->layout->parity; do_parity; --do_parity) {
+		struct ore_per_dev_state *per_dev;
+
+		per_dev = &ios->per_dev[dev - first_dev];
+		if (!per_dev->length && !per_dev->offset) {
+			/* Only/always the parity unit of the first
+			 * stripe will be empty. So this is a chance to
+			 * initialize the per_dev info.
+			 */
+			per_dev->dev = dev;
+			per_dev->offset = si->obj_offset - si->unit_off;
+		}
+
+		ret = _ore_add_parity_unit(ios, si, per_dev, cur_len,
+					   do_parity == 1);
+		if (unlikely(ret))
+				break;
+
+		if (do_parity != 1) {
+			dev = ((dev + mirrors_p1) % devs_in_group) + first_dev;
+			si->cur_comp = (si->cur_comp + 1) %
+						       ios->layout->group_width;
+		}
+	}
+
+	return ret;
+}
+
 static int _prepare_for_striping(struct ore_io_state *ios)
 {
 	struct ore_striping_info *si = &ios->si;
@@ -660,7 +703,6 @@ static int _prepare_for_striping(struct ore_io_state *ios)
 	unsigned devs_in_group = group_width * mirrors_p1;
 	unsigned dev = si->dev;
 	unsigned first_dev = dev - (dev % devs_in_group);
-	unsigned dev_order;
 	unsigned cur_pg = ios->pages_consumed;
 	u64 length = ios->length;
 	int ret = 0;
@@ -672,14 +714,13 @@ static int _prepare_for_striping(struct ore_io_state *ios)
 
 	BUG_ON(length > si->length);
 
-	dev_order = si->cur_comp;
-
 	while (length) {
 		struct ore_per_dev_state *per_dev =
 						&ios->per_dev[dev - first_dev];
 		unsigned cur_len, page_off = 0;
 
-		if (!per_dev->length) {
+		if (!per_dev->length && !per_dev->offset) {
+			/* First time initialize the per_dev info. */
 			per_dev->dev = dev;
 			if (dev == si->dev) {
 				WARN_ON(dev == si->par_dev);
@@ -688,13 +729,7 @@ static int _prepare_for_striping(struct ore_io_state *ios)
 				page_off = si->unit_off & ~PAGE_MASK;
 				BUG_ON(page_off && (page_off != ios->pgbase));
 			} else {
-				if (si->cur_comp > dev_order)
-					per_dev->offset =
-						si->obj_offset - si->unit_off;
-				else /* si->cur_comp < dev_order */
-					per_dev->offset =
-						si->obj_offset + stripe_unit -
-								   si->unit_off;
+				per_dev->offset = si->obj_offset - si->unit_off;
 				cur_len = stripe_unit;
 			}
 		} else {
@@ -721,20 +756,12 @@ static int _prepare_for_striping(struct ore_io_state *ios)
 				/* If last stripe operate on parity comp */
 				si->cur_comp = group_width - ios->layout->parity;
 			}
-			per_dev = &ios->per_dev[dev - first_dev];
-			if (!per_dev->length) {
-				/* Only/always the parity unit of the first
-				 * stripe will be empty. So this is a chance to
-				 * initialize the per_dev info.
-				 */
-				per_dev->dev = dev;
-				per_dev->offset = si->obj_offset - si->unit_off;
-			}
 
 			/* In writes cur_len just means if it's the
 			 * last one. See _ore_add_parity_unit.
 			 */
-			ret = _ore_add_parity_unit(ios, si, per_dev,
+			ret = _add_parity_units(ios, si, dev, first_dev,
+						mirrors_p1, devs_in_group,
 						ios->sp2d ? length : cur_len);
 			if (unlikely(ret))
 					goto out;
@@ -746,6 +773,8 @@ static int _prepare_for_striping(struct ore_io_state *ios)
 			/* Next stripe, start fresh */
 			si->cur_comp = 0;
 			si->cur_pg = 0;
+			si->obj_offset += cur_len;
+			si->unit_off = 0;
 		}
 	}
 out:
diff --git a/fs/exofs/ore_raid.c b/fs/exofs/ore_raid.c
index d58a952..7f20f25 100644
--- a/fs/exofs/ore_raid.c
+++ b/fs/exofs/ore_raid.c
@@ -218,20 +218,28 @@ static unsigned _sp2d_max_pg(struct __stripe_pages_2d *sp2d)
 static void _gen_xor_unit(struct __stripe_pages_2d *sp2d)
 {
 	unsigned p;
+	unsigned tx_flags = ASYNC_TX_ACK;
+
+	if (sp2d->parity == 1)
+		tx_flags |= ASYNC_TX_XOR_ZERO_DST;
+
 	for (p = 0; p < sp2d->pages_in_unit; p++) {
 		struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p];
 
 		if (!_1ps->write_count)
 			continue;
 
-		init_async_submit(&_1ps->submit,
-			ASYNC_TX_XOR_ZERO_DST | ASYNC_TX_ACK,
+		init_async_submit(&_1ps->submit, tx_flags,
 			NULL, NULL, NULL, (addr_conv_t *)_1ps->scribble);
 
-		/* TODO: raid6 */
-		_1ps->tx = async_xor(_1ps->pages[sp2d->data_devs], _1ps->pages,
-				     0, sp2d->data_devs, PAGE_SIZE,
-				     &_1ps->submit);
+		if (sp2d->parity == 1)
+			_1ps->tx = async_xor(_1ps->pages[sp2d->data_devs],
+						_1ps->pages, 0, sp2d->data_devs,
+						PAGE_SIZE, &_1ps->submit);
+		else /* parity == 2 */
+			_1ps->tx = async_gen_syndrome(_1ps->pages, 0,
+						sp2d->data_devs + sp2d->parity,
+						PAGE_SIZE, &_1ps->submit);
 	}
 
 	for (p = 0; p < sp2d->pages_in_unit; p++) {
@@ -616,7 +624,7 @@ static int _read_4_write_execute(struct ore_io_state *ios)
 int _ore_add_parity_unit(struct ore_io_state *ios,
 			    struct ore_striping_info *si,
 			    struct ore_per_dev_state *per_dev,
-			    unsigned cur_len)
+			    unsigned cur_len, bool do_xor)
 {
 	if (ios->reading) {
 		if (per_dev->cur_sg >= ios->sgs_per_dev) {
@@ -641,9 +649,11 @@ int _ore_add_parity_unit(struct ore_io_state *ios,
 			/* If first stripe, Read in all read4write pages
 			 * (if needed) before we calculate the first parity.
 			 */
-			_read_4_write_first_stripe(ios);
+			if (do_xor)
+				_read_4_write_first_stripe(ios);
 		}
-		if (!cur_len) /* If last stripe r4w pages of last stripe */
+		if (!cur_len && do_xor)
+			/* If last stripe r4w pages of last stripe */
 			_read_4_write_last_stripe(ios);
 		_read_4_write_execute(ios);
 
@@ -655,7 +665,7 @@ int _ore_add_parity_unit(struct ore_io_state *ios,
 			++(ios->cur_par_page);
 		}
 
-		BUG_ON(si->cur_comp != sp2d->data_devs);
+		BUG_ON(si->cur_comp < sp2d->data_devs);
 		BUG_ON(si->cur_pg + num_pages > sp2d->pages_in_unit);
 
 		ret = _ore_add_stripe_unit(ios,  &array_start, 0, pages,
@@ -663,9 +673,10 @@ int _ore_add_parity_unit(struct ore_io_state *ios,
 		if (unlikely(ret))
 			return ret;
 
-		/* TODO: raid6 if (last_parity_dev) */
-		_gen_xor_unit(sp2d);
-		_sp2d_reset(sp2d, ios->r4w, ios->private);
+		if (do_xor) {
+			_gen_xor_unit(sp2d);
+			_sp2d_reset(sp2d, ios->r4w, ios->private);
+		}
 	}
 	return 0;
 }
diff --git a/fs/exofs/ore_raid.h b/fs/exofs/ore_raid.h
index d365bda..cf6375d 100644
--- a/fs/exofs/ore_raid.h
+++ b/fs/exofs/ore_raid.h
@@ -38,7 +38,8 @@ void _ore_free_raid_stuff(struct ore_io_state *ios);
 void _ore_add_sg_seg(struct ore_per_dev_state *per_dev, unsigned cur_len,
 		 bool not_last);
 int _ore_add_parity_unit(struct ore_io_state *ios, struct ore_striping_info *si,
-		     struct ore_per_dev_state *per_dev, unsigned cur_len);
+		     struct ore_per_dev_state *per_dev, unsigned cur_len,
+		     bool do_xor);
 void _ore_add_stripe_page(struct __stripe_pages_2d *sp2d,
 		       struct ore_striping_info *si, struct page *page);
 static inline void _add_stripe_page(struct __stripe_pages_2d *sp2d,
-- 
1.9.0


--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

WARNING: multiple messages have this Message-ID (diff)
From: Boaz Harrosh <bharrosh@panasas.com>
To: open-osd <osd-dev@open-osd.org>,
	NFS list <linux-nfs@vger.kernel.org>,
	Daniel Gryniewicz <dang@linuxbox.com>,
	Elizabeth Ellenbogen Ziph <elizabeth@linuxbox.com>,
	"Pathak, Santosh" <santoshp@panasas.com>,
	linux-fsdevel <linux-fsdevel@vger.kernel.org>
Cc: Tigran Mkrtchyan <tigran.mkrtchyan@desy.de>,
	Benny Halevy <bhalevy@primarydata.com>,
	Sachin bhamare <sachin.bhamare@gmail.com>
Subject: [PATCH 3/3] ore: Support for raid 6
Date: Thu, 22 May 2014 16:11:34 +0300	[thread overview]
Message-ID: <537DF786.2070506@panasas.com> (raw)
In-Reply-To: <537DF5B4.4070001@panasas.com>


This simple patch adds support for raid6 to the ORE.
Most operations and calculations where already for the general
case. Only things left:
* call async_gen_syndrome() in the case of raid6
  (NOTE that the raid6 math is the one supported by the Linux Kernel
   see: crypto/async_tx/async_pq.c)
* call _ore_add_parity_unit() twice with only last call generating
  the redundancy pages.

* Fix couple BUGS in old code
  a. In reads when parity==2 it can happen that per_dev->length=0
     but per_dev->offset was set and adjusted by _ore_add_sg_seg().
     Don't let it be overwritten.
  b. The all 'cur_comp > starting_dev' thing to determine if:
       "per_dev->offset is in the current stripe number or the
       next one."
     Was a complete raid5/4 accident. When parity==2 this is not
     at all true usually. All we need to do is increment si->ob_offset
     once we pass by the first parity device.
     (This also greatly simplifies the code, amen)
  c. Calculation of si->dev rotation can overflow when parity==2.

* Then last enable raid6 in ore_verify_layout()

I want to deeply thank Daniel Gryniewicz who found first all the
bugs in the old raid code, and inspired these patches:
	Inspired-by Daniel Gryniewicz <dang@linuxbox.com>

Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
---
 fs/exofs/Kconfig.ore |  2 ++
 fs/exofs/ore.c       | 75 ++++++++++++++++++++++++++++++++++++----------------
 fs/exofs/ore_raid.c  | 37 +++++++++++++++++---------
 fs/exofs/ore_raid.h  |  3 ++-
 4 files changed, 80 insertions(+), 37 deletions(-)

diff --git a/fs/exofs/Kconfig.ore b/fs/exofs/Kconfig.ore
index 1ca7fb7..2daf232 100644
--- a/fs/exofs/Kconfig.ore
+++ b/fs/exofs/Kconfig.ore
@@ -9,4 +9,6 @@ config ORE
 	tristate
 	depends on EXOFS_FS || PNFS_OBJLAYOUT
 	select ASYNC_XOR
+	select RAID6_PQ
+	select ASYNC_PQ
 	default SCSI_OSD_ULD
diff --git a/fs/exofs/ore.c b/fs/exofs/ore.c
index 0e2a835..cfc0205 100644
--- a/fs/exofs/ore.c
+++ b/fs/exofs/ore.c
@@ -58,9 +58,12 @@ int ore_verify_layout(unsigned total_comps, struct ore_layout *layout)
 		layout->parity = 1;
 		break;
 	case PNFS_OSD_RAID_PQ:
+		layout->parity = 2;
+		break;
 	case PNFS_OSD_RAID_4:
 	default:
-		ORE_ERR("Only RAID_0/5 for now\n");
+		ORE_ERR("Only RAID_0/5/6 for now received-enum=%d\n",
+			layout->raid_algorithm);
 		return -EINVAL;
 	}
 	if (0 != (layout->stripe_unit & ~PAGE_MASK)) {
@@ -112,6 +115,8 @@ int ore_verify_layout(unsigned total_comps, struct ore_layout *layout)
 		layout->max_io_length /= stripe_length;
 		layout->max_io_length *= stripe_length;
 	}
+	ORE_DBGMSG("max_io_length=0x%lx\n", layout->max_io_length);
+
 	return 0;
 }
 EXPORT_SYMBOL(ore_verify_layout);
@@ -561,7 +566,8 @@ void ore_calc_stripe_info(struct ore_layout *layout, u64 file_offset,
 
 		si->par_dev = (group_width + group_width - parity - RxP) %
 			      group_width + first_dev;
-		si->dev = (group_width + C - RxP) % group_width + first_dev;
+		si->dev = (group_width + group_width + C - RxP) %
+			  group_width + first_dev;
 		si->bytes_in_stripe = U;
 		si->first_stripe_start = M * S + G * T + N * U;
 	} else {
@@ -651,6 +657,43 @@ out:	/* we fail the complete unit on an error eg don't advance
 	return ret;
 }
 
+static int _add_parity_units(struct ore_io_state *ios,
+			     struct ore_striping_info *si,
+			     unsigned dev, unsigned first_dev,
+			     unsigned mirrors_p1, unsigned devs_in_group,
+			     unsigned cur_len)
+{
+	unsigned do_parity;
+	int ret = 0;
+
+	for (do_parity = ios->layout->parity; do_parity; --do_parity) {
+		struct ore_per_dev_state *per_dev;
+
+		per_dev = &ios->per_dev[dev - first_dev];
+		if (!per_dev->length && !per_dev->offset) {
+			/* Only/always the parity unit of the first
+			 * stripe will be empty. So this is a chance to
+			 * initialize the per_dev info.
+			 */
+			per_dev->dev = dev;
+			per_dev->offset = si->obj_offset - si->unit_off;
+		}
+
+		ret = _ore_add_parity_unit(ios, si, per_dev, cur_len,
+					   do_parity == 1);
+		if (unlikely(ret))
+				break;
+
+		if (do_parity != 1) {
+			dev = ((dev + mirrors_p1) % devs_in_group) + first_dev;
+			si->cur_comp = (si->cur_comp + 1) %
+						       ios->layout->group_width;
+		}
+	}
+
+	return ret;
+}
+
 static int _prepare_for_striping(struct ore_io_state *ios)
 {
 	struct ore_striping_info *si = &ios->si;
@@ -660,7 +703,6 @@ static int _prepare_for_striping(struct ore_io_state *ios)
 	unsigned devs_in_group = group_width * mirrors_p1;
 	unsigned dev = si->dev;
 	unsigned first_dev = dev - (dev % devs_in_group);
-	unsigned dev_order;
 	unsigned cur_pg = ios->pages_consumed;
 	u64 length = ios->length;
 	int ret = 0;
@@ -672,14 +714,13 @@ static int _prepare_for_striping(struct ore_io_state *ios)
 
 	BUG_ON(length > si->length);
 
-	dev_order = si->cur_comp;
-
 	while (length) {
 		struct ore_per_dev_state *per_dev =
 						&ios->per_dev[dev - first_dev];
 		unsigned cur_len, page_off = 0;
 
-		if (!per_dev->length) {
+		if (!per_dev->length && !per_dev->offset) {
+			/* First time initialize the per_dev info. */
 			per_dev->dev = dev;
 			if (dev == si->dev) {
 				WARN_ON(dev == si->par_dev);
@@ -688,13 +729,7 @@ static int _prepare_for_striping(struct ore_io_state *ios)
 				page_off = si->unit_off & ~PAGE_MASK;
 				BUG_ON(page_off && (page_off != ios->pgbase));
 			} else {
-				if (si->cur_comp > dev_order)
-					per_dev->offset =
-						si->obj_offset - si->unit_off;
-				else /* si->cur_comp < dev_order */
-					per_dev->offset =
-						si->obj_offset + stripe_unit -
-								   si->unit_off;
+				per_dev->offset = si->obj_offset - si->unit_off;
 				cur_len = stripe_unit;
 			}
 		} else {
@@ -721,20 +756,12 @@ static int _prepare_for_striping(struct ore_io_state *ios)
 				/* If last stripe operate on parity comp */
 				si->cur_comp = group_width - ios->layout->parity;
 			}
-			per_dev = &ios->per_dev[dev - first_dev];
-			if (!per_dev->length) {
-				/* Only/always the parity unit of the first
-				 * stripe will be empty. So this is a chance to
-				 * initialize the per_dev info.
-				 */
-				per_dev->dev = dev;
-				per_dev->offset = si->obj_offset - si->unit_off;
-			}
 
 			/* In writes cur_len just means if it's the
 			 * last one. See _ore_add_parity_unit.
 			 */
-			ret = _ore_add_parity_unit(ios, si, per_dev,
+			ret = _add_parity_units(ios, si, dev, first_dev,
+						mirrors_p1, devs_in_group,
 						ios->sp2d ? length : cur_len);
 			if (unlikely(ret))
 					goto out;
@@ -746,6 +773,8 @@ static int _prepare_for_striping(struct ore_io_state *ios)
 			/* Next stripe, start fresh */
 			si->cur_comp = 0;
 			si->cur_pg = 0;
+			si->obj_offset += cur_len;
+			si->unit_off = 0;
 		}
 	}
 out:
diff --git a/fs/exofs/ore_raid.c b/fs/exofs/ore_raid.c
index d58a952..7f20f25 100644
--- a/fs/exofs/ore_raid.c
+++ b/fs/exofs/ore_raid.c
@@ -218,20 +218,28 @@ static unsigned _sp2d_max_pg(struct __stripe_pages_2d *sp2d)
 static void _gen_xor_unit(struct __stripe_pages_2d *sp2d)
 {
 	unsigned p;
+	unsigned tx_flags = ASYNC_TX_ACK;
+
+	if (sp2d->parity == 1)
+		tx_flags |= ASYNC_TX_XOR_ZERO_DST;
+
 	for (p = 0; p < sp2d->pages_in_unit; p++) {
 		struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p];
 
 		if (!_1ps->write_count)
 			continue;
 
-		init_async_submit(&_1ps->submit,
-			ASYNC_TX_XOR_ZERO_DST | ASYNC_TX_ACK,
+		init_async_submit(&_1ps->submit, tx_flags,
 			NULL, NULL, NULL, (addr_conv_t *)_1ps->scribble);
 
-		/* TODO: raid6 */
-		_1ps->tx = async_xor(_1ps->pages[sp2d->data_devs], _1ps->pages,
-				     0, sp2d->data_devs, PAGE_SIZE,
-				     &_1ps->submit);
+		if (sp2d->parity == 1)
+			_1ps->tx = async_xor(_1ps->pages[sp2d->data_devs],
+						_1ps->pages, 0, sp2d->data_devs,
+						PAGE_SIZE, &_1ps->submit);
+		else /* parity == 2 */
+			_1ps->tx = async_gen_syndrome(_1ps->pages, 0,
+						sp2d->data_devs + sp2d->parity,
+						PAGE_SIZE, &_1ps->submit);
 	}
 
 	for (p = 0; p < sp2d->pages_in_unit; p++) {
@@ -616,7 +624,7 @@ static int _read_4_write_execute(struct ore_io_state *ios)
 int _ore_add_parity_unit(struct ore_io_state *ios,
 			    struct ore_striping_info *si,
 			    struct ore_per_dev_state *per_dev,
-			    unsigned cur_len)
+			    unsigned cur_len, bool do_xor)
 {
 	if (ios->reading) {
 		if (per_dev->cur_sg >= ios->sgs_per_dev) {
@@ -641,9 +649,11 @@ int _ore_add_parity_unit(struct ore_io_state *ios,
 			/* If first stripe, Read in all read4write pages
 			 * (if needed) before we calculate the first parity.
 			 */
-			_read_4_write_first_stripe(ios);
+			if (do_xor)
+				_read_4_write_first_stripe(ios);
 		}
-		if (!cur_len) /* If last stripe r4w pages of last stripe */
+		if (!cur_len && do_xor)
+			/* If last stripe r4w pages of last stripe */
 			_read_4_write_last_stripe(ios);
 		_read_4_write_execute(ios);
 
@@ -655,7 +665,7 @@ int _ore_add_parity_unit(struct ore_io_state *ios,
 			++(ios->cur_par_page);
 		}
 
-		BUG_ON(si->cur_comp != sp2d->data_devs);
+		BUG_ON(si->cur_comp < sp2d->data_devs);
 		BUG_ON(si->cur_pg + num_pages > sp2d->pages_in_unit);
 
 		ret = _ore_add_stripe_unit(ios,  &array_start, 0, pages,
@@ -663,9 +673,10 @@ int _ore_add_parity_unit(struct ore_io_state *ios,
 		if (unlikely(ret))
 			return ret;
 
-		/* TODO: raid6 if (last_parity_dev) */
-		_gen_xor_unit(sp2d);
-		_sp2d_reset(sp2d, ios->r4w, ios->private);
+		if (do_xor) {
+			_gen_xor_unit(sp2d);
+			_sp2d_reset(sp2d, ios->r4w, ios->private);
+		}
 	}
 	return 0;
 }
diff --git a/fs/exofs/ore_raid.h b/fs/exofs/ore_raid.h
index d365bda..cf6375d 100644
--- a/fs/exofs/ore_raid.h
+++ b/fs/exofs/ore_raid.h
@@ -38,7 +38,8 @@ void _ore_free_raid_stuff(struct ore_io_state *ios);
 void _ore_add_sg_seg(struct ore_per_dev_state *per_dev, unsigned cur_len,
 		 bool not_last);
 int _ore_add_parity_unit(struct ore_io_state *ios, struct ore_striping_info *si,
-		     struct ore_per_dev_state *per_dev, unsigned cur_len);
+		     struct ore_per_dev_state *per_dev, unsigned cur_len,
+		     bool do_xor);
 void _ore_add_stripe_page(struct __stripe_pages_2d *sp2d,
 		       struct ore_striping_info *si, struct page *page);
 static inline void _add_stripe_page(struct __stripe_pages_2d *sp2d,
-- 
1.9.0



  parent reply	other threads:[~2014-05-22 13:11 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-05-22 13:03 [PACHSET 0/3] ore: raid6 Boaz Harrosh
2014-05-22 13:03 ` Boaz Harrosh
2014-05-22 13:08 ` [PATCH 2/3] ore: Remove redundant dev_order(), more cleanups Boaz Harrosh
     [not found] ` <537DF5B4.4070001-C4P08NqkoRlBDgjK7y7TUQ@public.gmane.org>
2014-05-22 13:07   ` [PATCH 1/3] ore: (trivial) reformat some code Boaz Harrosh
2014-05-22 13:07     ` Boaz Harrosh
2014-05-22 13:11   ` Boaz Harrosh [this message]
2014-05-22 13:11     ` [PATCH 3/3] ore: Support for raid 6 Boaz Harrosh
  -- strict thread matches above, loose matches on Subject: below --
2014-04-10 10:45 [RFC 0/3] ore: raid6 Boaz Harrosh
2014-04-10 11:00 ` [PATCH 3/3] ore: Support for raid 6 Boaz Harrosh

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=537DF786.2070506@panasas.com \
    --to=bharrosh-c4p08nqkorlbdgjk7y7tuq@public.gmane.org \
    --cc=bhalevy-7I+n7zu2hftEKMMhf/gKZA@public.gmane.org \
    --cc=dang-Jp3n8lUXroRWk0Htik3J/w@public.gmane.org \
    --cc=elizabeth-Jp3n8lUXroRWk0Htik3J/w@public.gmane.org \
    --cc=linux-fsdevel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
    --cc=linux-nfs-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
    --cc=osd-dev-yNzVSZO3znNg9hUCZPvPmw@public.gmane.org \
    --cc=sachin.bhamare-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org \
    --cc=santoshp-C4P08NqkoRlBDgjK7y7TUQ@public.gmane.org \
    --cc=tigran.mkrtchyan-T5F83Mi6MZE@public.gmane.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.