All of lore.kernel.org
 help / color / mirror / Atom feed
From: Benny Halevy <bhalevy@panasas.com>
To: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: linux-nfs@vger.kernel.org, bharrosh@panasas.com
Subject: [PATCH v3 29/29] pnfs-obj: objio_osd: groups support
Date: Mon, 16 May 2011 09:24:36 -0700	[thread overview]
Message-ID: <1305563076-8362-1-git-send-email-bhalevy@panasas.com> (raw)
In-Reply-To: <4DD14D8E.1070701@panasas.com>

From: Boaz Harrosh <bharrosh@panasas.com>

* _calc_stripe_info() changes to accommodate grouping
  calculations. It now returns additional information.

* old _prepare_pages() becomes _prepare_one_group()
  which stores pages belonging to one device group.

* Iterates on all groups calling _prepare_one_group().

* Enable mounting of grouped data_maps (group_width != 0)

TODO:
  Support for partial layouts will come in the next patch

[Support partial layouts]
Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
---
 fs/nfs/objlayout/objio_osd.c |  135 +++++++++++++++++++++++++++++++++---------
 1 files changed, 106 insertions(+), 29 deletions(-)

diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index 6da4aa2..e7a0fcb 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -134,6 +134,8 @@ struct objio_segment {
 	unsigned mirrors_p1;
 	unsigned stripe_unit;
 	unsigned group_width;	/* Data stripe_units without integrity comps */
+	u64 group_depth;
+	unsigned group_count;
 
 	unsigned num_comps;
 	/* variable length */
@@ -252,12 +254,9 @@ static int _verify_data_map(struct pnfs_osd_layout *layout)
 {
 	struct pnfs_osd_data_map *data_map = &layout->olo_map;
 	u64 stripe_length;
+	u32 group_width;
 
-/* FIXME: Only raid0 !group_width/depth for now. if not so, do not mount */
-	if (data_map->odm_group_width || data_map->odm_group_depth) {
-		printk(KERN_ERR "Group width/depth not supported\n");
-		return -ENOTSUPP;
-	}
+/* FIXME: Only raid0 for now. if not go through MDS */
 	if (data_map->odm_raid_algorithm != PNFS_OSD_RAID_0) {
 		printk(KERN_ERR "Only RAID_0 for now\n");
 		return -ENOTSUPP;
@@ -268,8 +267,13 @@ static int _verify_data_map(struct pnfs_osd_layout *layout)
 		return -EINVAL;
 	}
 
-	stripe_length = data_map->odm_stripe_unit * (data_map->odm_num_comps /
-						(data_map->odm_mirror_cnt + 1));
+	if (data_map->odm_group_width)
+		group_width = data_map->odm_group_width;
+	else
+		group_width = data_map->odm_num_comps /
+						(data_map->odm_mirror_cnt + 1);
+
+	stripe_length = (u64)data_map->odm_stripe_unit * group_width;
 	if (stripe_length >= (1ULL << 32)) {
 		printk(KERN_ERR "Total Stripe length(0x%llx)"
 			  " >= 32bit is not supported\n", _LLU(stripe_length));
@@ -311,8 +315,18 @@ int objio_alloc_lseg(void **outp,
 
 	objio_seg->mirrors_p1 = layout->olo_map.odm_mirror_cnt + 1;
 	objio_seg->stripe_unit = layout->olo_map.odm_stripe_unit;
-	objio_seg->group_width = layout->olo_map.odm_num_comps /
-							objio_seg->mirrors_p1;
+	if (layout->olo_map.odm_group_width) {
+		objio_seg->group_width = layout->olo_map.odm_group_width;
+		objio_seg->group_depth = layout->olo_map.odm_group_depth;
+		objio_seg->group_count = layout->olo_map.odm_num_comps /
+						objio_seg->mirrors_p1 /
+						objio_seg->group_width;
+	} else {
+		objio_seg->group_width = layout->olo_map.odm_num_comps /
+						objio_seg->mirrors_p1;
+		objio_seg->group_depth = -1;
+		objio_seg->group_count = 1;
+	}
 
 	*outp = objio_seg;
 	return 0;
@@ -483,6 +497,9 @@ struct osd_dev * _io_od(struct objio_state *ios, unsigned dev)
 
 struct _striping_info {
 	u64 obj_offset;
+	u64 group_length;
+	u64 total_group_length;
+	u64 Major;
 	unsigned dev;
 	unsigned unit_off;
 };
@@ -492,15 +509,34 @@ static void _calc_stripe_info(struct objio_state *ios, u64 file_offset,
 {
 	u32	stripe_unit = ios->objio_seg->stripe_unit;
 	u32	group_width = ios->objio_seg->group_width;
+	u64	group_depth = ios->objio_seg->group_depth;
 	u32	U = stripe_unit * group_width;
 
-	u32	LmodU;
-	u64 	N = div_u64_rem(file_offset, U, &LmodU);
+	u64	T = U * group_depth;
+	u64	S = T * ios->objio_seg->group_count;
+	u64	M = div64_u64(file_offset, S);
+
+	/*
+	G = (L - (M * S)) / T
+	H = (L - (M * S)) % T
+	*/
+	u64	LmodU = file_offset - M * S;
+	u32	G = div64_u64(LmodU, T);
+	u64	H = LmodU - G * T;
+
+	u32	N = div_u64(H, U);
+
+	div_u64_rem(file_offset, stripe_unit, &si->unit_off);
+	si->obj_offset = si->unit_off + (N * stripe_unit) +
+				  (M * group_depth * stripe_unit);
 
-	si->unit_off = LmodU % stripe_unit;
-	si->obj_offset = N * stripe_unit + si->unit_off;
-	si->dev = LmodU / stripe_unit;
+	/* "H - (N * U)" is just "H % U" so it's bound to u32 */
+	si->dev = (u32)(H - (N * U)) / stripe_unit + G * group_width;
 	si->dev *= ios->objio_seg->mirrors_p1;
+
+	si->group_length = T - H;
+	si->total_group_length = T;
+	si->Major = M;
 }
 
 static int _add_stripe_unit(struct objio_state *ios,  unsigned *cur_pg,
@@ -547,15 +583,18 @@ static int _add_stripe_unit(struct objio_state *ios,  unsigned *cur_pg,
 	return 0;
 }
 
-static int _prepare_pages(struct objio_state *ios, struct _striping_info *si)
+static int _prepare_one_group(struct objio_state *ios, u64 length,
+			      struct _striping_info *si, unsigned first_comp,
+			      unsigned *last_pg)
 {
-	u64 length = ios->ol_state.count;
 	unsigned stripe_unit = ios->objio_seg->stripe_unit;
 	unsigned mirrors_p1 = ios->objio_seg->mirrors_p1;
+	unsigned devs_in_group = ios->objio_seg->group_width * mirrors_p1;
 	unsigned dev = si->dev;
-	unsigned comp = 0;
-	unsigned stripes = 0;
-	unsigned cur_pg = 0;
+	unsigned first_dev = dev - (dev % devs_in_group);
+	unsigned comp = first_comp + (dev - first_dev);
+	unsigned max_comp = ios->numdevs ? ios->numdevs - mirrors_p1 : 0;
+	unsigned cur_pg = *last_pg;
 	int ret = 0;
 
 	while (length) {
@@ -579,10 +618,11 @@ static int _prepare_pages(struct objio_state *ios, struct _striping_info *si)
 				cur_len = stripe_unit;
 			}
 
-			stripes++;
+			if (max_comp < comp)
+				max_comp = comp;
 
 			dev += mirrors_p1;
-			dev %= ios->ol_state.num_comps;
+			dev = (dev % devs_in_group) + first_dev;
 		} else {
 			cur_len = stripe_unit;
 		}
@@ -595,25 +635,58 @@ static int _prepare_pages(struct objio_state *ios, struct _striping_info *si)
 			goto out;
 
 		comp += mirrors_p1;
-		comp %= ios->ol_state.num_comps;
+		comp = (comp % devs_in_group) + first_comp;
 
 		length -= cur_len;
 		ios->length += cur_len;
 	}
 out:
-	if (!ios->length)
-		return ret;
-
-	ios->numdevs = stripes * mirrors_p1;
-	return 0;
+	ios->numdevs = max_comp + mirrors_p1;
+	*last_pg = cur_pg;
+	return ret;
 }
 
 static int _io_rw_pagelist(struct objio_state *ios)
 {
+	u64 length = ios->ol_state.count;
 	struct _striping_info si;
+	unsigned devs_in_group = ios->objio_seg->group_width *
+				 ios->objio_seg->mirrors_p1;
+	unsigned first_comp = 0;
+	unsigned num_comps = ios->objio_seg->layout->olo_map.odm_num_comps;
+	unsigned last_pg = 0;
+	int ret = 0;
 
-	_calc_stripe_info(ios, ios->ol_state.count, &si);
-	return _prepare_pages(ios, &si);
+	_calc_stripe_info(ios, ios->ol_state.offset, &si);
+	while (length) {
+		if (length < si.group_length)
+			si.group_length = length;
+
+		ret = _prepare_one_group(ios, si.group_length, &si, first_comp,
+					 &last_pg);
+		if (unlikely(ret))
+			goto out;
+
+		length -= si.group_length;
+
+		si.group_length = si.total_group_length;
+		si.unit_off = 0;
+		++si.Major;
+		si.obj_offset = si.Major * ios->objio_seg->stripe_unit *
+						ios->objio_seg->group_depth;
+
+		si.dev = (si.dev - (si.dev % devs_in_group)) + devs_in_group;
+		si.dev %= num_comps;
+
+		first_comp += devs_in_group;
+		first_comp %= num_comps;
+	}
+
+out:
+	if (!ios->length)
+		return ret;
+
+	return 0;
 }
 
 static ssize_t _sync_done(struct objio_state *ios)
@@ -735,6 +808,8 @@ static ssize_t _read_exec(struct objio_state *ios)
 	int ret;
 
 	for (i = 0; i < ios->numdevs; i += ios->objio_seg->mirrors_p1) {
+		if (!ios->per_dev[i].length)
+			continue;
 		ret = _read_mirrors(ios, i);
 		if (unlikely(ret))
 			goto err;
@@ -855,6 +930,8 @@ static ssize_t _write_exec(struct objio_state *ios)
 	int ret;
 
 	for (i = 0; i < ios->numdevs; i += ios->objio_seg->mirrors_p1) {
+		if (!ios->per_dev[i].length)
+			continue;
 		ret = _write_mirrors(ios, i);
 		if (unlikely(ret))
 			goto err;
-- 
1.7.3.4


      parent reply	other threads:[~2011-05-16 16:24 UTC|newest]

Thread overview: 31+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-05-16 16:15 [PATCH v3 0/29] pnfs for 2.6.40 Benny Halevy
2011-05-16 16:20 ` [PATCH v3 01/29] pnfs: CB_NOTIFY_DEVICEID Benny Halevy
2011-05-16 16:20 ` [PATCH v3 02/29] pnfs: Use byte-range for layoutget Benny Halevy
2011-05-16 16:20 ` [PATCH v3 03/29] pnfs: align layoutget requests on page boundaries Benny Halevy
2011-05-16 16:21 ` [PATCH v3 04/29] pnfs: Use byte-range for cb_layoutrecall Benny Halevy
2011-05-16 16:21 ` [PATCH v3 05/29] pnfs: client stats Benny Halevy
2011-05-16 16:21 ` [PATCH v3 06/29] pnfs: resolve header dependency in pnfs.h Benny Halevy
2011-05-16 16:21 ` [PATCH v3 07/29] pnfs-obj: objlayoutdriver module skeleton Benny Halevy
2011-05-16 16:21 ` [PATCH v3 08/29] NFSD: introduce exp_xdr.h Benny Halevy
2011-05-16 16:21 ` [PATCH v3 09/29] pnfs-obj: pnfs_osd XDR definitions Benny Halevy
2011-05-16 16:21 ` [PATCH v3 10/29] exofs: pnfs-tree: Remove pnfs-osd private definitions Benny Halevy
2011-05-16 16:22 ` [PATCH v3 11/29] pnfs-obj: pnfs_osd XDR client implementation Benny Halevy
2011-05-16 16:22 ` [PATCH v3 12/29] pnfs-obj: decode layout, alloc/free lseg Benny Halevy
     [not found]   ` <4DD43666.5040304@panasas.com>
2011-05-19 12:46     ` Boaz Harrosh
2011-05-16 16:22 ` [PATCH v3 13/29] pnfs: per mount layout driver private data Benny Halevy
2011-05-16 16:22 ` [PATCH v3 14/29] pnfs-obj: objio_osd device information retrieval and caching Benny Halevy
2011-05-16 16:22 ` [PATCH v3 15/29] pnfs: set/unset layoutdriver Benny Halevy
2011-05-16 16:22 ` [PATCH v3 16/29] pnfs-obj: objlayout set/unset layout driver methods Benny Halevy
2011-05-16 16:22 ` [PATCH v3 17/29] pnfs: alloc and free layout_hdr layoutdriver methods Benny Halevy
2011-05-16 16:23 ` [PATCH v3 18/29] pnfs: support for non-rpc layout drivers Benny Halevy
2011-05-16 16:23 ` [PATCH v3 19/29] pnfs-obj: read/write implementation Benny Halevy
2011-05-16 16:23 ` [PATCH v3 20/29] pnfs: layoutreturn Benny Halevy
2011-05-16 16:23 ` [PATCH v3 21/29] pnfs: layoutret_on_setattr Benny Halevy
2011-05-16 16:23 ` [PATCH v3 22/29] pnfs: encode_layoutreturn Benny Halevy
2011-05-16 16:23 ` [PATCH v3 23/29] sunrpc: xdr_rewind_stream() Benny Halevy
2011-05-16 16:23 ` [PATCH v3 24/29] pnfs-obj: objlayout_encode_layoutreturn Implementation Benny Halevy
2011-05-16 16:24 ` [PATCH v3 25/29] pnfs-obj: objio_osd report osd_errors for layoutreturn Benny Halevy
2011-05-16 16:24 ` [PATCH v3 26/29] pnfs: encode_layoutcommit Benny Halevy
2011-05-16 16:24 ` [PATCH v3 27/29] pnfs-obj: objlayout_encode_layoutcommit implementation Benny Halevy
2011-05-16 16:24 ` [PATCH v3 28/29] pnfs-obj: objio_osd: RAID0 support Benny Halevy
2011-05-16 16:24 ` Benny Halevy [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1305563076-8362-1-git-send-email-bhalevy@panasas.com \
    --to=bhalevy@panasas.com \
    --cc=Trond.Myklebust@netapp.com \
    --cc=bharrosh@panasas.com \
    --cc=linux-nfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.