All of lore.kernel.org
 help / color / mirror / Atom feed
From: Boaz Harrosh <bharrosh@panasas.com>
To: Trond Myklebust <Trond.Myklebust@netapp.com>,
	Benny Halevy <bhalevy@panasas.com>,
	NFS list <linux-nfs@vger.kernel.org>,
	open-osd <osd-dev@open-osd.org>
Subject: [PATCH 25/32] pnfs-obj: report errors and .encode_layoutreturn Implementation.
Date: Sun, 29 May 2011 21:26:40 +0300	[thread overview]
Message-ID: <1306693600-8409-1-git-send-email-bharrosh@panasas.com> (raw)
In-Reply-To: <4DE28CFB.4060608@panasas.com>

An io_state pre-allocates an error information structure for each
possible osd-device that might error during IO. When IO is done if all
was well the io_state is freed. (as today). If the I/O has ended with an
error, the io_state is queued on a per-layout err_list. When eventually
encode_layoutreturn() is called, each error is properly encoded on the
XDR buffer and only then the io_state is removed from err_list and
de-allocated.

It is up to the io_engine to fill in the segment that fault and the type
of osd_error that occurred. By calling objlayout_io_set_result() for
each failing device.

In objio_osd:
* Allocate io-error descriptors space as part of io_state
* Use generic objlayout error reporting at end of io.

Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
---
 fs/nfs/objlayout/objio_osd.c |   44 ++++++++-
 fs/nfs/objlayout/objlayout.c |  232 +++++++++++++++++++++++++++++++++++++++++-
 fs/nfs/objlayout/objlayout.h |   23 ++++
 3 files changed, 297 insertions(+), 2 deletions(-)

diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index 4e8de3e..8bca5e1 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -396,12 +396,16 @@ int objio_alloc_io_state(struct pnfs_layout_segment *lseg,
 	struct objio_state *ios;
 	const unsigned first_size = sizeof(*ios) +
 				objio_seg->num_comps * sizeof(ios->per_dev[0]);
+	const unsigned sec_size = objio_seg->num_comps *
+						sizeof(ios->ol_state.ioerrs[0]);
 
-	ios = kzalloc(first_size, gfp_flags);
+	ios = kzalloc(first_size + sec_size, gfp_flags);
 	if (unlikely(!ios))
 		return -ENOMEM;
 
 	ios->layout = objio_seg;
+	ios->ol_state.ioerrs = ((void *)ios) + first_size;
+	ios->ol_state.num_comps = objio_seg->num_comps;
 
 	*outp = &ios->ol_state;
 	return 0;
@@ -415,6 +419,36 @@ void objio_free_io_state(struct objlayout_io_state *ol_state)
 	kfree(ios);
 }
 
+enum pnfs_osd_errno osd_pri_2_pnfs_err(enum osd_err_priority oep)
+{
+	switch (oep) {
+	case OSD_ERR_PRI_NO_ERROR:
+		return (enum pnfs_osd_errno)0;
+
+	case OSD_ERR_PRI_CLEAR_PAGES:
+		BUG_ON(1);
+		return 0;
+
+	case OSD_ERR_PRI_RESOURCE:
+		return PNFS_OSD_ERR_RESOURCE;
+	case OSD_ERR_PRI_BAD_CRED:
+		return PNFS_OSD_ERR_BAD_CRED;
+	case OSD_ERR_PRI_NO_ACCESS:
+		return PNFS_OSD_ERR_NO_ACCESS;
+	case OSD_ERR_PRI_UNREACHABLE:
+		return PNFS_OSD_ERR_UNREACHABLE;
+	case OSD_ERR_PRI_NOT_FOUND:
+		return PNFS_OSD_ERR_NOT_FOUND;
+	case OSD_ERR_PRI_NO_SPACE:
+		return PNFS_OSD_ERR_NO_SPACE;
+	default:
+		WARN_ON(1);
+		/* fallthrough */
+	case OSD_ERR_PRI_EIO:
+		return PNFS_OSD_ERR_EIO;
+	}
+}
+
 static void _clear_bio(struct bio *bio)
 {
 	struct bio_vec *bv;
@@ -461,6 +495,12 @@ static int _io_check(struct objio_state *ios, bool is_write)
 			continue; /* we recovered */
 		}
 		dev = ios->per_dev[i].dev;
+		objlayout_io_set_result(&ios->ol_state, dev,
+					&ios->layout->comps[dev].oc_object_id,
+					osd_pri_2_pnfs_err(osi.osd_err_pri),
+					ios->per_dev[i].offset,
+					ios->per_dev[i].length,
+					is_write);
 
 		if (osi.osd_err_pri >= oep) {
 			oep = osi.osd_err_pri;
@@ -977,6 +1017,8 @@ static struct pnfs_layoutdriver_type objlayout_type = {
 	.pg_test                 = objlayout_pg_test,
 
 	.free_deviceid_node	 = objio_free_deviceid_node,
+
+	.encode_layoutreturn     = objlayout_encode_layoutreturn,
 };
 
 MODULE_DESCRIPTION("pNFS Layout Driver for OSD2 objects");
diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c
index 5157ef6..f7caecf 100644
--- a/fs/nfs/objlayout/objlayout.c
+++ b/fs/nfs/objlayout/objlayout.c
@@ -50,6 +50,10 @@ objlayout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags)
 	struct objlayout *objlay;
 
 	objlay = kzalloc(sizeof(struct objlayout), gfp_flags);
+	if (objlay) {
+		spin_lock_init(&objlay->lock);
+		INIT_LIST_HEAD(&objlay->err_list);
+	}
 	dprintk("%s: Return %p\n", __func__, objlay);
 	return &objlay->pnfs_layout;
 }
@@ -64,6 +68,7 @@ objlayout_free_layout_hdr(struct pnfs_layout_hdr *lo)
 
 	dprintk("%s: objlay %p\n", __func__, objlay);
 
+	WARN_ON(!list_empty(&objlay->err_list));
 	kfree(objlay);
 }
 
@@ -183,6 +188,7 @@ objlayout_alloc_io_state(struct pnfs_layout_hdr *pnfs_layout_type,
 		pgbase &= ~PAGE_MASK;
 	}
 
+	INIT_LIST_HEAD(&state->err_list);
 	state->lseg = lseg;
 	state->rpcdata = rpcdata;
 	state->pages = pages;
@@ -213,7 +219,52 @@ objlayout_iodone(struct objlayout_io_state *state)
 {
 	dprintk("%s: state %p status\n", __func__, state);
 
-	objlayout_free_io_state(state);
+	if (likely(state->status >= 0)) {
+		objlayout_free_io_state(state);
+	} else {
+		struct objlayout *objlay = OBJLAYOUT(state->lseg->pls_layout);
+
+		spin_lock(&objlay->lock);
+		list_add(&objlay->err_list, &state->err_list);
+		spin_unlock(&objlay->lock);
+	}
+}
+
+/*
+ * objlayout_io_set_result - Set an osd_error code on a specific osd comp.
+ *
+ * The @index component IO failed (error returned from target). Register
+ * the error for later reporting at layout-return.
+ */
+void
+objlayout_io_set_result(struct objlayout_io_state *state, unsigned index,
+			struct pnfs_osd_objid *pooid, int osd_error,
+			u64 offset, u64 length, bool is_write)
+{
+	struct pnfs_osd_ioerr *ioerr = &state->ioerrs[index];
+
+	BUG_ON(index >= state->num_comps);
+	if (osd_error) {
+		ioerr->oer_component = *pooid;
+		ioerr->oer_comp_offset = offset;
+		ioerr->oer_comp_length = length;
+		ioerr->oer_iswrite = is_write;
+		ioerr->oer_errno = osd_error;
+
+		dprintk("%s: err[%d]: errno=%d is_write=%d dev(%llx:%llx) "
+			"par=0x%llx obj=0x%llx offset=0x%llx length=0x%llx\n",
+			__func__, index, ioerr->oer_errno,
+			ioerr->oer_iswrite,
+			_DEVID_LO(&ioerr->oer_component.oid_device_id),
+			_DEVID_HI(&ioerr->oer_component.oid_device_id),
+			ioerr->oer_component.oid_partition_id,
+			ioerr->oer_component.oid_object_id,
+			ioerr->oer_comp_offset,
+			ioerr->oer_comp_length);
+	} else {
+		/* User need not call if no error is reported */
+		ioerr->oer_errno = 0;
+	}
 }
 
 /* Function scheduled on rpc workqueue to call ->nfs_readlist_complete().
@@ -382,6 +433,185 @@ objlayout_write_pagelist(struct nfs_write_data *wdata,
 	return PNFS_ATTEMPTED;
 }
 
+static int
+err_prio(u32 oer_errno)
+{
+	switch (oer_errno) {
+	case 0:
+		return 0;
+
+	case PNFS_OSD_ERR_RESOURCE:
+		return OSD_ERR_PRI_RESOURCE;
+	case PNFS_OSD_ERR_BAD_CRED:
+		return OSD_ERR_PRI_BAD_CRED;
+	case PNFS_OSD_ERR_NO_ACCESS:
+		return OSD_ERR_PRI_NO_ACCESS;
+	case PNFS_OSD_ERR_UNREACHABLE:
+		return OSD_ERR_PRI_UNREACHABLE;
+	case PNFS_OSD_ERR_NOT_FOUND:
+		return OSD_ERR_PRI_NOT_FOUND;
+	case PNFS_OSD_ERR_NO_SPACE:
+		return OSD_ERR_PRI_NO_SPACE;
+	default:
+		WARN_ON(1);
+		/* fallthrough */
+	case PNFS_OSD_ERR_EIO:
+		return OSD_ERR_PRI_EIO;
+	}
+}
+
+static void
+merge_ioerr(struct pnfs_osd_ioerr *dest_err,
+	    const struct pnfs_osd_ioerr *src_err)
+{
+	u64 dest_end, src_end;
+
+	if (!dest_err->oer_errno) {
+		*dest_err = *src_err;
+		/* accumulated device must be blank */
+		memset(&dest_err->oer_component.oid_device_id, 0,
+			sizeof(dest_err->oer_component.oid_device_id));
+
+		return;
+	}
+
+	if (dest_err->oer_component.oid_partition_id !=
+				src_err->oer_component.oid_partition_id)
+		dest_err->oer_component.oid_partition_id = 0;
+
+	if (dest_err->oer_component.oid_object_id !=
+				src_err->oer_component.oid_object_id)
+		dest_err->oer_component.oid_object_id = 0;
+
+	if (dest_err->oer_comp_offset > src_err->oer_comp_offset)
+		dest_err->oer_comp_offset = src_err->oer_comp_offset;
+
+	dest_end = end_offset(dest_err->oer_comp_offset,
+			      dest_err->oer_comp_length);
+	src_end =  end_offset(src_err->oer_comp_offset,
+			      src_err->oer_comp_length);
+	if (dest_end < src_end)
+		dest_end = src_end;
+
+	dest_err->oer_comp_length = dest_end - dest_err->oer_comp_offset;
+
+	if ((src_err->oer_iswrite == dest_err->oer_iswrite) &&
+	    (err_prio(src_err->oer_errno) > err_prio(dest_err->oer_errno))) {
+			dest_err->oer_errno = src_err->oer_errno;
+	} else if (src_err->oer_iswrite) {
+		dest_err->oer_iswrite = true;
+		dest_err->oer_errno = src_err->oer_errno;
+	}
+}
+
+static void
+encode_accumulated_error(struct objlayout *objlay, __be32 *p)
+{
+	struct objlayout_io_state *state, *tmp;
+	struct pnfs_osd_ioerr accumulated_err = {.oer_errno = 0};
+
+	list_for_each_entry_safe(state, tmp, &objlay->err_list, err_list) {
+		unsigned i;
+
+		for (i = 0; i < state->num_comps; i++) {
+			struct pnfs_osd_ioerr *ioerr = &state->ioerrs[i];
+
+			if (!ioerr->oer_errno)
+				continue;
+
+			printk(KERN_ERR "%s: err[%d]: errno=%d is_write=%d "
+				"dev(%llx:%llx) par=0x%llx obj=0x%llx "
+				"offset=0x%llx length=0x%llx\n",
+				__func__, i, ioerr->oer_errno,
+				ioerr->oer_iswrite,
+				_DEVID_LO(&ioerr->oer_component.oid_device_id),
+				_DEVID_HI(&ioerr->oer_component.oid_device_id),
+				ioerr->oer_component.oid_partition_id,
+				ioerr->oer_component.oid_object_id,
+				ioerr->oer_comp_offset,
+				ioerr->oer_comp_length);
+
+			merge_ioerr(&accumulated_err, ioerr);
+		}
+		list_del(&state->err_list);
+		objlayout_free_io_state(state);
+	}
+
+	pnfs_osd_xdr_encode_ioerr(p, &accumulated_err);
+}
+
+void
+objlayout_encode_layoutreturn(struct pnfs_layout_hdr *pnfslay,
+			      struct xdr_stream *xdr,
+			      const struct nfs4_layoutreturn_args *args)
+{
+	struct objlayout *objlay = OBJLAYOUT(pnfslay);
+	struct objlayout_io_state *state, *tmp;
+	__be32 *start;
+
+	dprintk("%s: Begin\n", __func__);
+	start = xdr_reserve_space(xdr, 4);
+	BUG_ON(!start);
+
+	spin_lock(&objlay->lock);
+
+	list_for_each_entry_safe(state, tmp, &objlay->err_list, err_list) {
+		__be32 *last_xdr = NULL, *p;
+		unsigned i;
+		int res = 0;
+
+		for (i = 0; i < state->num_comps; i++) {
+			struct pnfs_osd_ioerr *ioerr = &state->ioerrs[i];
+
+			if (!ioerr->oer_errno)
+				continue;
+
+			dprintk("%s: err[%d]: errno=%d is_write=%d "
+				"dev(%llx:%llx) par=0x%llx obj=0x%llx "
+				"offset=0x%llx length=0x%llx\n",
+				__func__, i, ioerr->oer_errno,
+				ioerr->oer_iswrite,
+				_DEVID_LO(&ioerr->oer_component.oid_device_id),
+				_DEVID_HI(&ioerr->oer_component.oid_device_id),
+				ioerr->oer_component.oid_partition_id,
+				ioerr->oer_component.oid_object_id,
+				ioerr->oer_comp_offset,
+				ioerr->oer_comp_length);
+
+			p = pnfs_osd_xdr_ioerr_reserve_space(xdr);
+			if (unlikely(!p)) {
+				res = -E2BIG;
+				break; /* accumulated_error */
+			}
+
+			last_xdr = p;
+			pnfs_osd_xdr_encode_ioerr(p, &state->ioerrs[i]);
+		}
+
+		/* TODO: use xdr_write_pages */
+		if (unlikely(res)) {
+			/* no space for even one error descriptor */
+			BUG_ON(!last_xdr);
+
+			/* we've encountered a situation with lots and lots of
+			 * errors and no space to encode them all. Use the last
+			 * available slot to report the union of all the
+			 * remaining errors.
+			 */
+			encode_accumulated_error(objlay, last_xdr);
+			goto loop_done;
+		}
+		list_del(&state->err_list);
+		objlayout_free_io_state(state);
+	}
+loop_done:
+	spin_unlock(&objlay->lock);
+
+	*start = cpu_to_be32((xdr->p - start - 1) * 4);
+	dprintk("%s: Return\n", __func__);
+}
+
+
 /*
  * Get Device Info API for io engines
  */
diff --git a/fs/nfs/objlayout/objlayout.h b/fs/nfs/objlayout/objlayout.h
index 9a405e8..b0bb975 100644
--- a/fs/nfs/objlayout/objlayout.h
+++ b/fs/nfs/objlayout/objlayout.h
@@ -50,6 +50,10 @@
  */
 struct objlayout {
 	struct pnfs_layout_hdr pnfs_layout;
+
+	 /* for layout_return */
+	spinlock_t lock;
+	struct list_head err_list;
 };
 
 static inline struct objlayout *
@@ -76,6 +80,16 @@ struct objlayout_io_state {
 	int status;             /* res */
 	int eof;                /* res */
 	int committed;          /* res */
+
+	/* Error reporting (layout_return) */
+	struct list_head err_list;
+	unsigned num_comps;
+	/* Pointer to array of error descriptors of size num_comps.
+	 * It should contain as many entries as devices in the osd_layout
+	 * that participate in the I/O. It is up to the io_engine to allocate
+	 * needed space and set num_comps.
+	 */
+	struct pnfs_osd_ioerr *ioerrs;
 };
 
 /*
@@ -101,6 +115,10 @@ extern ssize_t objio_write_pagelist(struct objlayout_io_state *ol_state,
 /*
  * callback API
  */
+extern void objlayout_io_set_result(struct objlayout_io_state *state,
+			unsigned index, struct pnfs_osd_objid *pooid,
+			int osd_error, u64 offset, u64 length, bool is_write);
+
 extern void objlayout_read_done(struct objlayout_io_state *state,
 				ssize_t status, bool sync);
 extern void objlayout_write_done(struct objlayout_io_state *state,
@@ -131,4 +149,9 @@ extern enum pnfs_try_status objlayout_write_pagelist(
 	struct nfs_write_data *,
 	int how);
 
+extern void objlayout_encode_layoutreturn(
+	struct pnfs_layout_hdr *,
+	struct xdr_stream *,
+	const struct nfs4_layoutreturn_args *);
+
 #endif /* _OBJLAYOUT_H */
-- 
1.7.2.3


  parent reply	other threads:[~2011-05-29 18:26 UTC|newest]

Thread overview: 37+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-05-29 18:14 [PATCHSET final 00/32] pnfs-obj: pnfs-objects layout driver submission Boaz Harrosh
2011-05-29 18:16 ` [PATCH 01/32] NFSv4.1: fix typo in filelayout_check_layout Boaz Harrosh
2011-05-29 18:17 ` [PATCH 02/32] NFSv4.1: use struct nfs_client to qualify deviceid Boaz Harrosh
2011-05-29 18:17 ` [PATCH 03/32] pnfs: resolve header dependency in pnfs.h Boaz Harrosh
2011-05-29 18:18 ` [PATCH 04/32] NFSv4.1: make deviceid cache global Boaz Harrosh
2011-05-29 18:18 ` [PATCH 05/32] NFSv4.1: purge deviceid cache on nfs_free_client Boaz Harrosh
2011-05-29 18:19 ` [PATCH 06/32] pnfs: CB_NOTIFY_DEVICEID Boaz Harrosh
2011-05-29 18:19 ` [PATCH 07/32] NFSv4.1: use layout driver in global device cache Boaz Harrosh
2011-05-29 18:19 ` [PATCH 08/32] SUNRPC: introduce xdr_init_decode_pages Boaz Harrosh
2011-05-29 18:19 ` [PATCH 09/32] pnfs: Use byte-range for layoutget Boaz Harrosh
2011-05-29 18:20 ` [PATCH 10/32] pnfs: align layoutget requests on page boundaries Boaz Harrosh
2011-05-29 18:20 ` [PATCH 11/32] pnfs: Use byte-range for cb_layoutrecall Boaz Harrosh
2011-05-29 18:21 ` [PATCH 12/32] pnfs: client stats Boaz Harrosh
2011-05-29 18:21 ` [PATCH 13/32] pnfs-obj: objlayoutdriver module skeleton Boaz Harrosh
2011-05-29 18:21 ` [PATCH 14/32] pnfs-obj: pnfs_osd XDR definitions Boaz Harrosh
2011-05-29 18:22 ` [PATCH 15/32] pnfs-obj: pnfs_osd XDR client implementation Boaz Harrosh
2011-05-29 18:22 ` [PATCH 16/32] pnfs-obj: decode layout, alloc/free lseg Boaz Harrosh
2011-05-29 18:22 ` [PATCH 17/32] pnfs-obj: objio_osd device information retrieval and caching Boaz Harrosh
2011-05-29 18:23 ` [PATCH 18/32] pnfs: alloc and free layout_hdr layoutdriver methods Boaz Harrosh
2011-05-29 18:23 ` [PATCH 19/32] pnfs-obj: define per-inode private structure Boaz Harrosh
2011-05-29 18:24 ` [PATCH 20/32] pnfs: support for non-rpc layout drivers Boaz Harrosh
2011-05-29 18:25 ` [PATCH 21/32] pnfs-obj: osd raid engine read/write implementation Boaz Harrosh
2011-05-29 18:25 ` [PATCH 22/32] pnfs: layoutreturn Boaz Harrosh
2011-05-29 18:26 ` [PATCH 23/32] pnfs: layoutret_on_setattr Boaz Harrosh
2011-05-29 18:26 ` [PATCH 24/32] pnfs: encode_layoutreturn Boaz Harrosh
2011-05-29 18:26 ` Boaz Harrosh [this message]
2011-05-29 18:27 ` [PATCH 26/32] pnfs: encode_layoutcommit Boaz Harrosh
2011-05-29 18:27 ` [PATCH 27/32] pnfs-obj: objlayout_encode_layoutcommit implementation Boaz Harrosh
2011-05-29 18:27 ` [PATCH 28/32] NFSv4.1: unify pnfs_pageio_init functions Boaz Harrosh
2011-05-29 18:28 ` [PATCH 29/32] NFSv4.1: change pg_test return type to bool Boaz Harrosh
2011-05-29 18:28 ` [PATCH 30/32] NFSv4.1: use pnfs_generic_pg_test directly by layout driver Boaz Harrosh
2011-05-29 18:29 ` [PATCH 31/32] NFSv4.1: define nfs_generic_pg_test Boaz Harrosh
2011-05-29 18:30 ` [PATCH 32/32] pnfs-obj: pg_test check for max_io_size Boaz Harrosh
2011-05-29 18:36 ` [osd-dev] [PATCHSET final 00/32] pnfs-obj: pnfs-objects layout driver submission Boaz Harrosh
2011-05-31  5:58 ` Benny Halevy
2011-05-31 23:32   ` Jim Rees
2011-06-01 11:55     ` Benny Halevy

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1306693600-8409-1-git-send-email-bharrosh@panasas.com \
    --to=bharrosh@panasas.com \
    --cc=Trond.Myklebust@netapp.com \
    --cc=bhalevy@panasas.com \
    --cc=linux-nfs@vger.kernel.org \
    --cc=osd-dev@open-osd.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.