All of lore.kernel.org
 help / color / mirror / Atom feed
From: Al Viro <viro@ZenIV.linux.org.uk>
To: Martin Brandenburg <martin@omnibond.com>
Cc: Mike Marshall <hubcap@omnibond.com>,
	Linus Torvalds <torvalds@linux-foundation.org>,
	linux-fsdevel <linux-fsdevel@vger.kernel.org>,
	Stephen Rothwell <sfr@canb.auug.org.au>
Subject: Re: Orangefs ABI documentation
Date: Thu, 18 Feb 2016 11:11:22 +0000	[thread overview]
Message-ID: <20160218111122.GS17997@ZenIV.linux.org.uk> (raw)
In-Reply-To: <20160218000439.GR17997@ZenIV.linux.org.uk>

On Thu, Feb 18, 2016 at 12:04:39AM +0000, Al Viro wrote:
> Looks like the right approach is to have orangefs_clean_... hitting the
> sucker being copied to/from daemon to wait until that's finished (and
> discarded).  That, BTW, would have an extra benefit of making life simpler
> for refcounting.
> 
> So...  We need to have them marked as "being copied" for the duration, instead
> of bumping the refcount.  That setting and dropping that flag should happen
> under op->lock.  Setting it should happen only if it's not given up (that would
> be interpreted as "not found").  Cleaning, OTOH, would recheck the "given up"
> and do complete(&op->waitq) in case it's been given up...
> 
> How about this (instead of the previous variant, includes a fix for
> errno bogosity spotted a bit upthread; if it works, it'll need a bit of
> splitup)

Better yet, let's use list_del_init() on op->list instead of those list_del().
Then, seeing that ..._clean_interrupted_... can't be called in case of
serviced (we hadn't dropped op->lock since the time we'd checked it), we
can use list_empty(&op->list) as a test for "given up while copying to/from
daemon", so there's no need for separate flag that way:
	* we never pick given up op from list/hash
	* daemon read/write_iter never modifies op->list after op has
been given up
	* if op is given up while copying to/from userland in daemon
read/write_iter, it will call complete(&op->waitq) once it finds that,
so giveup side can wait for completion if it finds op it's about to give
up not on any list.

Should be equivalent to the previous variant, but IMO it's cleaner that
way...

diff --git a/fs/orangefs/devorangefs-req.c b/fs/orangefs/devorangefs-req.c
index b27ed1c..f7914f5 100644
--- a/fs/orangefs/devorangefs-req.c
+++ b/fs/orangefs/devorangefs-req.c
@@ -58,9 +58,9 @@ static struct orangefs_kernel_op_s *orangefs_devreq_remove_op(__u64 tag)
 				 next,
 				 &htable_ops_in_progress[index],
 				 list) {
-		if (op->tag == tag && !op_state_purged(op)) {
+		if (op->tag == tag && !op_state_purged(op) &&
+		    !op_state_given_up(op)) {
 			list_del_init(&op->list);
-			get_op(op); /* increase ref count. */
 			spin_unlock(&htable_ops_in_progress_lock);
 			return op;
 		}
@@ -133,7 +133,7 @@ restart:
 		__s32 fsid;
 		/* This lock is held past the end of the loop when we break. */
 		spin_lock(&op->lock);
-		if (unlikely(op_state_purged(op))) {
+		if (unlikely(op_state_purged(op) || op_state_given_up(op))) {
 			spin_unlock(&op->lock);
 			continue;
 		}
@@ -199,13 +199,12 @@ restart:
 	 */
 	if (op_state_in_progress(cur_op) || op_state_serviced(cur_op)) {
 		gossip_err("orangefs: ERROR: Current op already queued.\n");
-		list_del(&cur_op->list);
+		list_del_init(&cur_op->list);
 		spin_unlock(&cur_op->lock);
 		spin_unlock(&orangefs_request_list_lock);
 		return -EAGAIN;
 	}
 	list_del_init(&cur_op->list);
-	get_op(op);
 	spin_unlock(&orangefs_request_list_lock);
 
 	spin_unlock(&cur_op->lock);
@@ -230,7 +229,7 @@ restart:
 	if (unlikely(op_state_given_up(cur_op))) {
 		spin_unlock(&cur_op->lock);
 		spin_unlock(&htable_ops_in_progress_lock);
-		op_release(cur_op);
+		complete(&cur_op->waitq);
 		goto restart;
 	}
 
@@ -242,7 +241,6 @@ restart:
 	orangefs_devreq_add_op(cur_op);
 	spin_unlock(&cur_op->lock);
 	spin_unlock(&htable_ops_in_progress_lock);
-	op_release(cur_op);
 
 	/* The client only asks to read one size buffer. */
 	return MAX_DEV_REQ_UPSIZE;
@@ -258,10 +256,12 @@ error:
 	if (likely(!op_state_given_up(cur_op))) {
 		set_op_state_waiting(cur_op);
 		list_add(&cur_op->list, &orangefs_request_list);
+		spin_unlock(&cur_op->lock);
+	} else {
+		spin_unlock(&cur_op->lock);
+		complete(&cur_op->waitq);
 	}
-	spin_unlock(&cur_op->lock);
 	spin_unlock(&orangefs_request_list_lock);
-	op_release(cur_op);
 	return -EFAULT;
 }
 
@@ -333,8 +333,7 @@ static ssize_t orangefs_devreq_write_iter(struct kiocb *iocb,
 	n = copy_from_iter(&op->downcall, downcall_size, iter);
 	if (n != downcall_size) {
 		gossip_err("%s: failed to copy downcall.\n", __func__);
-		ret = -EFAULT;
-		goto Broken;
+		goto Efault;
 	}
 
 	if (op->downcall.status)
@@ -354,8 +353,7 @@ static ssize_t orangefs_devreq_write_iter(struct kiocb *iocb,
 			   downcall_size,
 			   op->downcall.trailer_size,
 			   total);
-		ret = -EFAULT;
-		goto Broken;
+		goto Efault;
 	}
 
 	/* Only READDIR operations should have trailers. */
@@ -364,8 +362,7 @@ static ssize_t orangefs_devreq_write_iter(struct kiocb *iocb,
 		gossip_err("%s: %x operation with trailer.",
 			   __func__,
 			   op->downcall.type);
-		ret = -EFAULT;
-		goto Broken;
+		goto Efault;
 	}
 
 	/* READDIR operations should always have trailers. */
@@ -374,8 +371,7 @@ static ssize_t orangefs_devreq_write_iter(struct kiocb *iocb,
 		gossip_err("%s: %x operation with no trailer.",
 			   __func__,
 			   op->downcall.type);
-		ret = -EFAULT;
-		goto Broken;
+		goto Efault;
 	}
 
 	if (op->downcall.type != ORANGEFS_VFS_OP_READDIR)
@@ -386,8 +382,7 @@ static ssize_t orangefs_devreq_write_iter(struct kiocb *iocb,
 	if (op->downcall.trailer_buf == NULL) {
 		gossip_err("%s: failed trailer vmalloc.\n",
 			   __func__);
-		ret = -ENOMEM;
-		goto Broken;
+		goto Enomem;
 	}
 	memset(op->downcall.trailer_buf, 0, op->downcall.trailer_size);
 	n = copy_from_iter(op->downcall.trailer_buf,
@@ -396,8 +391,7 @@ static ssize_t orangefs_devreq_write_iter(struct kiocb *iocb,
 	if (n != op->downcall.trailer_size) {
 		gossip_err("%s: failed to copy trailer.\n", __func__);
 		vfree(op->downcall.trailer_buf);
-		ret = -EFAULT;
-		goto Broken;
+		goto Efault;
 	}
 
 wakeup:
@@ -406,38 +400,27 @@ wakeup:
 	 * that this op is done
 	 */
 	spin_lock(&op->lock);
-	if (unlikely(op_state_given_up(op))) {
+	if (unlikely(op_is_cancel(op))) {
 		spin_unlock(&op->lock);
-		goto out;
-	}
-	set_op_state_serviced(op);
-	spin_unlock(&op->lock);
-
-	/*
-	 * If this operation is an I/O operation we need to wait
-	 * for all data to be copied before we can return to avoid
-	 * buffer corruption and races that can pull the buffers
-	 * out from under us.
-	 *
-	 * Essentially we're synchronizing with other parts of the
-	 * vfs implicitly by not allowing the user space
-	 * application reading/writing this device to return until
-	 * the buffers are done being used.
-	 */
-out:
-	if (unlikely(op_is_cancel(op)))
 		put_cancel(op);
-	op_release(op);
-	return ret;
-
-Broken:
-	spin_lock(&op->lock);
-	if (!op_state_given_up(op)) {
-		op->downcall.status = ret;
+	} else if (unlikely(op_state_given_up(op))) {
+		spin_unlock(&op->lock);
+		complete(&op->waitq);
+	} else {
 		set_op_state_serviced(op);
+		spin_unlock(&op->lock);
 	}
-	spin_unlock(&op->lock);
-	goto out;
+	return ret;
+
+Efault:
+	op->downcall.status = -(ORANGEFS_ERROR_BIT | 9);
+	ret = -EFAULT;
+	goto wakeup;
+
+Enomem:
+	op->downcall.status = -(ORANGEFS_ERROR_BIT | 8);
+	ret = -ENOMEM;
+	goto wakeup;
 }
 
 /* Returns whether any FS are still pending remounted */
diff --git a/fs/orangefs/orangefs-cache.c b/fs/orangefs/orangefs-cache.c
index 817092a..900a2e3 100644
--- a/fs/orangefs/orangefs-cache.c
+++ b/fs/orangefs/orangefs-cache.c
@@ -120,8 +120,6 @@ struct orangefs_kernel_op_s *op_alloc(__s32 type)
 		spin_lock_init(&new_op->lock);
 		init_completion(&new_op->waitq);
 
-		atomic_set(&new_op->ref_count, 1);
-
 		new_op->upcall.type = ORANGEFS_VFS_OP_INVALID;
 		new_op->downcall.type = ORANGEFS_VFS_OP_INVALID;
 		new_op->downcall.status = -1;
@@ -149,7 +147,7 @@ struct orangefs_kernel_op_s *op_alloc(__s32 type)
 	return new_op;
 }
 
-void __op_release(struct orangefs_kernel_op_s *orangefs_op)
+void op_release(struct orangefs_kernel_op_s *orangefs_op)
 {
 	if (orangefs_op) {
 		gossip_debug(GOSSIP_CACHE_DEBUG,
diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h
index 1f8310c..e387d3c 100644
--- a/fs/orangefs/orangefs-kernel.h
+++ b/fs/orangefs/orangefs-kernel.h
@@ -205,8 +205,6 @@ struct orangefs_kernel_op_s {
 	struct completion waitq;
 	spinlock_t lock;
 
-	atomic_t ref_count;
-
 	/* VFS aio fields */
 
 	int attempts;
@@ -230,23 +228,7 @@ static inline void set_op_state_serviced(struct orangefs_kernel_op_s *op)
 #define op_state_given_up(op)    ((op)->op_state & OP_VFS_STATE_GIVEN_UP)
 #define op_is_cancel(op)         ((op)->downcall.type == ORANGEFS_VFS_OP_CANCEL)
 
-static inline void get_op(struct orangefs_kernel_op_s *op)
-{
-	atomic_inc(&op->ref_count);
-	gossip_debug(GOSSIP_DEV_DEBUG,
-			"(get) Alloced OP (%p:%llu)\n",	op, llu(op->tag));
-}
-
-void __op_release(struct orangefs_kernel_op_s *op);
-
-static inline void op_release(struct orangefs_kernel_op_s *op)
-{
-	if (atomic_dec_and_test(&op->ref_count)) {
-		gossip_debug(GOSSIP_DEV_DEBUG,
-			"(put) Releasing OP (%p:%llu)\n", op, llu((op)->tag));
-		__op_release(op);
-	}
-}
+void op_release(struct orangefs_kernel_op_s *op);
 
 extern void orangefs_bufmap_put(int);
 static inline void put_cancel(struct orangefs_kernel_op_s *op)
@@ -259,7 +241,7 @@ static inline void set_op_state_purged(struct orangefs_kernel_op_s *op)
 {
 	spin_lock(&op->lock);
 	if (unlikely(op_is_cancel(op))) {
-		list_del(&op->list);
+		list_del_init(&op->list);
 		spin_unlock(&op->lock);
 		put_cancel(op);
 	} else {
diff --git a/fs/orangefs/waitqueue.c b/fs/orangefs/waitqueue.c
index d980240..3f9e430 100644
--- a/fs/orangefs/waitqueue.c
+++ b/fs/orangefs/waitqueue.c
@@ -208,15 +208,20 @@ static void orangefs_clean_up_interrupted_operation(struct orangefs_kernel_op_s
 	 * Called with op->lock held.
 	 */
 	op->op_state |= OP_VFS_STATE_GIVEN_UP;
-
-	if (op_state_waiting(op)) {
+	/* from that point on it can't be moved by anybody else */
+	if (list_empty(&op->list)) {
+		/* caught copying to/from daemon */
+		BUG_ON(op_state_serviced(op));
+		spin_unlock(&op->lock);
+		wait_for_completion(&op->waitq);
+	} else if (op_state_waiting(op)) {
 		/*
 		 * upcall hasn't been read; remove op from upcall request
 		 * list.
 		 */
 		spin_unlock(&op->lock);
 		spin_lock(&orangefs_request_list_lock);
-		list_del(&op->list);
+		list_del_init(&op->list);
 		spin_unlock(&orangefs_request_list_lock);
 		gossip_debug(GOSSIP_WAIT_DEBUG,
 			     "Interrupted: Removed op %p from request_list\n",
@@ -225,23 +230,16 @@ static void orangefs_clean_up_interrupted_operation(struct orangefs_kernel_op_s
 		/* op must be removed from the in progress htable */
 		spin_unlock(&op->lock);
 		spin_lock(&htable_ops_in_progress_lock);
-		list_del(&op->list);
+		list_del_init(&op->list);
 		spin_unlock(&htable_ops_in_progress_lock);
 		gossip_debug(GOSSIP_WAIT_DEBUG,
 			     "Interrupted: Removed op %p"
 			     " from htable_ops_in_progress\n",
 			     op);
-	} else if (!op_state_serviced(op)) {
+	} else {
 		spin_unlock(&op->lock);
 		gossip_err("interrupted operation is in a weird state 0x%x\n",
 			   op->op_state);
-	} else {
-		/*
-		 * It is not intended for execution to flow here,
-		 * but having this unlock here makes sparse happy.
-		 */
-		gossip_err("%s: can't get here.\n", __func__);
-		spin_unlock(&op->lock);
 	}
 	reinit_completion(&op->waitq);
 }

  reply	other threads:[~2016-02-18 11:11 UTC|newest]

Thread overview: 111+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-01-15 21:46 Orangefs ABI documentation Mike Marshall
2016-01-22  7:11 ` Al Viro
2016-01-22 11:09   ` Mike Marshall
2016-01-22 16:59     ` Mike Marshall
2016-01-22 17:08       ` Al Viro
2016-01-22 17:40         ` Mike Marshall
2016-01-22 17:43         ` Al Viro
2016-01-22 18:17           ` Mike Marshall
2016-01-22 18:37             ` Al Viro
2016-01-22 19:07               ` Mike Marshall
2016-01-22 19:21                 ` Mike Marshall
2016-01-22 20:04                   ` Al Viro
2016-01-22 20:30                     ` Mike Marshall
2016-01-23  0:12                       ` Al Viro
2016-01-23  1:28                         ` Al Viro
2016-01-23  2:54                           ` Mike Marshall
2016-01-23 19:10                             ` Al Viro
2016-01-23 19:24                               ` Mike Marshall
2016-01-23 21:35                                 ` Mike Marshall
2016-01-23 22:05                                   ` Al Viro
2016-01-23 21:40                                 ` Al Viro
2016-01-23 22:36                                   ` Mike Marshall
2016-01-24  0:16                                     ` Al Viro
2016-01-24  4:05                                       ` Al Viro
2016-01-24 22:12                                         ` Mike Marshall
2016-01-30 17:22                                           ` Al Viro
2016-01-26 19:52                                         ` Martin Brandenburg
2016-01-30 17:34                                           ` Al Viro
2016-01-30 18:27                                             ` Al Viro
2016-02-04 23:30                                               ` Mike Marshall
2016-02-06 19:42                                                 ` Al Viro
2016-02-07  1:38                                                   ` Al Viro
2016-02-07  3:53                                                     ` Al Viro
2016-02-07 20:01                                                       ` [RFC] bufmap-related wait logics (Re: Orangefs ABI documentation) Al Viro
2016-02-08 22:26                                                       ` Orangefs ABI documentation Mike Marshall
2016-02-08 23:35                                                         ` Al Viro
2016-02-09  3:32                                                           ` Al Viro
2016-02-09 14:34                                                             ` Mike Marshall
2016-02-09 17:40                                                               ` Al Viro
2016-02-09 21:06                                                                 ` Al Viro
2016-02-09 22:25                                                                   ` Mike Marshall
2016-02-11 23:36                                                                   ` Mike Marshall
2016-02-09 22:02                                                                 ` Mike Marshall
2016-02-09 22:16                                                                   ` Al Viro
2016-02-09 22:40                                                                     ` Al Viro
2016-02-09 23:13                                                                       ` Al Viro
2016-02-10 16:44                                                                         ` Al Viro
2016-02-10 21:26                                                                           ` Al Viro
2016-02-11 23:54                                                                           ` Mike Marshall
2016-02-12  0:55                                                                             ` Al Viro
2016-02-12 12:13                                                                               ` Mike Marshall
2016-02-11  0:44                                                                         ` Al Viro
2016-02-11  3:22                                                                           ` Mike Marshall
2016-02-12  4:27                                                                             ` Al Viro
2016-02-12 12:26                                                                               ` Mike Marshall
2016-02-12 18:00                                                                                 ` Martin Brandenburg
2016-02-13 17:18                                                                                   ` Mike Marshall
2016-02-13 17:47                                                                                     ` Al Viro
2016-02-14  2:56                                                                                       ` Al Viro
2016-02-14  3:46                                                                                         ` [RFC] slot allocator - waitqueue use review needed (Re: Orangefs ABI documentation) Al Viro
2016-02-14  4:06                                                                                           ` Al Viro
2016-02-16  2:12                                                                                           ` Al Viro
2016-02-16 19:28                                                                                             ` Al Viro
2016-02-14 22:31                                                                                         ` Orangefs ABI documentation Mike Marshall
2016-02-14 23:43                                                                                           ` Al Viro
2016-02-15 17:46                                                                                             ` Mike Marshall
2016-02-15 18:45                                                                                               ` Al Viro
2016-02-15 22:32                                                                                                 ` Martin Brandenburg
2016-02-15 23:04                                                                                                   ` Al Viro
2016-02-16 23:15                                                                                                     ` Mike Marshall
2016-02-16 23:36                                                                                                       ` Al Viro
2016-02-16 23:54                                                                                                         ` Al Viro
2016-02-17 19:24                                                                                                           ` Mike Marshall
2016-02-17 20:11                                                                                                             ` Al Viro
2016-02-17 21:17                                                                                                               ` Al Viro
2016-02-17 22:24                                                                                                                 ` Mike Marshall
2016-02-17 22:40                                                                                                             ` Martin Brandenburg
2016-02-17 23:09                                                                                                               ` Al Viro
2016-02-17 23:15                                                                                                                 ` Al Viro
2016-02-18  0:04                                                                                                                   ` Al Viro
2016-02-18 11:11                                                                                                                     ` Al Viro [this message]
2016-02-18 18:58                                                                                                                       ` Mike Marshall
2016-02-18 19:20                                                                                                                         ` Al Viro
2016-02-18 19:49                                                                                                                         ` Martin Brandenburg
2016-02-18 20:08                                                                                                                           ` Mike Marshall
2016-02-18 20:22                                                                                                                             ` Mike Marshall
2016-02-18 20:38                                                                                                                               ` Mike Marshall
2016-02-18 20:52                                                                                                                                 ` Al Viro
2016-02-18 21:50                                                                                                                                   ` Mike Marshall
2016-02-19  0:25                                                                                                                                     ` Al Viro
2016-02-19 22:11                                                                                                                                       ` Mike Marshall
2016-02-19 22:22                                                                                                                                         ` Al Viro
2016-02-20 12:14                                                                                                                                           ` Mike Marshall
2016-02-20 13:36                                                                                                                                             ` Al Viro
2016-02-22 16:20                                                                                                                                               ` Mike Marshall
2016-02-22 21:22                                                                                                                                                 ` Mike Marshall
2016-02-23 21:58                                                                                                                                                   ` Mike Marshall
2016-02-26 20:21                                                                                                                                                     ` Mike Marshall
2016-02-19 22:32                                                                                                                                         ` Al Viro
2016-02-19 22:45                                                                                                                                           ` Martin Brandenburg
2016-02-19 22:50                                                                                                                                           ` Martin Brandenburg
2016-02-18 20:49                                                                                                                               ` Al Viro
2016-02-15 22:47                                                                                                 ` Mike Marshall
2016-01-23 22:46                                   ` write() semantics (Re: Orangefs ABI documentation) Al Viro
2016-01-23 23:35                                     ` Linus Torvalds
2016-03-03 22:25                                       ` Mike Marshall
2016-03-04 20:55                                         ` Mike Marshall
2016-01-22 20:51                     ` Orangefs ABI documentation Mike Marshall
2016-01-22 23:53                       ` Mike Marshall
2016-01-22 19:54                 ` Al Viro
2016-01-22 19:50             ` Al Viro

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20160218111122.GS17997@ZenIV.linux.org.uk \
    --to=viro@zeniv.linux.org.uk \
    --cc=hubcap@omnibond.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=martin@omnibond.com \
    --cc=sfr@canb.auug.org.au \
    --cc=torvalds@linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.