linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] AFS: Fix file locking
@ 2007-07-17 12:47 David Howells
  2007-07-18  0:50 ` Andrew Morton
  2007-07-18  9:23 ` David Howells
  0 siblings, 2 replies; 11+ messages in thread
From: David Howells @ 2007-07-17 12:47 UTC (permalink / raw)
  To: torvalds, akpm; +Cc: linux-kernel

Fix file locking for AFS:

 (*) Start the lock manager thread under a mutex to avoid a race.

 (*) Made the locking non-fair: New readlocks will jump pending writelocks if
     there's a readlock currently granted on a file.  This makes the behaviour
     similar to Linux's VFS locking.

Signed-off-by: David Howells <dhowells@redhat.com>
---

 fs/afs/flock.c |  126 +++++++++++++++++++++++++++++++++++---------------------
 1 files changed, 79 insertions(+), 47 deletions(-)

diff --git a/fs/afs/flock.c b/fs/afs/flock.c
index 8f07f8d..bb97105 100644
--- a/fs/afs/flock.c
+++ b/fs/afs/flock.c
@@ -19,6 +19,7 @@ static void afs_fl_copy_lock(struct file_lock *new, struct file_lock *fl);
 static void afs_fl_release_private(struct file_lock *fl);
 
 static struct workqueue_struct *afs_lock_manager;
+static DEFINE_MUTEX(afs_lock_manager_mutex);
 
 static struct file_lock_operations afs_lock_ops = {
 	.fl_copy_lock		= afs_fl_copy_lock,
@@ -30,12 +31,20 @@ static struct file_lock_operations afs_lock_ops = {
  */
 static int afs_init_lock_manager(void)
 {
+	int ret;
+
+	ret = 0;
 	if (!afs_lock_manager) {
-		afs_lock_manager = create_singlethread_workqueue("kafs_lockd");
-		if (!afs_lock_manager)
-			return -ENOMEM;
+		mutex_lock(&afs_lock_manager_mutex);
+		if (!afs_lock_manager) {
+			afs_lock_manager =
+				create_singlethread_workqueue("kafs_lockd");
+			if (!afs_lock_manager)
+				ret = -ENOMEM;
+		}
+		mutex_unlock(&afs_lock_manager_mutex);
 	}
-	return 0;
+	return ret;
 }
 
 /*
@@ -68,6 +77,29 @@ static void afs_schedule_lock_extension(struct afs_vnode *vnode)
 }
 
 /*
+ * grant one or more locks (readlocks are allowed to jump the queue if the
+ * first lock in the queue is itself a readlock)
+ * - the caller must hold the vnode lock
+ */
+static void afs_grant_locks(struct afs_vnode *vnode, struct file_lock *fl)
+{
+	struct file_lock *p, *_p;
+
+	list_move_tail(&fl->fl_u.afs.link, &vnode->granted_locks);
+	if (fl->fl_type == F_RDLCK) {
+		list_for_each_entry_safe(p, _p, &vnode->pending_locks,
+					 fl_u.afs.link) {
+			if (p->fl_type == F_RDLCK) {
+				p->fl_u.afs.state = AFS_LOCK_GRANTED;
+				list_move_tail(&p->fl_u.afs.link,
+					       &vnode->granted_locks);
+				wake_up(&p->fl_wait);
+			}
+		}
+	}
+}
+
+/*
  * do work for a lock, including:
  * - probing for a lock we're waiting on but didn't get immediately
  * - extending a lock that's close to timing out
@@ -172,8 +204,7 @@ void afs_lock_work(struct work_struct *work)
 				       struct file_lock, fl_u.afs.link) == fl) {
 				fl->fl_u.afs.state = ret;
 				if (ret == AFS_LOCK_GRANTED)
-					list_move_tail(&fl->fl_u.afs.link,
-						       &vnode->granted_locks);
+					afs_grant_locks(vnode, fl);
 				else
 					list_del_init(&fl->fl_u.afs.link);
 				wake_up(&fl->fl_wait);
@@ -258,49 +289,50 @@ static int afs_do_setlk(struct file *file, struct file_lock *fl)
 
 	spin_lock(&vnode->lock);
 
-	if (list_empty(&vnode->pending_locks)) {
-		/* if there's no-one else with a lock on this vnode, then we
-		 * need to ask the server for a lock */
-		if (list_empty(&vnode->granted_locks)) {
-			_debug("not locked");
-			ASSERTCMP(vnode->flags &
-				  ((1 << AFS_VNODE_LOCKING) |
-				   (1 << AFS_VNODE_READLOCKED) |
-				   (1 << AFS_VNODE_WRITELOCKED)), ==, 0);
-			list_add_tail(&fl->fl_u.afs.link, &vnode->pending_locks);
-			set_bit(AFS_VNODE_LOCKING, &vnode->flags);
-			spin_unlock(&vnode->lock);
+	/* if we've already got a readlock on the server then we can instantly
+	 * grant another readlock, irrespective of whether there are any
+	 * pending writelocks */
+	if (type == AFS_LOCK_READ &&
+	    vnode->flags & (1 << AFS_VNODE_READLOCKED)) {
+		_debug("instant readlock");
+		ASSERTCMP(vnode->flags &
+			  ((1 << AFS_VNODE_LOCKING) |
+			   (1 << AFS_VNODE_WRITELOCKED)), ==, 0);
+		ASSERT(!list_empty(&vnode->granted_locks));
+		goto sharing_existing_lock;
+	}
 
-			ret = afs_vnode_set_lock(vnode, key, type);
-			clear_bit(AFS_VNODE_LOCKING, &vnode->flags);
-			switch (ret) {
-			case 0:
-				goto acquired_server_lock;
-			case -EWOULDBLOCK:
-				spin_lock(&vnode->lock);
-				ASSERT(list_empty(&vnode->granted_locks));
-				ASSERTCMP(vnode->pending_locks.next, ==,
-					  &fl->fl_u.afs.link);
-				goto wait;
-			default:
-				spin_lock(&vnode->lock);
-				list_del_init(&fl->fl_u.afs.link);
-				spin_unlock(&vnode->lock);
-				goto error;
-			}
-		}
+	/* if there's no-one else with a lock on this vnode, then we need to
+	 * ask the server for a lock */
+	if (list_empty(&vnode->pending_locks) &&
+	    list_empty(&vnode->granted_locks)) {
+		_debug("not locked");
+		ASSERTCMP(vnode->flags &
+			  ((1 << AFS_VNODE_LOCKING) |
+			   (1 << AFS_VNODE_READLOCKED) |
+			   (1 << AFS_VNODE_WRITELOCKED)), ==, 0);
+		list_add_tail(&fl->fl_u.afs.link, &vnode->pending_locks);
+		set_bit(AFS_VNODE_LOCKING, &vnode->flags);
+		spin_unlock(&vnode->lock);
 
-		/* if we've already got a readlock on the server and no waiting
-		 * writelocks, then we might be able to instantly grant another
-		 * readlock */
-		if (type == AFS_LOCK_READ &&
-		    vnode->flags & (1 << AFS_VNODE_READLOCKED)) {
-			_debug("instant readlock");
-			ASSERTCMP(vnode->flags &
-				  ((1 << AFS_VNODE_LOCKING) |
-				   (1 << AFS_VNODE_WRITELOCKED)), ==, 0);
-			ASSERT(!list_empty(&vnode->granted_locks));
-			goto sharing_existing_lock;
+		ret = afs_vnode_set_lock(vnode, key, type);
+		clear_bit(AFS_VNODE_LOCKING, &vnode->flags);
+		switch (ret) {
+		case 0:
+			_debug("acquired");
+			goto acquired_server_lock;
+		case -EWOULDBLOCK:
+			_debug("would block");
+			spin_lock(&vnode->lock);
+			ASSERT(list_empty(&vnode->granted_locks));
+			ASSERTCMP(vnode->pending_locks.next, ==,
+				  &fl->fl_u.afs.link);
+			goto wait;
+		default:
+			spin_lock(&vnode->lock);
+			list_del_init(&fl->fl_u.afs.link);
+			spin_unlock(&vnode->lock);
+			goto error;
 		}
 	}
 


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH] AFS: Fix file locking
  2007-07-17 12:47 [PATCH] AFS: Fix file locking David Howells
@ 2007-07-18  0:50 ` Andrew Morton
  2007-07-18  5:56   ` Nick Piggin
  2007-07-18  9:23 ` David Howells
  1 sibling, 1 reply; 11+ messages in thread
From: Andrew Morton @ 2007-07-18  0:50 UTC (permalink / raw)
  To: David Howells; +Cc: torvalds, linux-kernel

On Tue, 17 Jul 2007 13:47:32 +0100
David Howells <dhowells@redhat.com> wrote:

> +	if (type == AFS_LOCK_READ &&
> +	    vnode->flags & (1 << AFS_VNODE_READLOCKED)) {

Here we use

	vnode->flags & (1 << foo)

> +		set_bit(AFS_VNODE_LOCKING, &vnode->flags);

and elsewhere we use set_bit(foo, &vnode->flags) and clear_bit()

This is a bit strange.  Does the open-coded bit-test have any performance
benefit on any architecture?  Not on x86 at least, afaik.

Please consider converting all that stuff to test_bit() sometime.  It sure
would look a lot better.


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] AFS: Fix file locking
  2007-07-18  0:50 ` Andrew Morton
@ 2007-07-18  5:56   ` Nick Piggin
  2007-07-20  3:41     ` Andrew Morton
  0 siblings, 1 reply; 11+ messages in thread
From: Nick Piggin @ 2007-07-18  5:56 UTC (permalink / raw)
  To: Andrew Morton; +Cc: David Howells, torvalds, linux-kernel

Andrew Morton wrote:
> On Tue, 17 Jul 2007 13:47:32 +0100
> David Howells <dhowells@redhat.com> wrote:
> 
> 
>>+	if (type == AFS_LOCK_READ &&
>>+	    vnode->flags & (1 << AFS_VNODE_READLOCKED)) {
> 
> 
> Here we use
> 
> 	vnode->flags & (1 << foo)
> 
> 
>>+		set_bit(AFS_VNODE_LOCKING, &vnode->flags);
> 
> 
> and elsewhere we use set_bit(foo, &vnode->flags) and clear_bit()
> 
> This is a bit strange.  Does the open-coded bit-test have any performance
> benefit on any architecture?  Not on x86 at least, afaik.

It uses locked operations on x86, but you can use __set_bit instead
(which should always be at least as efficient as the C version).

-- 
SUSE Labs, Novell Inc.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] AFS: Fix file locking
  2007-07-17 12:47 [PATCH] AFS: Fix file locking David Howells
  2007-07-18  0:50 ` Andrew Morton
@ 2007-07-18  9:23 ` David Howells
  1 sibling, 0 replies; 11+ messages in thread
From: David Howells @ 2007-07-18  9:23 UTC (permalink / raw)
  To: Andrew Morton; +Cc: torvalds, linux-kernel

Andrew Morton <akpm@linux-foundation.org> wrote:

> Here we use
> 
> 	vnode->flags & (1 << foo)
> 
> > +		set_bit(AFS_VNODE_LOCKING, &vnode->flags);
> 
> and elsewhere we use set_bit(foo, &vnode->flags) and clear_bit()

Ah...  IIRC I was originally testing multiple bits in one go (test_bit()'s do
not merge because of the volatile pointer).

However, as I'm doing only one test now, I'll convert it to test_bit().

David

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] AFS: Fix file locking
  2007-07-18  5:56   ` Nick Piggin
@ 2007-07-20  3:41     ` Andrew Morton
  2007-07-20  4:59       ` Nick Piggin
  0 siblings, 1 reply; 11+ messages in thread
From: Andrew Morton @ 2007-07-20  3:41 UTC (permalink / raw)
  To: Nick Piggin; +Cc: David Howells, torvalds, linux-kernel

On Wed, 18 Jul 2007 15:56:53 +1000 Nick Piggin <nickpiggin@yahoo.com.au> wrote:

> Andrew Morton wrote:
> > On Tue, 17 Jul 2007 13:47:32 +0100
> > David Howells <dhowells@redhat.com> wrote:
> > 
> > 
> >>+	if (type == AFS_LOCK_READ &&
> >>+	    vnode->flags & (1 << AFS_VNODE_READLOCKED)) {
> > 
> > 
> > Here we use
> > 
> > 	vnode->flags & (1 << foo)
> > 
> > 
> >>+		set_bit(AFS_VNODE_LOCKING, &vnode->flags);
> > 
> > 
> > and elsewhere we use set_bit(foo, &vnode->flags) and clear_bit()
> > 
> > This is a bit strange.  Does the open-coded bit-test have any performance
> > benefit on any architecture?  Not on x86 at least, afaik.
> 
> It uses locked operations on x86, but you can use __set_bit instead
> (which should always be at least as efficient as the C version).

I said "bit-test".  ie: test_bit().  That doesn't use a locked operation.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] AFS: Fix file locking
  2007-07-20  3:41     ` Andrew Morton
@ 2007-07-20  4:59       ` Nick Piggin
  2007-07-21  1:37         ` Linus Torvalds
  2007-07-23  9:13         ` David Howells
  0 siblings, 2 replies; 11+ messages in thread
From: Nick Piggin @ 2007-07-20  4:59 UTC (permalink / raw)
  To: Andrew Morton; +Cc: David Howells, torvalds, linux-kernel

Andrew Morton wrote:
> On Wed, 18 Jul 2007 15:56:53 +1000 Nick Piggin <nickpiggin@yahoo.com.au> wrote:
> 
> 
>>Andrew Morton wrote:
>>
>>>On Tue, 17 Jul 2007 13:47:32 +0100
>>>David Howells <dhowells@redhat.com> wrote:
>>>
>>>
>>>
>>>>+	if (type == AFS_LOCK_READ &&
>>>>+	    vnode->flags & (1 << AFS_VNODE_READLOCKED)) {
>>>
>>>
>>>Here we use
>>>
>>>	vnode->flags & (1 << foo)
>>>
>>>
>>>
>>>>+		set_bit(AFS_VNODE_LOCKING, &vnode->flags);
>>>
>>>
>>>and elsewhere we use set_bit(foo, &vnode->flags) and clear_bit()
>>>
>>>This is a bit strange.  Does the open-coded bit-test have any performance
>>>benefit on any architecture?  Not on x86 at least, afaik.
>>
>>It uses locked operations on x86, but you can use __set_bit instead
>>(which should always be at least as efficient as the C version).
> 
> 
> I said "bit-test".  ie: test_bit().  That doesn't use a locked operation.

So you did. Then to answer that, yes it could be faster because there are
stupid volatiles sprinkled all over the bitops code so you could easily
end up having to do more loads. Does it make a real difference? Unlikely,
but David loves counting cycles :)

-- 
SUSE Labs, Novell Inc.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] AFS: Fix file locking
  2007-07-20  4:59       ` Nick Piggin
@ 2007-07-21  1:37         ` Linus Torvalds
  2007-07-23  7:07           ` Nick Piggin
  2007-07-23  9:13         ` David Howells
  1 sibling, 1 reply; 11+ messages in thread
From: Linus Torvalds @ 2007-07-21  1:37 UTC (permalink / raw)
  To: Nick Piggin; +Cc: Andrew Morton, David Howells, linux-kernel



On Fri, 20 Jul 2007, Nick Piggin wrote:
> 
> So you did. Then to answer that, yes it could be faster because there are
> stupid volatiles sprinkled all over the bitops code so you could easily
> end up having to do more loads. Does it make a real difference? Unlikely,
> but David loves counting cycles :)

I thought we long long since removed the volatiles. They are buggy and 
horrible, and we really want to let the compiler combine multiple 
test-bits, and if they matter that implies locking is buggy or something 
worse..

Ie we'd *want*

	if (test_bit(x, y) || test_bit(z,y))

to be rewritten by the compiler as testing bits x/z at the same time.

But now I'm too scared to look.

		Linus

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] AFS: Fix file locking
  2007-07-21  1:37         ` Linus Torvalds
@ 2007-07-23  7:07           ` Nick Piggin
  2007-07-23  8:50             ` Satyam Sharma
  0 siblings, 1 reply; 11+ messages in thread
From: Nick Piggin @ 2007-07-23  7:07 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Andrew Morton, linux-kernel

Linus Torvalds wrote:
> 
> On Fri, 20 Jul 2007, Nick Piggin wrote:
> 
>>So you did. Then to answer that, yes it could be faster because there are
>>stupid volatiles sprinkled all over the bitops code so you could easily
>>end up having to do more loads. Does it make a real difference? Unlikely,
>>but David loves counting cycles :)
> 
> 
> I thought we long long since removed the volatiles. They are buggy and 
> horrible, and we really want to let the compiler combine multiple 
> test-bits, and if they matter that implies locking is buggy or something 
> worse..
> 
> Ie we'd *want*
> 
> 	if (test_bit(x, y) || test_bit(z,y))
> 
> to be rewritten by the compiler as testing bits x/z at the same time.

Yep. We'd also want __set_bit(x, y); __set_bit(z, y); and such to be
combined.
> 
> But now I'm too scared to look.

Not a chance :) Even the asm-generic "reference" implementation ratifies
the volatile crapiness. Would you take a patch?

-- 
SUSE Labs, Novell Inc.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] AFS: Fix file locking
  2007-07-23  7:07           ` Nick Piggin
@ 2007-07-23  8:50             ` Satyam Sharma
  2007-07-23  9:13               ` Satyam Sharma
  0 siblings, 1 reply; 11+ messages in thread
From: Satyam Sharma @ 2007-07-23  8:50 UTC (permalink / raw)
  To: Nick Piggin; +Cc: Linus Torvalds, Andrew Morton, linux-kernel

Hi,

On 7/23/07, Nick Piggin <nickpiggin@yahoo.com.au> wrote:
> Linus Torvalds wrote:
> >
> > On Fri, 20 Jul 2007, Nick Piggin wrote:
> >
> >>So you did. Then to answer that, yes it could be faster because there are
> >>stupid volatiles sprinkled all over the bitops code so you could easily
> >>end up having to do more loads. Does it make a real difference? Unlikely,
> >>but David loves counting cycles :)
> >
> >
> > I thought we long long since removed the volatiles. They are buggy and
> > horrible, and we really want to let the compiler combine multiple
> > test-bits, and if they matter that implies locking is buggy or something
> > worse..
> >
> > Ie we'd *want*
> >
> >       if (test_bit(x, y) || test_bit(z,y))
> >
> > to be rewritten by the compiler as testing bits x/z at the same time.
>
> Yep. We'd also want __set_bit(x, y); __set_bit(z, y); and such to be
> combined.
> >
> > But now I'm too scared to look.
>
> Not a chance :) Even the asm-generic "reference" implementation ratifies
> the volatile crapiness. Would you take a patch?

Coincidentally, I'm working on a cleanup of the bitops code just now --
I stumbled upon a lot of varied bogosity in there :-) Intend to send it
out in a couple of hours, probably.

Satyam

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] AFS: Fix file locking
  2007-07-20  4:59       ` Nick Piggin
  2007-07-21  1:37         ` Linus Torvalds
@ 2007-07-23  9:13         ` David Howells
  1 sibling, 0 replies; 11+ messages in thread
From: David Howells @ 2007-07-23  9:13 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Nick Piggin, Andrew Morton, linux-kernel

Linus Torvalds <torvalds@linux-foundation.org> wrote:

> I thought we long long since removed the volatiles.

They're certainly still there in i386 and x86_64.

David

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] AFS: Fix file locking
  2007-07-23  8:50             ` Satyam Sharma
@ 2007-07-23  9:13               ` Satyam Sharma
  0 siblings, 0 replies; 11+ messages in thread
From: Satyam Sharma @ 2007-07-23  9:13 UTC (permalink / raw)
  To: Nick Piggin; +Cc: Linus Torvalds, Andrew Morton, linux-kernel

[ Restricting discussion to the i386 bitops implementation. ]

Hi Nick,

On 7/23/07, Satyam Sharma <satyam.sharma@gmail.com> wrote:
> Hi,
>
> On 7/23/07, Nick Piggin <nickpiggin@yahoo.com.au> wrote:
> > Linus Torvalds wrote:
> > >
> > > On Fri, 20 Jul 2007, Nick Piggin wrote:
> > >
> > >>So you did. Then to answer that, yes it could be faster because there are
> > >>stupid volatiles sprinkled all over the bitops code so you could easily
> > >>end up having to do more loads. Does it make a real difference? Unlikely,
> > >>but David loves counting cycles :)
> > >
> > >
> > > I thought we long long since removed the volatiles. They are buggy and
> > > horrible, and we really want to let the compiler combine multiple
> > > test-bits, and if they matter that implies locking is buggy or something
> > > worse..
> > >
> > > Ie we'd *want*
> > >
> > >       if (test_bit(x, y) || test_bit(z,y))
> > >
> > > to be rewritten by the compiler as testing bits x/z at the same time.
> >
> > Yep. We'd also want __set_bit(x, y); __set_bit(z, y); and such to be
> > combined.

BTW I'm also running some tests writing test code, compiling and verifying
the code gcc generates ... curiously, volatile-access-casting of the passed
bit-string address is not the only thing that'll prevent gcc's optimizer from
combining the operations such as ones you listed above. Then there are
-O2 vs -Os (and constant_test_bit() vs variable_test_bit()) differences I am
observing ... and sometimes just the inadequacy of gcc's optimizer -- note
that constant_test_bit() seems to go through extra hoops unnecessarily to
avoid honouring @nr >= 32, whereas none of the other primitives in that file
does that. So the i386 kernel's stock constant_test_bit implementation
ends up differing from David's open-coded versions quite drastically in
subtle ways, and again makes it difficult to combine the kind of operations
you guys are discussing here ...

It's a given, of course, that the code that gcc generates when combining
such operations would clearly be more optimal than the simple btl-sbbl pair
with test-and-conditional-jumps that would otherwise get generated ...

> > > But now I'm too scared to look.
> >
> > Not a chance :) Even the asm-generic "reference" implementation ratifies
> > the volatile crapiness. Would you take a patch?
>
> Coincidentally, I'm working on a cleanup of the bitops code just now --
> I stumbled upon a lot of varied bogosity in there :-)

Such as bogus/invalid asm constraints being passed in the inline assembly.
Probably gcc knows everybody gets its complicated extended asm wrong,
so doesn't barf when parsing such stuff ... :-)

> Intend to send it
> out in a couple of hours, probably.

Satyam

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2007-07-23  9:14 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-07-17 12:47 [PATCH] AFS: Fix file locking David Howells
2007-07-18  0:50 ` Andrew Morton
2007-07-18  5:56   ` Nick Piggin
2007-07-20  3:41     ` Andrew Morton
2007-07-20  4:59       ` Nick Piggin
2007-07-21  1:37         ` Linus Torvalds
2007-07-23  7:07           ` Nick Piggin
2007-07-23  8:50             ` Satyam Sharma
2007-07-23  9:13               ` Satyam Sharma
2007-07-23  9:13         ` David Howells
2007-07-18  9:23 ` David Howells

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).