All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC PATCH 1/2] ipc: introduce obtaining a lockless ipc object
@ 2013-03-02  0:16 Davidlohr Bueso
  2013-03-02  1:14 ` Linus Torvalds
                   ` (2 more replies)
  0 siblings, 3 replies; 8+ messages in thread
From: Davidlohr Bueso @ 2013-03-02  0:16 UTC (permalink / raw)
  To: Linus Torvalds, Rik van Riel
  Cc: Vinod, Chegu, Low, Jason, linux-tip-commits, Peter Zijlstra,
	H. Peter Anvin, Andrew Morton, aquini, Michel Lespinasse,
	Ingo Molnar, Larry Woodman, Linux Kernel Mailing List,
	Steven Rostedt, Thomas Gleixner

Through ipc_lock() and, therefore, ipc_lock_check() we currently
return the locked ipc object. This is not necessary for all situations,
thus introduce, analogous, ipc_obtain_object and ipc_obtain_object_check
functions that only mark the RCU read critical region without acquiring
the lock and return the ipc object.

Signed-off-by: Davidlohr Bueso <davidlohr.bueso@hp.com>
---
 ipc/util.c | 42 ++++++++++++++++++++++++++++++++----------
 ipc/util.h |  2 ++
 2 files changed, 34 insertions(+), 10 deletions(-)

diff --git a/ipc/util.c b/ipc/util.c
index 464a8ab..902f282 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -667,6 +667,21 @@ void ipc64_perm_to_ipc_perm (struct ipc64_perm *in, struct ipc_perm *out)
 	out->seq	= in->seq;
 }
 
+struct kern_ipc_perm *ipc_obtain_object(struct ipc_ids *ids, int id)
+{
+	struct kern_ipc_perm *out;
+	int lid = ipcid_to_idx(id);
+
+	rcu_read_lock();
+	out = idr_find(&ids->ipcs_idr, lid);
+	if (!out) {
+		rcu_read_unlock();
+		return ERR_PTR(-EINVAL);
+	}
+
+	return out;
+}
+
 /**
  * ipc_lock - Lock an ipc structure without rw_mutex held
  * @ids: IPC identifier set
@@ -679,18 +694,13 @@ void ipc64_perm_to_ipc_perm (struct ipc64_perm *in, struct ipc_perm *out)
 
 struct kern_ipc_perm *ipc_lock(struct ipc_ids *ids, int id)
 {
-	struct kern_ipc_perm *out;
-	int lid = ipcid_to_idx(id);
+	struct kern_ipc_perm *out = ipc_obtain_object(ids, id);
 
-	rcu_read_lock();
-	out = idr_find(&ids->ipcs_idr, lid);
-	if (out == NULL) {
-		rcu_read_unlock();
+	if (!out)
 		return ERR_PTR(-EINVAL);
-	}
 
 	spin_lock(&out->lock);
-	
+
 	/* ipc_rmid() may have already freed the ID while ipc_lock
 	 * was spinning: here verify that the structure is still valid
 	 */
@@ -703,6 +713,18 @@ struct kern_ipc_perm *ipc_lock(struct ipc_ids *ids, int id)
 	return out;
 }
 
+struct kern_ipc_perm *ipc_obtain_object_check(struct ipc_ids *ids, int id)
+{
+	struct kern_ipc_perm *out = ipc_obtain_object(ids, id);
+
+	if (IS_ERR(out))
+		return out;
+
+	if (ipc_checkid(out, id))
+		return ERR_PTR(-EIDRM);
+	return out;
+}
+
 struct kern_ipc_perm *ipc_lock_check(struct ipc_ids *ids, int id)
 {
 	struct kern_ipc_perm *out;
@@ -784,7 +806,7 @@ struct kern_ipc_perm *ipcctl_pre_down(struct ipc_namespace *ns,
 	int err;
 
 	down_write(&ids->rw_mutex);
-	ipcp = ipc_lock_check(ids, id);
+	ipcp = ipc_obtain_object_check(ids, id);
 	if (IS_ERR(ipcp)) {
 		err = PTR_ERR(ipcp);
 		goto out_up;
@@ -801,7 +823,7 @@ struct kern_ipc_perm *ipcctl_pre_down(struct ipc_namespace *ns,
 		return ipcp;
 
 	err = -EPERM;
-	ipc_unlock(ipcp);
+	rcu_read_unlock();
 out_up:
 	up_write(&ids->rw_mutex);
 	return ERR_PTR(err);
diff --git a/ipc/util.h b/ipc/util.h
index eeb79a1..2c68035 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -123,6 +123,7 @@ void ipc_rcu_getref(void *ptr);
 void ipc_rcu_putref(void *ptr);
 
 struct kern_ipc_perm *ipc_lock(struct ipc_ids *, int);
+struct kern_ipc_perm *ipc_obtain_object(struct ipc_ids *ids, int id);
 
 void kernel_to_ipc64_perm(struct kern_ipc_perm *in, struct ipc64_perm *out);
 void ipc64_perm_to_ipc_perm(struct ipc64_perm *in, struct ipc_perm *out);
@@ -173,6 +174,7 @@ static inline void ipc_unlock(struct kern_ipc_perm *perm)
 }
 
 struct kern_ipc_perm *ipc_lock_check(struct ipc_ids *ids, int id);
+struct kern_ipc_perm *ipc_obtain_object_check(struct ipc_ids *ids, int id);
 int ipcget(struct ipc_namespace *ns, struct ipc_ids *ids,
 			struct ipc_ops *ops, struct ipc_params *params);
 void free_ipcs(struct ipc_namespace *ns, struct ipc_ids *ids,
-- 
1.7.11.7





^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [RFC PATCH 1/2] ipc: introduce obtaining a lockless ipc object
  2013-03-02  0:16 [RFC PATCH 1/2] ipc: introduce obtaining a lockless ipc object Davidlohr Bueso
@ 2013-03-02  1:14 ` Linus Torvalds
  2013-03-02  4:32 ` Michel Lespinasse
  2013-03-02 21:24 ` Linus Torvalds
  2 siblings, 0 replies; 8+ messages in thread
From: Linus Torvalds @ 2013-03-02  1:14 UTC (permalink / raw)
  To: Davidlohr Bueso
  Cc: Rik van Riel, Vinod, Chegu, Low, Jason, linux-tip-commits,
	Peter Zijlstra, H. Peter Anvin, Andrew Morton, aquini,
	Michel Lespinasse, Ingo Molnar, Larry Woodman,
	Linux Kernel Mailing List, Steven Rostedt, Thomas Gleixner

On Fri, Mar 1, 2013 at 4:16 PM, Davidlohr Bueso <davidlohr.bueso@hp.com> wrote:
>
> +struct kern_ipc_perm *ipc_obtain_object(struct ipc_ids *ids, int id)

This looks good..

> +struct kern_ipc_perm *ipc_obtain_object_check(struct ipc_ids *ids, int id)

The comment on ipc_checkid() says that it should only be called with
the lock held.

Which seems entirely bogus, and I think it's just stale. It's just
checking the same ipc->seq that is only set at allocation, so it won't
change, afaik.

So I *think* the above is ok, but I'd want people to look at that
comment and remove it if it is stale. And if it's not stale, think
about this.

               Linus

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [RFC PATCH 1/2] ipc: introduce obtaining a lockless ipc object
  2013-03-02  0:16 [RFC PATCH 1/2] ipc: introduce obtaining a lockless ipc object Davidlohr Bueso
  2013-03-02  1:14 ` Linus Torvalds
@ 2013-03-02  4:32 ` Michel Lespinasse
  2013-03-03  2:18   ` Rik van Riel
  2013-03-02 21:24 ` Linus Torvalds
  2 siblings, 1 reply; 8+ messages in thread
From: Michel Lespinasse @ 2013-03-02  4:32 UTC (permalink / raw)
  To: Davidlohr Bueso
  Cc: Linus Torvalds, Rik van Riel, Vinod, Chegu, Low, Jason,
	linux-tip-commits, Peter Zijlstra, H. Peter Anvin, Andrew Morton,
	aquini, Ingo Molnar, Larry Woodman, Linux Kernel Mailing List,
	Steven Rostedt, Thomas Gleixner

On Sat, Mar 2, 2013 at 8:16 AM, Davidlohr Bueso <davidlohr.bueso@hp.com> wrote:
> Through ipc_lock() and, therefore, ipc_lock_check() we currently
> return the locked ipc object. This is not necessary for all situations,
> thus introduce, analogous, ipc_obtain_object and ipc_obtain_object_check
> functions that only mark the RCU read critical region without acquiring
> the lock and return the ipc object.
>
> Signed-off-by: Davidlohr Bueso <davidlohr.bueso@hp.com>
> ---
>  ipc/util.c | 42 ++++++++++++++++++++++++++++++++----------
>  ipc/util.h |  2 ++
>  2 files changed, 34 insertions(+), 10 deletions(-)
>
> diff --git a/ipc/util.c b/ipc/util.c
> index 464a8ab..902f282 100644
> --- a/ipc/util.c
> +++ b/ipc/util.c
> @@ -667,6 +667,21 @@ void ipc64_perm_to_ipc_perm (struct ipc64_perm *in, struct ipc_perm *out)
>         out->seq        = in->seq;
>  }
>
> +struct kern_ipc_perm *ipc_obtain_object(struct ipc_ids *ids, int id)
> +{
> +       struct kern_ipc_perm *out;
> +       int lid = ipcid_to_idx(id);
> +
> +       rcu_read_lock();
> +       out = idr_find(&ids->ipcs_idr, lid);
> +       if (!out) {
> +               rcu_read_unlock();
> +               return ERR_PTR(-EINVAL);
> +       }
> +
> +       return out;
> +}

I think it may be nicer to take the rcu read lock at the call site
rather than in ipc_obtain_object(), to make the rcu read lock/unlock
sites pair up more nicely. Either that or make an inline
ipc_release_object function that pairs up with ipc_obtain_object() and
just does an rcu_read_unlock().

> +
>  /**
>   * ipc_lock - Lock an ipc structure without rw_mutex held
>   * @ids: IPC identifier set
> @@ -679,18 +694,13 @@ void ipc64_perm_to_ipc_perm (struct ipc64_perm *in, struct ipc_perm *out)
>
>  struct kern_ipc_perm *ipc_lock(struct ipc_ids *ids, int id)
>  {
> -       struct kern_ipc_perm *out;
> -       int lid = ipcid_to_idx(id);
> +       struct kern_ipc_perm *out = ipc_obtain_object(ids, id);
>
> -       rcu_read_lock();
> -       out = idr_find(&ids->ipcs_idr, lid);
> -       if (out == NULL) {
> -               rcu_read_unlock();
> +       if (!out)

I think this should be if (IS_ERR(out)) ?

Looks great otherwise.

Acked-by: Michel Lespinasse <walken@google.com>

-- 
Michel "Walken" Lespinasse
A program is never fully debugged until the last user dies.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [RFC PATCH 1/2] ipc: introduce obtaining a lockless ipc object
  2013-03-02  0:16 [RFC PATCH 1/2] ipc: introduce obtaining a lockless ipc object Davidlohr Bueso
  2013-03-02  1:14 ` Linus Torvalds
  2013-03-02  4:32 ` Michel Lespinasse
@ 2013-03-02 21:24 ` Linus Torvalds
  2013-03-03  5:32   ` Davidlohr Bueso
  2 siblings, 1 reply; 8+ messages in thread
From: Linus Torvalds @ 2013-03-02 21:24 UTC (permalink / raw)
  To: Davidlohr Bueso, Emmanuel Benisty
  Cc: Rik van Riel, Vinod, Chegu, Low, Jason, linux-tip-commits,
	Peter Zijlstra, H. Peter Anvin, Andrew Morton, aquini,
	Michel Lespinasse, Ingo Molnar, Larry Woodman,
	Linux Kernel Mailing List, Steven Rostedt, Thomas Gleixner

On Fri, Mar 1, 2013 at 4:16 PM, Davidlohr Bueso <davidlohr.bueso@hp.com> wrote:
> @@ -784,7 +806,7 @@ struct kern_ipc_perm *ipcctl_pre_down(struct ipc_namespace *ns,
>         int err;
>
>         down_write(&ids->rw_mutex);
> -       ipcp = ipc_lock_check(ids, id);
> +       ipcp = ipc_obtain_object_check(ids, id);
>         if (IS_ERR(ipcp)) {
>                 err = PTR_ERR(ipcp);
>                 goto out_up;
> @@ -801,7 +823,7 @@ struct kern_ipc_perm *ipcctl_pre_down(struct ipc_namespace *ns,
>                 return ipcp;
>
>         err = -EPERM;
> -       ipc_unlock(ipcp);
> +       rcu_read_unlock();
>  out_up:
>         up_write(&ids->rw_mutex);
>         return ERR_PTR(err);

Uhhuh. This is very buggy, and I think it's the reason for the later
bugs that Emmanuel reported.

In particular, the *non-error* case is buggy, where it in the middle
of the function does

    return ipcp;

for a successful lookup.

It used to return a locked ipcp, now it no longer does. And you didn't
change any of the callers, which still do the "ipc_unlock()" at the
end.  So all the locking gets completely confused.

Oops.

                 Linus

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [RFC PATCH 1/2] ipc: introduce obtaining a lockless ipc object
  2013-03-02  4:32 ` Michel Lespinasse
@ 2013-03-03  2:18   ` Rik van Riel
  2013-03-03  5:53     ` Mike Galbraith
  0 siblings, 1 reply; 8+ messages in thread
From: Rik van Riel @ 2013-03-03  2:18 UTC (permalink / raw)
  To: Michel Lespinasse
  Cc: Davidlohr Bueso, Linus Torvalds, Vinod, Chegu, Low, Jason,
	linux-tip-commits, Peter Zijlstra, H. Peter Anvin, Andrew Morton,
	aquini, Ingo Molnar, Larry Woodman, Linux Kernel Mailing List,
	Steven Rostedt, Thomas Gleixner

On 03/01/2013 11:32 PM, Michel Lespinasse wrote:

> I think it may be nicer to take the rcu read lock at the call site
> rather than in ipc_obtain_object(), to make the rcu read lock/unlock
> sites pair up more nicely. Either that or make an inline
> ipc_release_object function that pairs up with ipc_obtain_object() and
> just does an rcu_read_unlock().

I started on a patch series to untangle the IPC locking, so
it will be a little more readable, and easier to maintain.

It is a slower approach than Davidlohr's, as in, it will take
a little longer to put a patch series together, but I hope it
will be easier to debug...

I hope to post a first iteration of the series by the middle
of next week.

-- 
All rights reversed

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [RFC PATCH 1/2] ipc: introduce obtaining a lockless ipc object
  2013-03-02 21:24 ` Linus Torvalds
@ 2013-03-03  5:32   ` Davidlohr Bueso
  2013-03-03  6:08     ` Linus Torvalds
  0 siblings, 1 reply; 8+ messages in thread
From: Davidlohr Bueso @ 2013-03-03  5:32 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Emmanuel Benisty, Rik van Riel, Vinod, Chegu, Low, Jason,
	linux-tip-commits, Peter Zijlstra, H. Peter Anvin, Andrew Morton,
	aquini, Michel Lespinasse, Ingo Molnar, Larry Woodman,
	Linux Kernel Mailing List, Steven Rostedt, Thomas Gleixner

On Sat, 2013-03-02 at 13:24 -0800, Linus Torvalds wrote:
> On Fri, Mar 1, 2013 at 4:16 PM, Davidlohr Bueso <davidlohr.bueso@hp.com> wrote:
> > @@ -784,7 +806,7 @@ struct kern_ipc_perm *ipcctl_pre_down(struct ipc_namespace *ns,
> >         int err;
> >
> >         down_write(&ids->rw_mutex);
> > -       ipcp = ipc_lock_check(ids, id);
> > +       ipcp = ipc_obtain_object_check(ids, id);
> >         if (IS_ERR(ipcp)) {
> >                 err = PTR_ERR(ipcp);
> >                 goto out_up;
> > @@ -801,7 +823,7 @@ struct kern_ipc_perm *ipcctl_pre_down(struct ipc_namespace *ns,
> >                 return ipcp;
> >
> >         err = -EPERM;
> > -       ipc_unlock(ipcp);
> > +       rcu_read_unlock();
> >  out_up:
> >         up_write(&ids->rw_mutex);
> >         return ERR_PTR(err);
> 
> Uhhuh. This is very buggy, and I think it's the reason for the later
> bugs that Emmanuel reported.

Yes, quite buggy. I was able to mess up three different machines with
this, and since semaphores aren't the only users of ipcctl_pre_down(),
it could explain the sys_shmctl() call in the trace Emmanuel reported. 

> 
> In particular, the *non-error* case is buggy, where it in the middle
> of the function does
> 
>     return ipcp;
> 
> for a successful lookup.
> 
> It used to return a locked ipcp, now it no longer does. And you didn't
> change any of the callers, which still do the "ipc_unlock()" at the
> end.  So all the locking gets completely confused.
> 

After updating the callers, [msgctl, semctl, shmctl]_down, to acquire
the lock for IPC_RMID and IPC_SET commands, I'm no longer seeing these
issues - so far on my regular laptop and two big boxes running my Oracle
benchmarks for a few hours. Something like below (yes, I will address
the open coded spin_lock calls):

@@ -1101,16 +1138,20 @@ static int semctl_down(struct ipc_namespace *ns, int semid,
 
        switch(cmd){
        case IPC_RMID:
+               spin_lock(&sma->sem_perm.lock);
                freeary(ns, ipcp);
                goto out_up;
        case IPC_SET:
+               spin_lock(&sma->sem_perm.lock);
                err = ipc_update_perm(&semid64.sem_perm, ipcp);
                if (err)
                        goto out_unlock;
                sma->sem_ctime = get_seconds();
                break;
        default:
+               rcu_read_unlock();
                err = -EINVAL;
+               goto out_up;
        }


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [RFC PATCH 1/2] ipc: introduce obtaining a lockless ipc object
  2013-03-03  2:18   ` Rik van Riel
@ 2013-03-03  5:53     ` Mike Galbraith
  0 siblings, 0 replies; 8+ messages in thread
From: Mike Galbraith @ 2013-03-03  5:53 UTC (permalink / raw)
  To: Rik van Riel
  Cc: Michel Lespinasse, Davidlohr Bueso, Linus Torvalds, Vinod, Chegu,
	Low, Jason, linux-tip-commits, Peter Zijlstra, H. Peter Anvin,
	Andrew Morton, aquini, Ingo Molnar, Larry Woodman,
	Linux Kernel Mailing List, Steven Rostedt, Thomas Gleixner

On Sat, 2013-03-02 at 21:18 -0500, Rik van Riel wrote: 
> On 03/01/2013 11:32 PM, Michel Lespinasse wrote:
> 
> > I think it may be nicer to take the rcu read lock at the call site
> > rather than in ipc_obtain_object(), to make the rcu read lock/unlock
> > sites pair up more nicely. Either that or make an inline
> > ipc_release_object function that pairs up with ipc_obtain_object() and
> > just does an rcu_read_unlock().
> 
> I started on a patch series to untangle the IPC locking, so
> it will be a little more readable, and easier to maintain.
> 
> It is a slower approach than Davidlohr's, as in, it will take
> a little longer to put a patch series together, but I hope it
> will be easier to debug...
> 
> I hope to post a first iteration of the series by the middle
> of next week.

Goody, I'll be watching out for it.  I have a big box rt user who uses
semaphores in their very tight constraint application.  While they're
using them carefully, I saw a trace where contention cost vs jitter
budget was a bit too high for comfort, and semctl/semop collision.

-Mike


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [RFC PATCH 1/2] ipc: introduce obtaining a lockless ipc object
  2013-03-03  5:32   ` Davidlohr Bueso
@ 2013-03-03  6:08     ` Linus Torvalds
  0 siblings, 0 replies; 8+ messages in thread
From: Linus Torvalds @ 2013-03-03  6:08 UTC (permalink / raw)
  To: Davidlohr Bueso
  Cc: Emmanuel Benisty, Rik van Riel, Vinod, Chegu, Low, Jason,
	linux-tip-commits, Peter Zijlstra, H. Peter Anvin, Andrew Morton,
	aquini, Michel Lespinasse, Ingo Molnar, Larry Woodman,
	Linux Kernel Mailing List, Steven Rostedt, Thomas Gleixner

On Sat, Mar 2, 2013 at 9:32 PM, Davidlohr Bueso <davidlohr.bueso@hp.com> wrote:
>
> After updating the callers, [msgctl, semctl, shmctl]_down, to acquire
> the lock for IPC_RMID and IPC_SET commands, I'm no longer seeing these
> issues - so far on my regular laptop and two big boxes running my Oracle
> benchmarks for a few hours. Something like below (yes, I will address
> the open coded spin_lock calls):

Actually, please do me a favor, and do *not* change the semantics of
these helper calls without changing the name of the helper.

So I'd ask that instead of changing the callers, do this:

 - make the version that does *not* lock be called ipcctl_pre_down_nolock()

 - then, keep the old name, and just make it do the
ipcctl_pre_down_nolock() followed by the spin_lock() (or rather, the
non-open-coded one).

Then, you can make each caller individually use the "nolock" version
instead, and move the locking into the caller. But don't do the same
mistake as the original patch, which was to change the locking
semantics while keeping the same name of the function.

In other words, it's much better to make these changes in small
gradual pieces, and make each piece very obviously not change any
semantics. So the first piece is the introduce the helper functions
with new semantics (and new names), while keeping the old helpers with
the old semantics and old names. Make it really clear that no
semantics change. And the second piece is then to start using the
split-up helpers that have different locking semantics and new names,
and do it on a call-by-call basis.

Ok?

              Linus

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2013-03-03  6:08 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-03-02  0:16 [RFC PATCH 1/2] ipc: introduce obtaining a lockless ipc object Davidlohr Bueso
2013-03-02  1:14 ` Linus Torvalds
2013-03-02  4:32 ` Michel Lespinasse
2013-03-03  2:18   ` Rik van Riel
2013-03-03  5:53     ` Mike Galbraith
2013-03-02 21:24 ` Linus Torvalds
2013-03-03  5:32   ` Davidlohr Bueso
2013-03-03  6:08     ` Linus Torvalds

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.