netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH net] sctp: set newsk sk_socket before processing listening sk backlog
@ 2019-09-30 13:10 Xin Long
  2019-10-01 22:30 ` David Miller
  2019-10-02  1:03 ` Marcelo Ricardo Leitner
  0 siblings, 2 replies; 11+ messages in thread
From: Xin Long @ 2019-09-30 13:10 UTC (permalink / raw)
  To: network dev, linux-sctp; +Cc: davem, Marcelo Ricardo Leitner, Neil Horman

This patch is to fix a NULL-ptr deref crash in selinux_sctp_bind_connect:

  [...] kasan: GPF could be caused by NULL-ptr deref or user memory access
  [...] RIP: 0010:selinux_sctp_bind_connect+0x16a/0x230
  [...] Call Trace:
  [...]  security_sctp_bind_connect+0x58/0x90
  [...]  sctp_process_asconf+0xa52/0xfd0 [sctp]
  [...]  sctp_sf_do_asconf+0x782/0x980 [sctp]
  [...]  sctp_do_sm+0x139/0x520 [sctp]
  [...]  sctp_assoc_bh_rcv+0x284/0x5c0 [sctp]
  [...]  sctp_backlog_rcv+0x45f/0x880 [sctp]
  [...]  __release_sock+0x120/0x370
  [...]  release_sock+0x4f/0x180
  [...]  sctp_accept+0x3f9/0x5a0 [sctp]
  [...]  inet_accept+0xe7/0x6f0

It was caused by that the 'newsk' sk_socket was not set before going to
security sctp hook when doing accept() on a tcp-type socket:

  inet_accept()->
    sctp_accept():
      lock_sock():
          lock listening 'sk'
                                          do_softirq():
                                            sctp_rcv():  <-- [1]
                                                asconf chunk arrived and
                                                enqueued in 'sk' backlog
      sctp_sock_migrate():
          set asoc's sk to 'newsk'
      release_sock():
          sctp_backlog_rcv():
            lock 'newsk'
            sctp_process_asconf()  <-- [2]
            unlock 'newsk'
    sock_graft():
        set sk_socket  <-- [3]

As it shows, at [1] the asconf chunk would be put into the listening 'sk'
backlog, as accept() was holding its sock lock. Then at [2] asconf would
get processed with 'newsk' as asoc's sk had been set to 'newsk'. However,
'newsk' sk_socket is not set until [3], while selinux_sctp_bind_connect()
would deref it, then kernel crashed.

Here to fix it by adding a new function sctp_inet_accept() for .accept(),
where it calls release_sock() on listening sk after sock_graft() in which
'newsk' sk_socket is set.

Note that TCPF_SYN_RECV flag check is removed in sctp_inet_accept(), as
sctp doesn't really use it.

Reported-by: Ying Xu <yinxu@redhat.com>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
---
 include/net/sctp/sctp.h |  2 ++
 net/sctp/ipv6.c         |  2 +-
 net/sctp/protocol.c     |  2 +-
 net/sctp/socket.c       | 31 ++++++++++++++++++++++++++-----
 4 files changed, 30 insertions(+), 7 deletions(-)

diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 5d60f13..be411a2 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -90,6 +90,8 @@ void sctp_addr_wq_mgmt(struct net *, struct sctp_sockaddr_entry *, int);
  */
 int sctp_inet_connect(struct socket *sock, struct sockaddr *uaddr,
 		      int addr_len, int flags);
+int sctp_inet_accept(struct socket *sock, struct socket *newsock,
+		     int flags, bool kern);
 int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb);
 int sctp_inet_listen(struct socket *sock, int backlog);
 void sctp_write_space(struct sock *sk);
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index e5f2fc7..529b930 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -1011,7 +1011,7 @@ static const struct proto_ops inet6_seqpacket_ops = {
 	.bind		   = inet6_bind,
 	.connect	   = sctp_inet_connect,
 	.socketpair	   = sock_no_socketpair,
-	.accept		   = inet_accept,
+	.accept		   = sctp_inet_accept,
 	.getname	   = sctp_getname,
 	.poll		   = sctp_poll,
 	.ioctl		   = inet6_ioctl,
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 08d14d8..d796ba1 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -1007,7 +1007,7 @@ static const struct proto_ops inet_seqpacket_ops = {
 	.bind		   = inet_bind,
 	.connect	   = sctp_inet_connect,
 	.socketpair	   = sock_no_socketpair,
-	.accept		   = inet_accept,
+	.accept		   = sctp_inet_accept,
 	.getname	   = inet_getname,	/* Semantics are different.  */
 	.poll		   = sctp_poll,
 	.ioctl		   = inet_ioctl,
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 939b8d2..d7d93e8 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -4878,8 +4878,6 @@ static struct sock *sctp_accept(struct sock *sk, int flags, int *err, bool kern)
 	long timeo;
 	int error = 0;
 
-	lock_sock(sk);
-
 	sp = sctp_sk(sk);
 	ep = sp->ep;
 
@@ -4920,11 +4918,36 @@ static struct sock *sctp_accept(struct sock *sk, int flags, int *err, bool kern)
 	}
 
 out:
-	release_sock(sk);
 	*err = error;
 	return newsk;
 }
 
+int sctp_inet_accept(struct socket *sock, struct socket *newsock,
+		     int flags, bool kern)
+{
+	struct sock *sk = sock->sk;
+	struct sock *newsk;
+	int err = 0;
+
+	lock_sock(sk);
+	newsk = sctp_accept(sk, flags, &err, kern);
+	if (!newsk)
+		goto do_err;
+
+	lock_sock_nested(newsk, SINGLE_DEPTH_NESTING);
+	sock_rps_record_flow(newsk);
+	WARN_ON(!((1 << newsk->sk_state) &
+		  (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_CLOSE)));
+
+	sock_graft(newsk, newsock);
+	newsock->state = SS_CONNECTED;
+	release_sock(newsk);
+
+do_err:
+	release_sock(sk);
+	return err;
+}
+
 /* The SCTP ioctl handler. */
 static int sctp_ioctl(struct sock *sk, int cmd, unsigned long arg)
 {
@@ -9487,7 +9510,6 @@ struct proto sctp_prot = {
 	.owner       =	THIS_MODULE,
 	.close       =	sctp_close,
 	.disconnect  =	sctp_disconnect,
-	.accept      =	sctp_accept,
 	.ioctl       =	sctp_ioctl,
 	.init        =	sctp_init_sock,
 	.destroy     =	sctp_destroy_sock,
@@ -9529,7 +9551,6 @@ struct proto sctpv6_prot = {
 	.owner		= THIS_MODULE,
 	.close		= sctp_close,
 	.disconnect	= sctp_disconnect,
-	.accept		= sctp_accept,
 	.ioctl		= sctp_ioctl,
 	.init		= sctp_init_sock,
 	.destroy	= sctp_v6_destroy_sock,
-- 
2.1.0


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH net] sctp: set newsk sk_socket before processing listening sk backlog
  2019-09-30 13:10 [PATCH net] sctp: set newsk sk_socket before processing listening sk backlog Xin Long
@ 2019-10-01 22:30 ` David Miller
  2019-10-02  1:03 ` Marcelo Ricardo Leitner
  1 sibling, 0 replies; 11+ messages in thread
From: David Miller @ 2019-10-01 22:30 UTC (permalink / raw)
  To: lucien.xin; +Cc: netdev, linux-sctp, marcelo.leitner, nhorman

From: Xin Long <lucien.xin@gmail.com>
Date: Mon, 30 Sep 2019 21:10:18 +0800

> This patch is to fix a NULL-ptr deref crash in selinux_sctp_bind_connect:
 ...

Marcel and Neil, please review.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH net] sctp: set newsk sk_socket before processing listening sk backlog
  2019-09-30 13:10 [PATCH net] sctp: set newsk sk_socket before processing listening sk backlog Xin Long
  2019-10-01 22:30 ` David Miller
@ 2019-10-02  1:03 ` Marcelo Ricardo Leitner
  2019-10-02  8:23   ` Xin Long
  1 sibling, 1 reply; 11+ messages in thread
From: Marcelo Ricardo Leitner @ 2019-10-02  1:03 UTC (permalink / raw)
  To: Xin Long; +Cc: network dev, linux-sctp, davem, Neil Horman

On Mon, Sep 30, 2019 at 09:10:18PM +0800, Xin Long wrote:
> This patch is to fix a NULL-ptr deref crash in selinux_sctp_bind_connect:
> 
>   [...] kasan: GPF could be caused by NULL-ptr deref or user memory access
>   [...] RIP: 0010:selinux_sctp_bind_connect+0x16a/0x230
>   [...] Call Trace:
>   [...]  security_sctp_bind_connect+0x58/0x90
>   [...]  sctp_process_asconf+0xa52/0xfd0 [sctp]
>   [...]  sctp_sf_do_asconf+0x782/0x980 [sctp]
>   [...]  sctp_do_sm+0x139/0x520 [sctp]
>   [...]  sctp_assoc_bh_rcv+0x284/0x5c0 [sctp]
>   [...]  sctp_backlog_rcv+0x45f/0x880 [sctp]
>   [...]  __release_sock+0x120/0x370
>   [...]  release_sock+0x4f/0x180
>   [...]  sctp_accept+0x3f9/0x5a0 [sctp]
>   [...]  inet_accept+0xe7/0x6f0
> 
> It was caused by that the 'newsk' sk_socket was not set before going to
> security sctp hook when doing accept() on a tcp-type socket:
> 
>   inet_accept()->
>     sctp_accept():
>       lock_sock():
>           lock listening 'sk'
>                                           do_softirq():
>                                             sctp_rcv():  <-- [1]
>                                                 asconf chunk arrived and
>                                                 enqueued in 'sk' backlog
>       sctp_sock_migrate():
>           set asoc's sk to 'newsk'
>       release_sock():
>           sctp_backlog_rcv():
>             lock 'newsk'
>             sctp_process_asconf()  <-- [2]
>             unlock 'newsk'
>     sock_graft():
>         set sk_socket  <-- [3]
> 
> As it shows, at [1] the asconf chunk would be put into the listening 'sk'
> backlog, as accept() was holding its sock lock. Then at [2] asconf would
> get processed with 'newsk' as asoc's sk had been set to 'newsk'. However,
> 'newsk' sk_socket is not set until [3], while selinux_sctp_bind_connect()
> would deref it, then kernel crashed.

Note that sctp will migrate such incoming chunks from sk to newsk in
sctp_rcv() if they arrived after the mass-migration performed at
sctp_sock_migrate().

That said, did you explore changing inet_accept() so that
sk1->sk_prot->accept() would return sk2 still/already locked?
That would be enough to block [2] from happening as then it would be
queued on newsk backlog this time and avoid nearly duplicating
inet_accept(). (too bad for this chunk, hit 2 backlogs..)

AFAICT TCP code would be fine with such change. Didn't check other
protocols.


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH net] sctp: set newsk sk_socket before processing listening sk backlog
  2019-10-02  1:03 ` Marcelo Ricardo Leitner
@ 2019-10-02  8:23   ` Xin Long
  2019-10-02 12:24     ` Neil Horman
  2019-10-02 12:55     ` Marcelo Ricardo Leitner
  0 siblings, 2 replies; 11+ messages in thread
From: Xin Long @ 2019-10-02  8:23 UTC (permalink / raw)
  To: Marcelo Ricardo Leitner; +Cc: network dev, linux-sctp, davem, Neil Horman

On Wed, Oct 2, 2019 at 9:04 AM Marcelo Ricardo Leitner
<marcelo.leitner@gmail.com> wrote:
>
> On Mon, Sep 30, 2019 at 09:10:18PM +0800, Xin Long wrote:
> > This patch is to fix a NULL-ptr deref crash in selinux_sctp_bind_connect:
> >
> >   [...] kasan: GPF could be caused by NULL-ptr deref or user memory access
> >   [...] RIP: 0010:selinux_sctp_bind_connect+0x16a/0x230
> >   [...] Call Trace:
> >   [...]  security_sctp_bind_connect+0x58/0x90
> >   [...]  sctp_process_asconf+0xa52/0xfd0 [sctp]
> >   [...]  sctp_sf_do_asconf+0x782/0x980 [sctp]
> >   [...]  sctp_do_sm+0x139/0x520 [sctp]
> >   [...]  sctp_assoc_bh_rcv+0x284/0x5c0 [sctp]
> >   [...]  sctp_backlog_rcv+0x45f/0x880 [sctp]
> >   [...]  __release_sock+0x120/0x370
> >   [...]  release_sock+0x4f/0x180
> >   [...]  sctp_accept+0x3f9/0x5a0 [sctp]
> >   [...]  inet_accept+0xe7/0x6f0
> >
> > It was caused by that the 'newsk' sk_socket was not set before going to
> > security sctp hook when doing accept() on a tcp-type socket:
> >
> >   inet_accept()->
> >     sctp_accept():
> >       lock_sock():
> >           lock listening 'sk'
> >                                           do_softirq():
> >                                             sctp_rcv():  <-- [1]
> >                                                 asconf chunk arrived and
> >                                                 enqueued in 'sk' backlog
> >       sctp_sock_migrate():
> >           set asoc's sk to 'newsk'
> >       release_sock():
> >           sctp_backlog_rcv():
> >             lock 'newsk'
> >             sctp_process_asconf()  <-- [2]
> >             unlock 'newsk'
> >     sock_graft():
> >         set sk_socket  <-- [3]
> >
> > As it shows, at [1] the asconf chunk would be put into the listening 'sk'
> > backlog, as accept() was holding its sock lock. Then at [2] asconf would
> > get processed with 'newsk' as asoc's sk had been set to 'newsk'. However,
> > 'newsk' sk_socket is not set until [3], while selinux_sctp_bind_connect()
> > would deref it, then kernel crashed.
>
> Note that sctp will migrate such incoming chunks from sk to newsk in
> sctp_rcv() if they arrived after the mass-migration performed at
> sctp_sock_migrate().
>
> That said, did you explore changing inet_accept() so that
> sk1->sk_prot->accept() would return sk2 still/already locked?
> That would be enough to block [2] from happening as then it would be
> queued on newsk backlog this time and avoid nearly duplicating
> inet_accept(). (too bad for this chunk, hit 2 backlogs..)
We don't have to bother inet_accept() for it. I had this one below,
and I was just thinking the locks order doesn't look nice. Do you
think this is more acceptable?

@@ -4963,15 +4963,19 @@ static struct sock *sctp_accept(struct sock
*sk, int flags, int *err, bool kern)
         * asoc to the newsk.
         */
        error = sctp_sock_migrate(sk, newsk, asoc, SCTP_SOCKET_TCP);
-       if (error) {
-               sk_common_release(newsk);
-               newsk = NULL;
+       if (!error) {
+               lock_sock_nested(newsk, SINGLE_DEPTH_NESTING);
+               release_sock(sk);
+               release_sock(newsk);
+               *err = error;
+
+               return newsk;
        }

 out:
        release_sock(sk);
        *err = error;
-       return newsk;
+       return NULL;
 }

>
> AFAICT TCP code would be fine with such change. Didn't check other
> protocols.
>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH net] sctp: set newsk sk_socket before processing listening sk backlog
  2019-10-02  8:23   ` Xin Long
@ 2019-10-02 12:24     ` Neil Horman
  2019-10-02 12:55     ` Marcelo Ricardo Leitner
  1 sibling, 0 replies; 11+ messages in thread
From: Neil Horman @ 2019-10-02 12:24 UTC (permalink / raw)
  To: Xin Long; +Cc: Marcelo Ricardo Leitner, network dev, linux-sctp, davem

On Wed, Oct 02, 2019 at 04:23:52PM +0800, Xin Long wrote:
> On Wed, Oct 2, 2019 at 9:04 AM Marcelo Ricardo Leitner
> <marcelo.leitner@gmail.com> wrote:
> >
> > On Mon, Sep 30, 2019 at 09:10:18PM +0800, Xin Long wrote:
> > > This patch is to fix a NULL-ptr deref crash in selinux_sctp_bind_connect:
> > >
> > >   [...] kasan: GPF could be caused by NULL-ptr deref or user memory access
> > >   [...] RIP: 0010:selinux_sctp_bind_connect+0x16a/0x230
> > >   [...] Call Trace:
> > >   [...]  security_sctp_bind_connect+0x58/0x90
> > >   [...]  sctp_process_asconf+0xa52/0xfd0 [sctp]
> > >   [...]  sctp_sf_do_asconf+0x782/0x980 [sctp]
> > >   [...]  sctp_do_sm+0x139/0x520 [sctp]
> > >   [...]  sctp_assoc_bh_rcv+0x284/0x5c0 [sctp]
> > >   [...]  sctp_backlog_rcv+0x45f/0x880 [sctp]
> > >   [...]  __release_sock+0x120/0x370
> > >   [...]  release_sock+0x4f/0x180
> > >   [...]  sctp_accept+0x3f9/0x5a0 [sctp]
> > >   [...]  inet_accept+0xe7/0x6f0
> > >
> > > It was caused by that the 'newsk' sk_socket was not set before going to
> > > security sctp hook when doing accept() on a tcp-type socket:
> > >
> > >   inet_accept()->
> > >     sctp_accept():
> > >       lock_sock():
> > >           lock listening 'sk'
> > >                                           do_softirq():
> > >                                             sctp_rcv():  <-- [1]
> > >                                                 asconf chunk arrived and
> > >                                                 enqueued in 'sk' backlog
> > >       sctp_sock_migrate():
> > >           set asoc's sk to 'newsk'
> > >       release_sock():
> > >           sctp_backlog_rcv():
> > >             lock 'newsk'
> > >             sctp_process_asconf()  <-- [2]
> > >             unlock 'newsk'
> > >     sock_graft():
> > >         set sk_socket  <-- [3]
> > >
> > > As it shows, at [1] the asconf chunk would be put into the listening 'sk'
> > > backlog, as accept() was holding its sock lock. Then at [2] asconf would
> > > get processed with 'newsk' as asoc's sk had been set to 'newsk'. However,
> > > 'newsk' sk_socket is not set until [3], while selinux_sctp_bind_connect()
> > > would deref it, then kernel crashed.
> >
> > Note that sctp will migrate such incoming chunks from sk to newsk in
> > sctp_rcv() if they arrived after the mass-migration performed at
> > sctp_sock_migrate().
> >
> > That said, did you explore changing inet_accept() so that
> > sk1->sk_prot->accept() would return sk2 still/already locked?
> > That would be enough to block [2] from happening as then it would be
> > queued on newsk backlog this time and avoid nearly duplicating
> > inet_accept(). (too bad for this chunk, hit 2 backlogs..)
> We don't have to bother inet_accept() for it. I had this one below,
> and I was just thinking the locks order doesn't look nice. Do you
> think this is more acceptable?
> 
> @@ -4963,15 +4963,19 @@ static struct sock *sctp_accept(struct sock
> *sk, int flags, int *err, bool kern)
>          * asoc to the newsk.
>          */
>         error = sctp_sock_migrate(sk, newsk, asoc, SCTP_SOCKET_TCP);
> -       if (error) {
> -               sk_common_release(newsk);
> -               newsk = NULL;
> +       if (!error) {
> +               lock_sock_nested(newsk, SINGLE_DEPTH_NESTING);
> +               release_sock(sk);
> +               release_sock(newsk);
> +               *err = error;
> +
> +               return newsk;
>         }
> 
>  out:
>         release_sock(sk);
>         *err = error;
> -       return newsk;
> +       return NULL;
>  }
> 
I think this is far more concise, and I don't see a particular issue
with the locking order (though I think you could reverse the order there
if you needed to.  In fact if you did that, you could change the if
(!error) to an if/else statement where the if set newsk = NULL, and the
else clause just released newsk and set err *, then you would be able to
maintain a common return point.

Neil

> >
> > AFAICT TCP code would be fine with such change. Didn't check other
> > protocols.
> >
> 

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH net] sctp: set newsk sk_socket before processing listening sk backlog
  2019-10-02  8:23   ` Xin Long
  2019-10-02 12:24     ` Neil Horman
@ 2019-10-02 12:55     ` Marcelo Ricardo Leitner
  2019-10-02 17:26       ` Xin Long
  1 sibling, 1 reply; 11+ messages in thread
From: Marcelo Ricardo Leitner @ 2019-10-02 12:55 UTC (permalink / raw)
  To: Xin Long; +Cc: network dev, linux-sctp, davem, Neil Horman

On Wed, Oct 02, 2019 at 04:23:52PM +0800, Xin Long wrote:
> On Wed, Oct 2, 2019 at 9:04 AM Marcelo Ricardo Leitner
> <marcelo.leitner@gmail.com> wrote:
> >
> > On Mon, Sep 30, 2019 at 09:10:18PM +0800, Xin Long wrote:
> > > This patch is to fix a NULL-ptr deref crash in selinux_sctp_bind_connect:
> > >
> > >   [...] kasan: GPF could be caused by NULL-ptr deref or user memory access
> > >   [...] RIP: 0010:selinux_sctp_bind_connect+0x16a/0x230
> > >   [...] Call Trace:
> > >   [...]  security_sctp_bind_connect+0x58/0x90
> > >   [...]  sctp_process_asconf+0xa52/0xfd0 [sctp]
> > >   [...]  sctp_sf_do_asconf+0x782/0x980 [sctp]
> > >   [...]  sctp_do_sm+0x139/0x520 [sctp]
> > >   [...]  sctp_assoc_bh_rcv+0x284/0x5c0 [sctp]
> > >   [...]  sctp_backlog_rcv+0x45f/0x880 [sctp]
> > >   [...]  __release_sock+0x120/0x370
> > >   [...]  release_sock+0x4f/0x180
> > >   [...]  sctp_accept+0x3f9/0x5a0 [sctp]
> > >   [...]  inet_accept+0xe7/0x6f0
> > >
> > > It was caused by that the 'newsk' sk_socket was not set before going to
> > > security sctp hook when doing accept() on a tcp-type socket:
> > >
> > >   inet_accept()->
> > >     sctp_accept():
> > >       lock_sock():
> > >           lock listening 'sk'
> > >                                           do_softirq():
> > >                                             sctp_rcv():  <-- [1]
> > >                                                 asconf chunk arrived and
> > >                                                 enqueued in 'sk' backlog
> > >       sctp_sock_migrate():
> > >           set asoc's sk to 'newsk'
> > >       release_sock():
> > >           sctp_backlog_rcv():
> > >             lock 'newsk'
> > >             sctp_process_asconf()  <-- [2]
> > >             unlock 'newsk'
> > >     sock_graft():
> > >         set sk_socket  <-- [3]
> > >
> > > As it shows, at [1] the asconf chunk would be put into the listening 'sk'
> > > backlog, as accept() was holding its sock lock. Then at [2] asconf would
> > > get processed with 'newsk' as asoc's sk had been set to 'newsk'. However,
> > > 'newsk' sk_socket is not set until [3], while selinux_sctp_bind_connect()
> > > would deref it, then kernel crashed.
> >
> > Note that sctp will migrate such incoming chunks from sk to newsk in
> > sctp_rcv() if they arrived after the mass-migration performed at
> > sctp_sock_migrate().
> >
> > That said, did you explore changing inet_accept() so that
> > sk1->sk_prot->accept() would return sk2 still/already locked?
> > That would be enough to block [2] from happening as then it would be
> > queued on newsk backlog this time and avoid nearly duplicating
> > inet_accept(). (too bad for this chunk, hit 2 backlogs..)
> We don't have to bother inet_accept() for it. I had this one below,
> and I was just thinking the locks order doesn't look nice. Do you
> think this is more acceptable?
> 
> @@ -4963,15 +4963,19 @@ static struct sock *sctp_accept(struct sock
> *sk, int flags, int *err, bool kern)
>          * asoc to the newsk.
>          */
>         error = sctp_sock_migrate(sk, newsk, asoc, SCTP_SOCKET_TCP);
> -       if (error) {
> -               sk_common_release(newsk);
> -               newsk = NULL;
> +       if (!error) {
> +               lock_sock_nested(newsk, SINGLE_DEPTH_NESTING);
> +               release_sock(sk);

Interesting. It fixes the backlog processing, ok. Question:

> +               release_sock(newsk);

As newsk is hashed already and unlocked here to be locked again later
on inet_accept(), it could receive a packet in between (thus before
sock_graft() could have a chance to run), no?

> +               *err = error;
> +
> +               return newsk;
>         }
> 
>  out:
>         release_sock(sk);
>         *err = error;
> -       return newsk;
> +       return NULL;
>  }
> 
> >
> > AFAICT TCP code would be fine with such change. Didn't check other
> > protocols.
> >
> 

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH net] sctp: set newsk sk_socket before processing listening sk backlog
  2019-10-02 12:55     ` Marcelo Ricardo Leitner
@ 2019-10-02 17:26       ` Xin Long
  2019-10-02 17:28         ` Xin Long
  2019-10-02 17:41         ` Marcelo Ricardo Leitner
  0 siblings, 2 replies; 11+ messages in thread
From: Xin Long @ 2019-10-02 17:26 UTC (permalink / raw)
  To: Marcelo Ricardo Leitner; +Cc: network dev, linux-sctp, davem, Neil Horman

On Wed, Oct 2, 2019 at 8:55 PM Marcelo Ricardo Leitner
<marcelo.leitner@gmail.com> wrote:
>
> On Wed, Oct 02, 2019 at 04:23:52PM +0800, Xin Long wrote:
> > On Wed, Oct 2, 2019 at 9:04 AM Marcelo Ricardo Leitner
> > <marcelo.leitner@gmail.com> wrote:
> > >
> > > On Mon, Sep 30, 2019 at 09:10:18PM +0800, Xin Long wrote:
> > > > This patch is to fix a NULL-ptr deref crash in selinux_sctp_bind_connect:
> > > >
> > > >   [...] kasan: GPF could be caused by NULL-ptr deref or user memory access
> > > >   [...] RIP: 0010:selinux_sctp_bind_connect+0x16a/0x230
> > > >   [...] Call Trace:
> > > >   [...]  security_sctp_bind_connect+0x58/0x90
> > > >   [...]  sctp_process_asconf+0xa52/0xfd0 [sctp]
> > > >   [...]  sctp_sf_do_asconf+0x782/0x980 [sctp]
> > > >   [...]  sctp_do_sm+0x139/0x520 [sctp]
> > > >   [...]  sctp_assoc_bh_rcv+0x284/0x5c0 [sctp]
> > > >   [...]  sctp_backlog_rcv+0x45f/0x880 [sctp]
> > > >   [...]  __release_sock+0x120/0x370
> > > >   [...]  release_sock+0x4f/0x180
> > > >   [...]  sctp_accept+0x3f9/0x5a0 [sctp]
> > > >   [...]  inet_accept+0xe7/0x6f0
> > > >
> > > > It was caused by that the 'newsk' sk_socket was not set before going to
> > > > security sctp hook when doing accept() on a tcp-type socket:
> > > >
> > > >   inet_accept()->
> > > >     sctp_accept():
> > > >       lock_sock():
> > > >           lock listening 'sk'
> > > >                                           do_softirq():
> > > >                                             sctp_rcv():  <-- [1]
> > > >                                                 asconf chunk arrived and
> > > >                                                 enqueued in 'sk' backlog
> > > >       sctp_sock_migrate():
> > > >           set asoc's sk to 'newsk'
> > > >       release_sock():
> > > >           sctp_backlog_rcv():
> > > >             lock 'newsk'
> > > >             sctp_process_asconf()  <-- [2]
> > > >             unlock 'newsk'
> > > >     sock_graft():
> > > >         set sk_socket  <-- [3]
> > > >
> > > > As it shows, at [1] the asconf chunk would be put into the listening 'sk'
> > > > backlog, as accept() was holding its sock lock. Then at [2] asconf would
> > > > get processed with 'newsk' as asoc's sk had been set to 'newsk'. However,
> > > > 'newsk' sk_socket is not set until [3], while selinux_sctp_bind_connect()
> > > > would deref it, then kernel crashed.
> > >
> > > Note that sctp will migrate such incoming chunks from sk to newsk in
> > > sctp_rcv() if they arrived after the mass-migration performed at
> > > sctp_sock_migrate().
> > >
> > > That said, did you explore changing inet_accept() so that
> > > sk1->sk_prot->accept() would return sk2 still/already locked?
> > > That would be enough to block [2] from happening as then it would be
> > > queued on newsk backlog this time and avoid nearly duplicating
> > > inet_accept(). (too bad for this chunk, hit 2 backlogs..)
> > We don't have to bother inet_accept() for it. I had this one below,
> > and I was just thinking the locks order doesn't look nice. Do you
> > think this is more acceptable?
> >
> > @@ -4963,15 +4963,19 @@ static struct sock *sctp_accept(struct sock
> > *sk, int flags, int *err, bool kern)
> >          * asoc to the newsk.
> >          */
> >         error = sctp_sock_migrate(sk, newsk, asoc, SCTP_SOCKET_TCP);
> > -       if (error) {
> > -               sk_common_release(newsk);
> > -               newsk = NULL;
> > +       if (!error) {
> > +               lock_sock_nested(newsk, SINGLE_DEPTH_NESTING);
> > +               release_sock(sk);
>
> Interesting. It fixes the backlog processing, ok. Question:
>
> > +               release_sock(newsk);
>
> As newsk is hashed already and unlocked here to be locked again later
> on inet_accept(), it could receive a packet in between (thus before
> sock_graft() could have a chance to run), no?

You're right, it explains another call trace happened once in our testing.

The way to changing inet_accept() will also have to change all protocols'
.accept(). Given that this issue is only triggered in a very small moment,
can we just silently discard this asconf chunk if sk->sk_socket is NULL?
and let peer's T4-timer retransmit it.

@@ -3709,6 +3709,9 @@ enum sctp_disposition sctp_sf_do_asconf(struct net *net,
        struct sctp_addiphdr *hdr;
        __u32 serial;

+       if (asoc->base.sk->sk_socket)
+               return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+

Note we can't do this in sctp_process_asconf_param(), as an asconf_ack
will be sent back.

>
> > +               *err = error;
> > +
> > +               return newsk;
> >         }
> >
> >  out:
> >         release_sock(sk);
> >         *err = error;
> > -       return newsk;
> > +       return NULL;
> >  }
> >
> > >
> > > AFAICT TCP code would be fine with such change. Didn't check other
> > > protocols.
> > >
> >

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH net] sctp: set newsk sk_socket before processing listening sk backlog
  2019-10-02 17:26       ` Xin Long
@ 2019-10-02 17:28         ` Xin Long
  2019-10-02 17:41         ` Marcelo Ricardo Leitner
  1 sibling, 0 replies; 11+ messages in thread
From: Xin Long @ 2019-10-02 17:28 UTC (permalink / raw)
  To: Marcelo Ricardo Leitner; +Cc: network dev, linux-sctp, davem, Neil Horman

On Thu, Oct 3, 2019 at 1:26 AM Xin Long <lucien.xin@gmail.com> wrote:
>
> On Wed, Oct 2, 2019 at 8:55 PM Marcelo Ricardo Leitner
> <marcelo.leitner@gmail.com> wrote:
> >
> > On Wed, Oct 02, 2019 at 04:23:52PM +0800, Xin Long wrote:
> > > On Wed, Oct 2, 2019 at 9:04 AM Marcelo Ricardo Leitner
> > > <marcelo.leitner@gmail.com> wrote:
> > > >
> > > > On Mon, Sep 30, 2019 at 09:10:18PM +0800, Xin Long wrote:
> > > > > This patch is to fix a NULL-ptr deref crash in selinux_sctp_bind_connect:
> > > > >
> > > > >   [...] kasan: GPF could be caused by NULL-ptr deref or user memory access
> > > > >   [...] RIP: 0010:selinux_sctp_bind_connect+0x16a/0x230
> > > > >   [...] Call Trace:
> > > > >   [...]  security_sctp_bind_connect+0x58/0x90
> > > > >   [...]  sctp_process_asconf+0xa52/0xfd0 [sctp]
> > > > >   [...]  sctp_sf_do_asconf+0x782/0x980 [sctp]
> > > > >   [...]  sctp_do_sm+0x139/0x520 [sctp]
> > > > >   [...]  sctp_assoc_bh_rcv+0x284/0x5c0 [sctp]
> > > > >   [...]  sctp_backlog_rcv+0x45f/0x880 [sctp]
> > > > >   [...]  __release_sock+0x120/0x370
> > > > >   [...]  release_sock+0x4f/0x180
> > > > >   [...]  sctp_accept+0x3f9/0x5a0 [sctp]
> > > > >   [...]  inet_accept+0xe7/0x6f0
> > > > >
> > > > > It was caused by that the 'newsk' sk_socket was not set before going to
> > > > > security sctp hook when doing accept() on a tcp-type socket:
> > > > >
> > > > >   inet_accept()->
> > > > >     sctp_accept():
> > > > >       lock_sock():
> > > > >           lock listening 'sk'
> > > > >                                           do_softirq():
> > > > >                                             sctp_rcv():  <-- [1]
> > > > >                                                 asconf chunk arrived and
> > > > >                                                 enqueued in 'sk' backlog
> > > > >       sctp_sock_migrate():
> > > > >           set asoc's sk to 'newsk'
> > > > >       release_sock():
> > > > >           sctp_backlog_rcv():
> > > > >             lock 'newsk'
> > > > >             sctp_process_asconf()  <-- [2]
> > > > >             unlock 'newsk'
> > > > >     sock_graft():
> > > > >         set sk_socket  <-- [3]
> > > > >
> > > > > As it shows, at [1] the asconf chunk would be put into the listening 'sk'
> > > > > backlog, as accept() was holding its sock lock. Then at [2] asconf would
> > > > > get processed with 'newsk' as asoc's sk had been set to 'newsk'. However,
> > > > > 'newsk' sk_socket is not set until [3], while selinux_sctp_bind_connect()
> > > > > would deref it, then kernel crashed.
> > > >
> > > > Note that sctp will migrate such incoming chunks from sk to newsk in
> > > > sctp_rcv() if they arrived after the mass-migration performed at
> > > > sctp_sock_migrate().
> > > >
> > > > That said, did you explore changing inet_accept() so that
> > > > sk1->sk_prot->accept() would return sk2 still/already locked?
> > > > That would be enough to block [2] from happening as then it would be
> > > > queued on newsk backlog this time and avoid nearly duplicating
> > > > inet_accept(). (too bad for this chunk, hit 2 backlogs..)
> > > We don't have to bother inet_accept() for it. I had this one below,
> > > and I was just thinking the locks order doesn't look nice. Do you
> > > think this is more acceptable?
> > >
> > > @@ -4963,15 +4963,19 @@ static struct sock *sctp_accept(struct sock
> > > *sk, int flags, int *err, bool kern)
> > >          * asoc to the newsk.
> > >          */
> > >         error = sctp_sock_migrate(sk, newsk, asoc, SCTP_SOCKET_TCP);
> > > -       if (error) {
> > > -               sk_common_release(newsk);
> > > -               newsk = NULL;
> > > +       if (!error) {
> > > +               lock_sock_nested(newsk, SINGLE_DEPTH_NESTING);
> > > +               release_sock(sk);
> >
> > Interesting. It fixes the backlog processing, ok. Question:
> >
> > > +               release_sock(newsk);
> >
> > As newsk is hashed already and unlocked here to be locked again later
> > on inet_accept(), it could receive a packet in between (thus before
> > sock_graft() could have a chance to run), no?
>
> You're right, it explains another call trace happened once in our testing.
>
> The way to changing inet_accept() will also have to change all protocols'
> .accept(). Given that this issue is only triggered in a very small moment,
> can we just silently discard this asconf chunk if sk->sk_socket is NULL?
> and let peer's T4-timer retransmit it.
>
> @@ -3709,6 +3709,9 @@ enum sctp_disposition sctp_sf_do_asconf(struct net *net,
>         struct sctp_addiphdr *hdr;
>         __u32 serial;
>
> +       if (asoc->base.sk->sk_socket)
sorry, if (!asoc->base.sk->sk_socket) ^
> +               return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
> +
>
> Note we can't do this in sctp_process_asconf_param(), as an asconf_ack
> will be sent back.
>
> >
> > > +               *err = error;
> > > +
> > > +               return newsk;
> > >         }
> > >
> > >  out:
> > >         release_sock(sk);
> > >         *err = error;
> > > -       return newsk;
> > > +       return NULL;
> > >  }
> > >
> > > >
> > > > AFAICT TCP code would be fine with such change. Didn't check other
> > > > protocols.
> > > >
> > >

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH net] sctp: set newsk sk_socket before processing listening sk backlog
  2019-10-02 17:26       ` Xin Long
  2019-10-02 17:28         ` Xin Long
@ 2019-10-02 17:41         ` Marcelo Ricardo Leitner
  2019-10-02 17:48           ` Marcelo Ricardo Leitner
  2019-10-02 18:23           ` Xin Long
  1 sibling, 2 replies; 11+ messages in thread
From: Marcelo Ricardo Leitner @ 2019-10-02 17:41 UTC (permalink / raw)
  To: Xin Long; +Cc: network dev, linux-sctp, davem, Neil Horman

On Thu, Oct 03, 2019 at 01:26:46AM +0800, Xin Long wrote:
> On Wed, Oct 2, 2019 at 8:55 PM Marcelo Ricardo Leitner
> <marcelo.leitner@gmail.com> wrote:
> >
> > On Wed, Oct 02, 2019 at 04:23:52PM +0800, Xin Long wrote:
> > > On Wed, Oct 2, 2019 at 9:04 AM Marcelo Ricardo Leitner
> > > <marcelo.leitner@gmail.com> wrote:
> > > >
> > > > On Mon, Sep 30, 2019 at 09:10:18PM +0800, Xin Long wrote:
> > > > > This patch is to fix a NULL-ptr deref crash in selinux_sctp_bind_connect:
> > > > >
> > > > >   [...] kasan: GPF could be caused by NULL-ptr deref or user memory access
> > > > >   [...] RIP: 0010:selinux_sctp_bind_connect+0x16a/0x230
> > > > >   [...] Call Trace:
> > > > >   [...]  security_sctp_bind_connect+0x58/0x90
> > > > >   [...]  sctp_process_asconf+0xa52/0xfd0 [sctp]
> > > > >   [...]  sctp_sf_do_asconf+0x782/0x980 [sctp]
> > > > >   [...]  sctp_do_sm+0x139/0x520 [sctp]
> > > > >   [...]  sctp_assoc_bh_rcv+0x284/0x5c0 [sctp]
> > > > >   [...]  sctp_backlog_rcv+0x45f/0x880 [sctp]
> > > > >   [...]  __release_sock+0x120/0x370
> > > > >   [...]  release_sock+0x4f/0x180
> > > > >   [...]  sctp_accept+0x3f9/0x5a0 [sctp]
> > > > >   [...]  inet_accept+0xe7/0x6f0
> > > > >
> > > > > It was caused by that the 'newsk' sk_socket was not set before going to
> > > > > security sctp hook when doing accept() on a tcp-type socket:
> > > > >
> > > > >   inet_accept()->
> > > > >     sctp_accept():
> > > > >       lock_sock():
> > > > >           lock listening 'sk'
> > > > >                                           do_softirq():
> > > > >                                             sctp_rcv():  <-- [1]
> > > > >                                                 asconf chunk arrived and
> > > > >                                                 enqueued in 'sk' backlog
> > > > >       sctp_sock_migrate():
> > > > >           set asoc's sk to 'newsk'
> > > > >       release_sock():
> > > > >           sctp_backlog_rcv():
> > > > >             lock 'newsk'
> > > > >             sctp_process_asconf()  <-- [2]
> > > > >             unlock 'newsk'
> > > > >     sock_graft():
> > > > >         set sk_socket  <-- [3]
> > > > >
> > > > > As it shows, at [1] the asconf chunk would be put into the listening 'sk'
> > > > > backlog, as accept() was holding its sock lock. Then at [2] asconf would
> > > > > get processed with 'newsk' as asoc's sk had been set to 'newsk'. However,
> > > > > 'newsk' sk_socket is not set until [3], while selinux_sctp_bind_connect()
> > > > > would deref it, then kernel crashed.
> > > >
> > > > Note that sctp will migrate such incoming chunks from sk to newsk in
> > > > sctp_rcv() if they arrived after the mass-migration performed at
> > > > sctp_sock_migrate().
> > > >
> > > > That said, did you explore changing inet_accept() so that
> > > > sk1->sk_prot->accept() would return sk2 still/already locked?
> > > > That would be enough to block [2] from happening as then it would be
> > > > queued on newsk backlog this time and avoid nearly duplicating
> > > > inet_accept(). (too bad for this chunk, hit 2 backlogs..)
> > > We don't have to bother inet_accept() for it. I had this one below,
> > > and I was just thinking the locks order doesn't look nice. Do you
> > > think this is more acceptable?
> > >
> > > @@ -4963,15 +4963,19 @@ static struct sock *sctp_accept(struct sock
> > > *sk, int flags, int *err, bool kern)
> > >          * asoc to the newsk.
> > >          */
> > >         error = sctp_sock_migrate(sk, newsk, asoc, SCTP_SOCKET_TCP);
> > > -       if (error) {
> > > -               sk_common_release(newsk);
> > > -               newsk = NULL;
> > > +       if (!error) {
> > > +               lock_sock_nested(newsk, SINGLE_DEPTH_NESTING);
> > > +               release_sock(sk);
> >
> > Interesting. It fixes the backlog processing, ok. Question:
> >
> > > +               release_sock(newsk);
> >
> > As newsk is hashed already and unlocked here to be locked again later
> > on inet_accept(), it could receive a packet in between (thus before
> > sock_graft() could have a chance to run), no?
> 
> You're right, it explains another call trace happened once in our testing.
> 
> The way to changing inet_accept() will also have to change all protocols'
> .accept(). Given that this issue is only triggered in a very small moment,
> can we just silently discard this asconf chunk if sk->sk_socket is NULL?
> and let peer's T4-timer retransmit it.

No no. If the change doesn't hurt other protocols, we should try that
first.  Otherwise this adds overhead to the network and we could get a
bug report soon on "valid asconf being ignored".

If that doesn't pan out, maybe your initial suggestion is the way out.
More custom code but keeps the expected behavior.

> 
> @@ -3709,6 +3709,9 @@ enum sctp_disposition sctp_sf_do_asconf(struct net *net,
>         struct sctp_addiphdr *hdr;
>         __u32 serial;
> 
> +       if (asoc->base.sk->sk_socket)
> +               return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
> +
> 
> Note we can't do this in sctp_process_asconf_param(), as an asconf_ack
> will be sent back.
> 
> >
> > > +               *err = error;
> > > +
> > > +               return newsk;
> > >         }
> > >
> > >  out:
> > >         release_sock(sk);
> > >         *err = error;
> > > -       return newsk;
> > > +       return NULL;
> > >  }
> > >
> > > >
> > > > AFAICT TCP code would be fine with such change. Didn't check other
> > > > protocols.
> > > >
> > >

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH net] sctp: set newsk sk_socket before processing listening sk backlog
  2019-10-02 17:41         ` Marcelo Ricardo Leitner
@ 2019-10-02 17:48           ` Marcelo Ricardo Leitner
  2019-10-02 18:23           ` Xin Long
  1 sibling, 0 replies; 11+ messages in thread
From: Marcelo Ricardo Leitner @ 2019-10-02 17:48 UTC (permalink / raw)
  To: Xin Long; +Cc: network dev, linux-sctp, davem, Neil Horman

On Wed, Oct 02, 2019 at 02:41:27PM -0300, Marcelo Ricardo Leitner wrote:
> On Thu, Oct 03, 2019 at 01:26:46AM +0800, Xin Long wrote:
> > On Wed, Oct 2, 2019 at 8:55 PM Marcelo Ricardo Leitner
> > <marcelo.leitner@gmail.com> wrote:
> > >
> > > On Wed, Oct 02, 2019 at 04:23:52PM +0800, Xin Long wrote:
> > > > On Wed, Oct 2, 2019 at 9:04 AM Marcelo Ricardo Leitner
> > > > <marcelo.leitner@gmail.com> wrote:
> > > > >
> > > > > On Mon, Sep 30, 2019 at 09:10:18PM +0800, Xin Long wrote:
> > > > > > This patch is to fix a NULL-ptr deref crash in selinux_sctp_bind_connect:
> > > > > >
> > > > > >   [...] kasan: GPF could be caused by NULL-ptr deref or user memory access
> > > > > >   [...] RIP: 0010:selinux_sctp_bind_connect+0x16a/0x230
> > > > > >   [...] Call Trace:
> > > > > >   [...]  security_sctp_bind_connect+0x58/0x90
> > > > > >   [...]  sctp_process_asconf+0xa52/0xfd0 [sctp]
> > > > > >   [...]  sctp_sf_do_asconf+0x782/0x980 [sctp]
> > > > > >   [...]  sctp_do_sm+0x139/0x520 [sctp]
> > > > > >   [...]  sctp_assoc_bh_rcv+0x284/0x5c0 [sctp]
> > > > > >   [...]  sctp_backlog_rcv+0x45f/0x880 [sctp]
> > > > > >   [...]  __release_sock+0x120/0x370
> > > > > >   [...]  release_sock+0x4f/0x180
> > > > > >   [...]  sctp_accept+0x3f9/0x5a0 [sctp]
> > > > > >   [...]  inet_accept+0xe7/0x6f0
> > > > > >
> > > > > > It was caused by that the 'newsk' sk_socket was not set before going to
> > > > > > security sctp hook when doing accept() on a tcp-type socket:
> > > > > >
> > > > > >   inet_accept()->
> > > > > >     sctp_accept():
> > > > > >       lock_sock():
> > > > > >           lock listening 'sk'
> > > > > >                                           do_softirq():
> > > > > >                                             sctp_rcv():  <-- [1]
> > > > > >                                                 asconf chunk arrived and
> > > > > >                                                 enqueued in 'sk' backlog
> > > > > >       sctp_sock_migrate():
> > > > > >           set asoc's sk to 'newsk'
> > > > > >       release_sock():
> > > > > >           sctp_backlog_rcv():
> > > > > >             lock 'newsk'
> > > > > >             sctp_process_asconf()  <-- [2]
> > > > > >             unlock 'newsk'
> > > > > >     sock_graft():
> > > > > >         set sk_socket  <-- [3]
> > > > > >
> > > > > > As it shows, at [1] the asconf chunk would be put into the listening 'sk'
> > > > > > backlog, as accept() was holding its sock lock. Then at [2] asconf would
> > > > > > get processed with 'newsk' as asoc's sk had been set to 'newsk'. However,
> > > > > > 'newsk' sk_socket is not set until [3], while selinux_sctp_bind_connect()
> > > > > > would deref it, then kernel crashed.
> > > > >
> > > > > Note that sctp will migrate such incoming chunks from sk to newsk in
> > > > > sctp_rcv() if they arrived after the mass-migration performed at
> > > > > sctp_sock_migrate().
> > > > >
> > > > > That said, did you explore changing inet_accept() so that
> > > > > sk1->sk_prot->accept() would return sk2 still/already locked?
> > > > > That would be enough to block [2] from happening as then it would be
> > > > > queued on newsk backlog this time and avoid nearly duplicating
> > > > > inet_accept(). (too bad for this chunk, hit 2 backlogs..)
> > > > We don't have to bother inet_accept() for it. I had this one below,
> > > > and I was just thinking the locks order doesn't look nice. Do you
> > > > think this is more acceptable?
> > > >
> > > > @@ -4963,15 +4963,19 @@ static struct sock *sctp_accept(struct sock
> > > > *sk, int flags, int *err, bool kern)
> > > >          * asoc to the newsk.
> > > >          */
> > > >         error = sctp_sock_migrate(sk, newsk, asoc, SCTP_SOCKET_TCP);
> > > > -       if (error) {
> > > > -               sk_common_release(newsk);
> > > > -               newsk = NULL;
> > > > +       if (!error) {
> > > > +               lock_sock_nested(newsk, SINGLE_DEPTH_NESTING);
> > > > +               release_sock(sk);
> > >
> > > Interesting. It fixes the backlog processing, ok. Question:
> > >
> > > > +               release_sock(newsk);
> > >
> > > As newsk is hashed already and unlocked here to be locked again later
> > > on inet_accept(), it could receive a packet in between (thus before
> > > sock_graft() could have a chance to run), no?
> > 
> > You're right, it explains another call trace happened once in our testing.
> > 
> > The way to changing inet_accept() will also have to change all protocols'
> > .accept(). Given that this issue is only triggered in a very small moment,
> > can we just silently discard this asconf chunk if sk->sk_socket is NULL?
> > and let peer's T4-timer retransmit it.
> 
> No no. If the change doesn't hurt other protocols, we should try that
> first.  Otherwise this adds overhead to the network and we could get a
> bug report soon on "valid asconf being ignored".
> 
> If that doesn't pan out, maybe your initial suggestion is the way out.
> More custom code but keeps the expected behavior.
> 
> > 
> > @@ -3709,6 +3709,9 @@ enum sctp_disposition sctp_sf_do_asconf(struct net *net,
> >         struct sctp_addiphdr *hdr;
> >         __u32 serial;
> > 
> > +       if (asoc->base.sk->sk_socket)
> > +               return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);

What if we add this to sctp_backlog_rcv() instead?  As in, do not
process the backlog if so.
And force doing backlog on sctp_rcv() also.
As we are sure that there will be a subsequent lock/unlock and that it
will handle it, this could work.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH net] sctp: set newsk sk_socket before processing listening sk backlog
  2019-10-02 17:41         ` Marcelo Ricardo Leitner
  2019-10-02 17:48           ` Marcelo Ricardo Leitner
@ 2019-10-02 18:23           ` Xin Long
  1 sibling, 0 replies; 11+ messages in thread
From: Xin Long @ 2019-10-02 18:23 UTC (permalink / raw)
  To: Marcelo Ricardo Leitner; +Cc: network dev, linux-sctp, davem, Neil Horman

On Thu, Oct 3, 2019 at 1:41 AM Marcelo Ricardo Leitner
<marcelo.leitner@gmail.com> wrote:
>
> On Thu, Oct 03, 2019 at 01:26:46AM +0800, Xin Long wrote:
> > On Wed, Oct 2, 2019 at 8:55 PM Marcelo Ricardo Leitner
> > <marcelo.leitner@gmail.com> wrote:
> > >
> > > On Wed, Oct 02, 2019 at 04:23:52PM +0800, Xin Long wrote:
> > > > On Wed, Oct 2, 2019 at 9:04 AM Marcelo Ricardo Leitner
> > > > <marcelo.leitner@gmail.com> wrote:
> > > > >
> > > > > On Mon, Sep 30, 2019 at 09:10:18PM +0800, Xin Long wrote:
> > > > > > This patch is to fix a NULL-ptr deref crash in selinux_sctp_bind_connect:
> > > > > >
> > > > > >   [...] kasan: GPF could be caused by NULL-ptr deref or user memory access
> > > > > >   [...] RIP: 0010:selinux_sctp_bind_connect+0x16a/0x230
> > > > > >   [...] Call Trace:
> > > > > >   [...]  security_sctp_bind_connect+0x58/0x90
> > > > > >   [...]  sctp_process_asconf+0xa52/0xfd0 [sctp]
> > > > > >   [...]  sctp_sf_do_asconf+0x782/0x980 [sctp]
> > > > > >   [...]  sctp_do_sm+0x139/0x520 [sctp]
> > > > > >   [...]  sctp_assoc_bh_rcv+0x284/0x5c0 [sctp]
> > > > > >   [...]  sctp_backlog_rcv+0x45f/0x880 [sctp]
> > > > > >   [...]  __release_sock+0x120/0x370
> > > > > >   [...]  release_sock+0x4f/0x180
> > > > > >   [...]  sctp_accept+0x3f9/0x5a0 [sctp]
> > > > > >   [...]  inet_accept+0xe7/0x6f0
> > > > > >
> > > > > > It was caused by that the 'newsk' sk_socket was not set before going to
> > > > > > security sctp hook when doing accept() on a tcp-type socket:
> > > > > >
> > > > > >   inet_accept()->
> > > > > >     sctp_accept():
> > > > > >       lock_sock():
> > > > > >           lock listening 'sk'
> > > > > >                                           do_softirq():
> > > > > >                                             sctp_rcv():  <-- [1]
> > > > > >                                                 asconf chunk arrived and
> > > > > >                                                 enqueued in 'sk' backlog
> > > > > >       sctp_sock_migrate():
> > > > > >           set asoc's sk to 'newsk'
> > > > > >       release_sock():
> > > > > >           sctp_backlog_rcv():
> > > > > >             lock 'newsk'
> > > > > >             sctp_process_asconf()  <-- [2]
> > > > > >             unlock 'newsk'
> > > > > >     sock_graft():
> > > > > >         set sk_socket  <-- [3]
> > > > > >
> > > > > > As it shows, at [1] the asconf chunk would be put into the listening 'sk'
> > > > > > backlog, as accept() was holding its sock lock. Then at [2] asconf would
> > > > > > get processed with 'newsk' as asoc's sk had been set to 'newsk'. However,
> > > > > > 'newsk' sk_socket is not set until [3], while selinux_sctp_bind_connect()
> > > > > > would deref it, then kernel crashed.
> > > > >
> > > > > Note that sctp will migrate such incoming chunks from sk to newsk in
> > > > > sctp_rcv() if they arrived after the mass-migration performed at
> > > > > sctp_sock_migrate().
> > > > >
> > > > > That said, did you explore changing inet_accept() so that
> > > > > sk1->sk_prot->accept() would return sk2 still/already locked?
> > > > > That would be enough to block [2] from happening as then it would be
> > > > > queued on newsk backlog this time and avoid nearly duplicating
> > > > > inet_accept(). (too bad for this chunk, hit 2 backlogs..)
> > > > We don't have to bother inet_accept() for it. I had this one below,
> > > > and I was just thinking the locks order doesn't look nice. Do you
> > > > think this is more acceptable?
> > > >
> > > > @@ -4963,15 +4963,19 @@ static struct sock *sctp_accept(struct sock
> > > > *sk, int flags, int *err, bool kern)
> > > >          * asoc to the newsk.
> > > >          */
> > > >         error = sctp_sock_migrate(sk, newsk, asoc, SCTP_SOCKET_TCP);
> > > > -       if (error) {
> > > > -               sk_common_release(newsk);
> > > > -               newsk = NULL;
> > > > +       if (!error) {
> > > > +               lock_sock_nested(newsk, SINGLE_DEPTH_NESTING);
> > > > +               release_sock(sk);
> > >
> > > Interesting. It fixes the backlog processing, ok. Question:
> > >
> > > > +               release_sock(newsk);
> > >
> > > As newsk is hashed already and unlocked here to be locked again later
> > > on inet_accept(), it could receive a packet in between (thus before
> > > sock_graft() could have a chance to run), no?
> >
> > You're right, it explains another call trace happened once in our testing.
> >
> > The way to changing inet_accept() will also have to change all protocols'
> > .accept(). Given that this issue is only triggered in a very small moment,
> > can we just silently discard this asconf chunk if sk->sk_socket is NULL?
> > and let peer's T4-timer retransmit it.
>
> No no. If the change doesn't hurt other protocols, we should try that
> first.  Otherwise this adds overhead to the network and we could get a
> bug report soon on "valid asconf being ignored".
Thanks, I will give it a try tomorrow.

>
> If that doesn't pan out, maybe your initial suggestion is the way out.
> More custom code but keeps the expected behavior.
okay

>
> >
> > @@ -3709,6 +3709,9 @@ enum sctp_disposition sctp_sf_do_asconf(struct net *net,
> >         struct sctp_addiphdr *hdr;
> >         __u32 serial;
> >
> > +       if (asoc->base.sk->sk_socket)
> > +               return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
> > +
> >
> > Note we can't do this in sctp_process_asconf_param(), as an asconf_ack
> > will be sent back.
> >
> > >
> > > > +               *err = error;
> > > > +
> > > > +               return newsk;
> > > >         }
> > > >
> > > >  out:
> > > >         release_sock(sk);
> > > >         *err = error;
> > > > -       return newsk;
> > > > +       return NULL;
> > > >  }
> > > >
> > > > >
> > > > > AFAICT TCP code would be fine with such change. Didn't check other
> > > > > protocols.
> > > > >
> > > >

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2019-10-02 18:23 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-09-30 13:10 [PATCH net] sctp: set newsk sk_socket before processing listening sk backlog Xin Long
2019-10-01 22:30 ` David Miller
2019-10-02  1:03 ` Marcelo Ricardo Leitner
2019-10-02  8:23   ` Xin Long
2019-10-02 12:24     ` Neil Horman
2019-10-02 12:55     ` Marcelo Ricardo Leitner
2019-10-02 17:26       ` Xin Long
2019-10-02 17:28         ` Xin Long
2019-10-02 17:41         ` Marcelo Ricardo Leitner
2019-10-02 17:48           ` Marcelo Ricardo Leitner
2019-10-02 18:23           ` Xin Long

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).