From: Greg KH <greg@kroah.com> To: linux-kernel@vger.kernel.org, stable@kernel.org, Andrew Morton <akpm@osdl.org> Cc: Justin Forbes <jmforbes@linuxtx.org>, Zwane Mwaikambo <zwane@arm.linux.org.uk>, "Theodore Ts'o" <tytso@mit.edu>, Randy Dunlap <rdunlap@xenotime.net>, Dave Jones <davej@redhat.com>, Chuck Wolber <chuckw@quantumlinux.com>, Chris Wedgwood <reviews@ml.cw.f00f.org>, Michael Krufky <mkrufky@linuxtv.org>, torvalds@linux-foundation.org, akpm@linux-foundation.org, alan@lxorguk.ukuu.org.uk, nfs@lists.sourceforge.net, Neil Brown <neilb@suse.de> Subject: [patch 21/21] knfsd: Fix a race in closing NFSd connections. Date: Tue, 20 Feb 2007 17:39:00 -0800 [thread overview] Message-ID: <20070221013900.GV30227@kroah.com> (raw) In-Reply-To: <20070221013619.GA30227@kroah.com> [-- Attachment #1: knfsd-fix-a-race-in-closing-nfsd-connections.patch --] [-- Type: text/plain, Size: 6523 bytes --] -stable review patch. If anyone has any objections, please let us know. ------------------ If you lose this race, it can iput a socket inode twice and you get a BUG in fs/inode.c. When I added the option for user-space to close a socket, I added some cruft to svc_delete_socket so that I could call that function when closing a socket per user-space request. This was the wrong thing to do. I should have just set SK_CLOSE and let normal mechanisms do the work. Not only wrong, but buggy. The locking is all wrong and it opened up a race whereby a socket could be closed twice. So this patch: Introduces svc_close_socket which sets SK_CLOSE then either leaves the close up to a thread, or calls svc_delete_socket if it can get SK_BUSY. Adds a bias to sk_inuse which is removed when SK_DEAD is set. This avoids races around shutting down the socket. Changes several 'spin_lock' to 'spin_lock_bh' where the _bh was missing. 
Bugzilla-url: http://bugzilla.kernel.org/show_bug.cgi?id=7916 Signed-off-by: Neil Brown <neilb@suse.de> Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> --- include/linux/sunrpc/svcsock.h | 2 - net/sunrpc/svc.c | 4 +-- net/sunrpc/svcsock.c | 52 +++++++++++++++++++++++++++++------------ 3 files changed, 41 insertions(+), 17 deletions(-) --- linux-2.6.19.4.orig/include/linux/sunrpc/svcsock.h +++ linux-2.6.19.4/include/linux/sunrpc/svcsock.h @@ -63,7 +63,7 @@ struct svc_sock { * Function prototypes. */ int svc_makesock(struct svc_serv *, int, unsigned short); -void svc_delete_socket(struct svc_sock *); +void svc_close_socket(struct svc_sock *); int svc_recv(struct svc_rqst *, long); int svc_send(struct svc_rqst *); void svc_drop(struct svc_rqst *); --- linux-2.6.19.4.orig/net/sunrpc/svc.c +++ linux-2.6.19.4/net/sunrpc/svc.c @@ -387,7 +387,7 @@ svc_destroy(struct svc_serv *serv) svsk = list_entry(serv->sv_tempsocks.next, struct svc_sock, sk_list); - svc_delete_socket(svsk); + svc_close_socket(svsk); } if (serv->sv_shutdown) serv->sv_shutdown(serv); @@ -396,7 +396,7 @@ svc_destroy(struct svc_serv *serv) svsk = list_entry(serv->sv_permsocks.next, struct svc_sock, sk_list); - svc_delete_socket(svsk); + svc_close_socket(svsk); } cache_clean_deferred(serv); --- linux-2.6.19.4.orig/net/sunrpc/svcsock.c +++ linux-2.6.19.4/net/sunrpc/svcsock.c @@ -61,6 +61,12 @@ * after a clear, the socket must be read/accepted * if this succeeds, it must be set again. * SK_CLOSE can set at any time. It is never cleared. + * sk_inuse contains a bias of '1' until SK_DEAD is set. + * so when sk_inuse hits zero, we know the socket is dead + * and no-one is using it. + * SK_DEAD can only be set while SK_BUSY is held which ensures + * no other thread will be using the socket or will try to + * set SK_DEAD. 
* */ @@ -69,6 +75,7 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *, int *errp, int pmap_reg); +static void svc_delete_socket(struct svc_sock *svsk); static void svc_udp_data_ready(struct sock *, int); static int svc_udp_recvfrom(struct svc_rqst *); static int svc_udp_sendto(struct svc_rqst *); @@ -299,8 +306,9 @@ void svc_reserve(struct svc_rqst *rqstp, static inline void svc_sock_put(struct svc_sock *svsk) { - if (atomic_dec_and_test(&svsk->sk_inuse) && - test_bit(SK_DEAD, &svsk->sk_flags)) { + if (atomic_dec_and_test(&svsk->sk_inuse)) { + BUG_ON(! test_bit(SK_DEAD, &svsk->sk_flags)); + dprintk("svc: releasing dead socket\n"); if (svsk->sk_sock->file) sockfd_put(svsk->sk_sock); @@ -490,7 +498,7 @@ svc_sock_names(char *buf, struct svc_ser if (!serv) return 0; - spin_lock(&serv->sv_lock); + spin_lock_bh(&serv->sv_lock); list_for_each_entry(svsk, &serv->sv_permsocks, sk_list) { int onelen = one_sock_name(buf+len, svsk); if (toclose && strcmp(toclose, buf+len) == 0) @@ -498,12 +506,12 @@ svc_sock_names(char *buf, struct svc_ser else len += onelen; } - spin_unlock(&serv->sv_lock); + spin_unlock_bh(&serv->sv_lock); if (closesk) /* Should unregister with portmap, but you cannot * unregister just one protocol... 
*/ - svc_delete_socket(closesk); + svc_close_socket(closesk); else if (toclose) return -ENOENT; return len; @@ -653,6 +661,11 @@ svc_udp_recvfrom(struct svc_rqst *rqstp) return svc_deferred_recv(rqstp); } + if (test_bit(SK_CLOSE, &svsk->sk_flags)) { + svc_delete_socket(svsk); + return 0; + } + clear_bit(SK_DATA, &svsk->sk_flags); while ((skb = skb_recv_datagram(svsk->sk_sk, 0, 1, &err)) == NULL) { if (err == -EAGAIN) { @@ -1142,7 +1155,8 @@ svc_tcp_sendto(struct svc_rqst *rqstp) rqstp->rq_sock->sk_server->sv_name, (sent<0)?"got error":"sent only", sent, xbufp->len); - svc_delete_socket(rqstp->rq_sock); + set_bit(SK_CLOSE, &rqstp->rq_sock->sk_flags); + svc_sock_enqueue(rqstp->rq_sock); sent = -EAGAIN; } return sent; @@ -1461,7 +1475,7 @@ svc_setup_socket(struct svc_serv *serv, svsk->sk_odata = inet->sk_data_ready; svsk->sk_owspace = inet->sk_write_space; svsk->sk_server = serv; - atomic_set(&svsk->sk_inuse, 0); + atomic_set(&svsk->sk_inuse, 1); svsk->sk_lastrecv = get_seconds(); spin_lock_init(&svsk->sk_defer_lock); INIT_LIST_HEAD(&svsk->sk_deferred); @@ -1582,7 +1596,7 @@ bummer: /* * Remove a dead socket */ -void +static void svc_delete_socket(struct svc_sock *svsk) { struct svc_serv *serv; @@ -1608,16 +1622,26 @@ svc_delete_socket(struct svc_sock *svsk) * while still attached to a queue, the queue itself * is about to be destroyed (in svc_destroy). */ - if (!test_and_set_bit(SK_DEAD, &svsk->sk_flags)) + if (!test_and_set_bit(SK_DEAD, &svsk->sk_flags)) { + BUG_ON(atomic_read(&svsk->sk_inuse)<2); + atomic_dec(&svsk->sk_inuse); if (test_bit(SK_TEMP, &svsk->sk_flags)) serv->sv_tmpcnt--; + } - /* This atomic_inc should be needed - svc_delete_socket - * should have the semantic of dropping a reference. - * But it doesn't yet.... 
- */ - atomic_inc(&svsk->sk_inuse); spin_unlock_bh(&serv->sv_lock); +} + +void svc_close_socket(struct svc_sock *svsk) +{ + set_bit(SK_CLOSE, &svsk->sk_flags); + if (test_and_set_bit(SK_BUSY, &svsk->sk_flags)) + /* someone else will have to effect the close */ + return; + + atomic_inc(&svsk->sk_inuse); + svc_delete_socket(svsk); + clear_bit(SK_BUSY, &svsk->sk_flags); svc_sock_put(svsk); } --
WARNING: multiple messages have this Message-ID (diff)
From: Greg KH <greg@kroah.com> To: linux-kernel@vger.kernel.org, stable@kernel.org, Andrew Morton <akpm@osdl.org> Cc: Justin Forbes <jmforbes@linuxtx.org>, Zwane Mwaikambo <zwane@arm.linux.org.uk>, Theodore Ts'o <tytso@mit.edu>, Randy Dunlap <rdunlap@xenotime.net>, Dave Jones <davej@redhat.com>, Chuck Wolber <chuckw@quantumlinux.com>, Chris Wedgwood <reviews@ml.cw.f00f.org>, Michael Krufky <mkrufky@linuxtv.org>, torvalds@linux-foundation.org, akpm@linux-foundation.org, alan@lxorguk.ukuu.org.uk, nfs@lists.sourceforge.net, Neil Brown <neilb@suse.de> Subject: [patch 21/21] knfsd: Fix a race in closing NFSd connections. Date: Tue, 20 Feb 2007 17:39:00 -0800 [thread overview] Message-ID: <20070221013900.GV30227@kroah.com> (raw) In-Reply-To: <20070221013619.GA30227@kroah.com> -stable review patch. If anyone has any objections, please let us know. ------------------ If you lose this race, it can iput a socket inode twice and you get a BUG in fs/inode.c. When I added the option for user-space to close a socket, I added some cruft to svc_delete_socket so that I could call that function when closing a socket per user-space request. This was the wrong thing to do. I should have just set SK_CLOSE and let normal mechanisms do the work. Not only wrong, but buggy. The locking is all wrong and it opened up a race whereby a socket could be closed twice. So this patch: Introduces svc_close_socket which sets SK_CLOSE then either leaves the close up to a thread, or calls svc_delete_socket if it can get SK_BUSY. Adds a bias to sk_inuse which is removed when SK_DEAD is set. This avoids races around shutting down the socket. Changes several 'spin_lock' to 'spin_lock_bh' where the _bh was missing. 
Bugzilla-url: http://bugzilla.kernel.org/show_bug.cgi?id=7916 Signed-off-by: Neil Brown <neilb@suse.de> Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> --- include/linux/sunrpc/svcsock.h | 2 - net/sunrpc/svc.c | 4 +-- net/sunrpc/svcsock.c | 52 +++++++++++++++++++++++++++++------------ 3 files changed, 41 insertions(+), 17 deletions(-) --- linux-2.6.19.4.orig/include/linux/sunrpc/svcsock.h +++ linux-2.6.19.4/include/linux/sunrpc/svcsock.h @@ -63,7 +63,7 @@ struct svc_sock { * Function prototypes. */ int svc_makesock(struct svc_serv *, int, unsigned short); -void svc_delete_socket(struct svc_sock *); +void svc_close_socket(struct svc_sock *); int svc_recv(struct svc_rqst *, long); int svc_send(struct svc_rqst *); void svc_drop(struct svc_rqst *); --- linux-2.6.19.4.orig/net/sunrpc/svc.c +++ linux-2.6.19.4/net/sunrpc/svc.c @@ -387,7 +387,7 @@ svc_destroy(struct svc_serv *serv) svsk = list_entry(serv->sv_tempsocks.next, struct svc_sock, sk_list); - svc_delete_socket(svsk); + svc_close_socket(svsk); } if (serv->sv_shutdown) serv->sv_shutdown(serv); @@ -396,7 +396,7 @@ svc_destroy(struct svc_serv *serv) svsk = list_entry(serv->sv_permsocks.next, struct svc_sock, sk_list); - svc_delete_socket(svsk); + svc_close_socket(svsk); } cache_clean_deferred(serv); --- linux-2.6.19.4.orig/net/sunrpc/svcsock.c +++ linux-2.6.19.4/net/sunrpc/svcsock.c @@ -61,6 +61,12 @@ * after a clear, the socket must be read/accepted * if this succeeds, it must be set again. * SK_CLOSE can set at any time. It is never cleared. + * sk_inuse contains a bias of '1' until SK_DEAD is set. + * so when sk_inuse hits zero, we know the socket is dead + * and no-one is using it. + * SK_DEAD can only be set while SK_BUSY is held which ensures + * no other thread will be using the socket or will try to + * set SK_DEAD. 
* */ @@ -69,6 +75,7 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *, int *errp, int pmap_reg); +static void svc_delete_socket(struct svc_sock *svsk); static void svc_udp_data_ready(struct sock *, int); static int svc_udp_recvfrom(struct svc_rqst *); static int svc_udp_sendto(struct svc_rqst *); @@ -299,8 +306,9 @@ void svc_reserve(struct svc_rqst *rqstp, static inline void svc_sock_put(struct svc_sock *svsk) { - if (atomic_dec_and_test(&svsk->sk_inuse) && - test_bit(SK_DEAD, &svsk->sk_flags)) { + if (atomic_dec_and_test(&svsk->sk_inuse)) { + BUG_ON(! test_bit(SK_DEAD, &svsk->sk_flags)); + dprintk("svc: releasing dead socket\n"); if (svsk->sk_sock->file) sockfd_put(svsk->sk_sock); @@ -490,7 +498,7 @@ svc_sock_names(char *buf, struct svc_ser if (!serv) return 0; - spin_lock(&serv->sv_lock); + spin_lock_bh(&serv->sv_lock); list_for_each_entry(svsk, &serv->sv_permsocks, sk_list) { int onelen = one_sock_name(buf+len, svsk); if (toclose && strcmp(toclose, buf+len) == 0) @@ -498,12 +506,12 @@ svc_sock_names(char *buf, struct svc_ser else len += onelen; } - spin_unlock(&serv->sv_lock); + spin_unlock_bh(&serv->sv_lock); if (closesk) /* Should unregister with portmap, but you cannot * unregister just one protocol... 
*/ - svc_delete_socket(closesk); + svc_close_socket(closesk); else if (toclose) return -ENOENT; return len; @@ -653,6 +661,11 @@ svc_udp_recvfrom(struct svc_rqst *rqstp) return svc_deferred_recv(rqstp); } + if (test_bit(SK_CLOSE, &svsk->sk_flags)) { + svc_delete_socket(svsk); + return 0; + } + clear_bit(SK_DATA, &svsk->sk_flags); while ((skb = skb_recv_datagram(svsk->sk_sk, 0, 1, &err)) == NULL) { if (err == -EAGAIN) { @@ -1142,7 +1155,8 @@ svc_tcp_sendto(struct svc_rqst *rqstp) rqstp->rq_sock->sk_server->sv_name, (sent<0)?"got error":"sent only", sent, xbufp->len); - svc_delete_socket(rqstp->rq_sock); + set_bit(SK_CLOSE, &rqstp->rq_sock->sk_flags); + svc_sock_enqueue(rqstp->rq_sock); sent = -EAGAIN; } return sent; @@ -1461,7 +1475,7 @@ svc_setup_socket(struct svc_serv *serv, svsk->sk_odata = inet->sk_data_ready; svsk->sk_owspace = inet->sk_write_space; svsk->sk_server = serv; - atomic_set(&svsk->sk_inuse, 0); + atomic_set(&svsk->sk_inuse, 1); svsk->sk_lastrecv = get_seconds(); spin_lock_init(&svsk->sk_defer_lock); INIT_LIST_HEAD(&svsk->sk_deferred); @@ -1582,7 +1596,7 @@ bummer: /* * Remove a dead socket */ -void +static void svc_delete_socket(struct svc_sock *svsk) { struct svc_serv *serv; @@ -1608,16 +1622,26 @@ svc_delete_socket(struct svc_sock *svsk) * while still attached to a queue, the queue itself * is about to be destroyed (in svc_destroy). */ - if (!test_and_set_bit(SK_DEAD, &svsk->sk_flags)) + if (!test_and_set_bit(SK_DEAD, &svsk->sk_flags)) { + BUG_ON(atomic_read(&svsk->sk_inuse)<2); + atomic_dec(&svsk->sk_inuse); if (test_bit(SK_TEMP, &svsk->sk_flags)) serv->sv_tmpcnt--; + } - /* This atomic_inc should be needed - svc_delete_socket - * should have the semantic of dropping a reference. - * But it doesn't yet.... 
- */ - atomic_inc(&svsk->sk_inuse); spin_unlock_bh(&serv->sv_lock); +} + +void svc_close_socket(struct svc_sock *svsk) +{ + set_bit(SK_CLOSE, &svsk->sk_flags); + if (test_and_set_bit(SK_BUSY, &svsk->sk_flags)) + /* someone else will have to effect the close */ + return; + + atomic_inc(&svsk->sk_inuse); + svc_delete_socket(svsk); + clear_bit(SK_BUSY, &svsk->sk_flags); svc_sock_put(svsk); } --
next prev parent reply other threads:[~2007-02-21 1:41 UTC|newest] Thread overview: 52+ messages / expand[flat|nested] mbox.gz Atom feed top [not found] <20070221012758.925122216@mini.kroah.org> 2007-02-21 1:36 ` [patch 00/21] 2.6.19-stable review Greg KH 2007-02-21 1:36 ` [patch 01/21] V4L: cx88: Fix lockup on suspend Greg KH 2007-02-22 1:00 ` Chuck Ebbert 2007-02-22 1:14 ` Michael Krufky 2007-02-21 1:36 ` [patch 02/21] V4L: Fix quickcam communicator driver for big endian architectures Greg KH 2007-02-21 1:36 ` [patch 03/21] V4L: fix ks0127 status flags Greg KH 2007-02-21 1:36 ` [patch 04/21] V4L: tveeprom: autodetect LG TAPC G701D as tuner type 37 Greg KH 2007-02-21 1:37 ` [patch 05/21] V4L: buf_qbuf: fix videobuf_queue->stream corruption and lockup Greg KH 2007-02-21 1:37 ` [patch 06/21] net/smc911x: match up spin lock/unlock Greg KH 2007-02-21 1:37 ` [patch 07/21] rtc-pcf8563: detect polarity of century bit automatically Greg KH 2007-02-21 1:37 ` [patch 08/21] aio: fix buggy put_ioctx call in aio_complete - v2 Greg KH 2007-02-21 1:37 ` [patch 09/21] x86_64: fix 2.6.18 regression - PTRACE_OLDSETOPTIONS should be accepted Greg KH 2007-02-21 1:37 ` [uml-devel] " Greg KH 2007-02-21 1:37 ` [patch 10/21] ide: fix drive side 80c cable check Greg KH 2007-02-21 1:37 ` [patch 11/21] pata_amd: fix an obvious bug in cable detection Greg KH 2007-02-21 1:37 ` [patch 12/21] bcm43xx: Fix for oops on resume Greg KH 2007-02-21 1:38 ` [patch 13/21] bcm43xx: Fix for oops on ampdu status Greg KH 2007-02-21 1:38 ` [patch 14/21] usb-audio: work around wrong frequency in CM6501 descriptors Greg KH 2007-02-21 1:38 ` [patch 15/21] usbaudio - Fix Oops with broken usb descriptors Greg KH 2007-02-21 1:38 ` [patch 16/21] usbaudio - Fix Oops with unconventional sample rates Greg KH 2007-02-21 1:38 ` [patch 17/21] Use different constraint for gcc < 4.1 in bitops Greg KH 2007-02-21 1:38 ` [patch 18/21] prism54: correct assignment of DOT1XENABLE in WE-19 codepaths Greg KH 2007-02-21 1:38 ` 
[patch 19/21] net, 8139too.c: fix netpoll deadlock Greg KH 2007-02-21 1:38 ` [patch 20/21] Keys: Fix key serial number collision handling Greg KH 2007-02-21 1:39 ` Greg KH [this message] 2007-02-21 1:39 ` [patch 21/21] knfsd: Fix a race in closing NFSd connections Greg KH 2007-02-21 13:36 ` [patch 00/21] 2.6.19-stable review Stefan Richter 2007-02-21 13:37 ` Stefan Richter 2007-03-09 5:35 ` Adrian Bunk 2007-02-21 16:38 ` Chuck Ebbert 2007-02-21 16:50 ` Chuck Ebbert 2007-02-21 19:31 ` Chuck Ebbert 2007-02-21 19:47 ` Andrew Morton 2007-02-21 20:09 ` Linus Torvalds 2007-02-21 22:45 ` Eric W. Biederman 2007-02-28 6:37 ` Eric W. Biederman 2007-02-28 8:51 ` Zwane Mwaikambo 2007-02-28 12:28 ` Eric W. Biederman 2007-02-28 19:52 ` [stable] " Greg KH 2007-02-28 23:25 ` Eric W. Biederman 2007-02-21 20:13 ` Eric W. Biederman 2007-02-21 20:21 ` Chuck Ebbert 2007-02-21 22:19 ` Andi Kleen 2007-02-21 22:20 ` Andi Kleen 2007-02-21 22:39 ` Chuck Ebbert 2007-02-22 1:19 ` Andi Kleen 2007-02-21 20:39 ` Greg KH 2007-02-21 20:44 ` Chuck Ebbert 2007-02-21 22:33 ` Chuck Ebbert 2007-02-21 22:35 ` Chuck Ebbert 2007-02-21 22:43 ` Chuck Ebbert 2007-02-22 16:09 ` Chuck Ebbert
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=20070221013900.GV30227@kroah.com \ --to=greg@kroah.com \ --cc=akpm@linux-foundation.org \ --cc=akpm@osdl.org \ --cc=alan@lxorguk.ukuu.org.uk \ --cc=chuckw@quantumlinux.com \ --cc=davej@redhat.com \ --cc=jmforbes@linuxtx.org \ --cc=linux-kernel@vger.kernel.org \ --cc=mkrufky@linuxtv.org \ --cc=neilb@suse.de \ --cc=nfs@lists.sourceforge.net \ --cc=rdunlap@xenotime.net \ --cc=reviews@ml.cw.f00f.org \ --cc=stable@kernel.org \ --cc=torvalds@linux-foundation.org \ --cc=tytso@mit.edu \ --cc=zwane@arm.linux.org.uk \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.