All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] nfs: fix a deadlock in nfs v4.1 client initialization
@ 2017-11-07 14:29 Scott Mayhew
  2017-11-07 15:30 ` Trond Myklebust
  0 siblings, 1 reply; 14+ messages in thread
From: Scott Mayhew @ 2017-11-07 14:29 UTC (permalink / raw)
  To: trond.myklebust, anna.schumaker; +Cc: linux-nfs

The following deadlock can occur between a process waiting for a client
to initialize in while walking the client list and another process
waiting for the nfs_clid_init_mutex so it can initialize that client:

Process 1                               Process 2
---------                               ---------
spin_lock(&nn->nfs_client_lock);
list_add_tail(&CLIENTA->cl_share_link,
        &nn->nfs_client_list);
spin_unlock(&nn->nfs_client_lock);
                                        spin_lock(&nn->nfs_client_lock);
                                        list_add_tail(&CLIENTB->cl_share_link,
                                                &nn->nfs_client_list);
                                        spin_unlock(&nn->nfs_client_lock);
                                        mutex_lock(&nfs_clid_init_mutex);
                                        nfs41_walk_client_list(clp, result, cred);
                                        nfs_wait_client_init_complete(CLIENTA);
(waiting for nfs_clid_init_mutex)

Make sure nfs_match_client() only evaluates clients that have completed
initialization in order to prevent that deadlock.

Signed-off-by: Scott Mayhew <smayhew@redhat.com>
---
 fs/nfs/client.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 22880ef..8b093994 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -291,12 +291,21 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat
 	const struct sockaddr *sap = data->addr;
 	struct nfs_net *nn = net_generic(data->net, nfs_net_id);
 
+again:
 	list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) {
 	        const struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr;
 		/* Don't match clients that failed to initialise properly */
 		if (clp->cl_cons_state < 0)
 			continue;
 
+		if (clp->cl_minorversion > 0 &&
+				clp->cl_cons_state > NFS_CS_READY) {
+			spin_unlock(&nn->nfs_client_lock);
+			nfs_wait_client_init_complete(clp);
+			spin_lock(&nn->nfs_client_lock);
+			goto again;
+		}
+
 		/* Different NFS versions cannot share the same nfs_client */
 		if (clp->rpc_ops != data->nfs_mod->rpc_ops)
 			continue;
-- 
2.9.5


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH] nfs: fix a deadlock in nfs v4.1 client initialization
  2017-11-07 14:29 [PATCH] nfs: fix a deadlock in nfs v4.1 client initialization Scott Mayhew
@ 2017-11-07 15:30 ` Trond Myklebust
  2017-11-07 18:26   ` Scott Mayhew
  0 siblings, 1 reply; 14+ messages in thread
From: Trond Myklebust @ 2017-11-07 15:30 UTC (permalink / raw)
  To: anna.schumaker, smayhew; +Cc: linux-nfs

T24gVHVlLCAyMDE3LTExLTA3IGF0IDA5OjI5IC0wNTAwLCBTY290dCBNYXloZXcgd3JvdGU6DQo+
IFRoZSBmb2xsb3dpbmcgZGVhZGxvY2sgY2FuIG9jY3VyIGJldHdlZW4gYSBwcm9jZXNzIHdhaXRp
bmcgZm9yIGENCj4gY2xpZW50DQo+IHRvIGluaXRpYWxpemUgaW4gd2hpbGUgd2Fsa2luZyB0aGUg
Y2xpZW50IGxpc3QgYW5kIGFub3RoZXIgcHJvY2Vzcw0KPiB3YWl0aW5nIGZvciB0aGUgbmZzX2Ns
aWRfaW5pdF9tdXRleCBzbyBpdCBjYW4gaW5pdGlhbGl6ZSB0aGF0IGNsaWVudDoNCj4gDQo+IFBy
b2Nlc3MgMSAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBQcm9jZXNzIDINCj4gLS0tLS0t
LS0tICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIC0tLS0tLS0tLQ0KPiBzcGluX2xvY2so
Jm5uLT5uZnNfY2xpZW50X2xvY2spOw0KPiBsaXN0X2FkZF90YWlsKCZDTElFTlRBLT5jbF9zaGFy
ZV9saW5rLA0KPiAgICAgICAgICZubi0+bmZzX2NsaWVudF9saXN0KTsNCj4gc3Bpbl91bmxvY2so
Jm5uLT5uZnNfY2xpZW50X2xvY2spOw0KPiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg
ICAgICAgICAgc3Bpbl9sb2NrKCZubi0NCj4gPm5mc19jbGllbnRfbG9jayk7DQo+ICAgICAgICAg
ICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBsaXN0X2FkZF90YWlsKCZDTElFTlRCLQ0K
PiA+Y2xfc2hhcmVfbGluaywNCj4gICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg
ICAgICAgICAgICAgJm5uLQ0KPiA+bmZzX2NsaWVudF9saXN0KTsNCj4gICAgICAgICAgICAgICAg
ICAgICAgICAgICAgICAgICAgICAgICAgIHNwaW5fdW5sb2NrKCZubi0NCj4gPm5mc19jbGllbnRf
bG9jayk7DQo+ICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBtdXRleF9s
b2NrKCZuZnNfY2xpZF9pbml0X211dA0KPiBleCk7DQo+ICAgICAgICAgICAgICAgICAgICAgICAg
ICAgICAgICAgICAgICAgICBuZnM0MV93YWxrX2NsaWVudF9saXN0KGNscCwNCj4gcmVzdWx0LCBj
cmVkKTsNCj4gICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIG5mc193YWl0
X2NsaWVudF9pbml0X2NvbXBsZXRlDQo+IChDTElFTlRBKTsNCj4gKHdhaXRpbmcgZm9yIG5mc19j
bGlkX2luaXRfbXV0ZXgpDQo+IA0KPiBNYWtlIHN1cmUgbmZzX21hdGNoX2NsaWVudCgpIG9ubHkg
ZXZhbHVhdGVzIGNsaWVudHMgdGhhdCBoYXZlDQo+IGNvbXBsZXRlZA0KPiBpbml0aWFsaXphdGlv
biBpbiBvcmRlciB0byBwcmV2ZW50IHRoYXQgZGVhZGxvY2suDQo+IA0KPiBTaWduZWQtb2ZmLWJ5
OiBTY290dCBNYXloZXcgPHNtYXloZXdAcmVkaGF0LmNvbT4NCj4gLS0tDQo+ICBmcy9uZnMvY2xp
ZW50LmMgfCA5ICsrKysrKysrKw0KPiAgMSBmaWxlIGNoYW5nZWQsIDkgaW5zZXJ0aW9ucygrKQ0K
PiANCj4gZGlmZiAtLWdpdCBhL2ZzL25mcy9jbGllbnQuYyBiL2ZzL25mcy9jbGllbnQuYw0KPiBp
bmRleCAyMjg4MGVmLi44YjA5Mzk5NCAxMDA2NDQNCj4gLS0tIGEvZnMvbmZzL2NsaWVudC5jDQo+
ICsrKyBiL2ZzL25mcy9jbGllbnQuYw0KPiBAQCAtMjkxLDEyICsyOTEsMjEgQEAgc3RhdGljIHN0
cnVjdCBuZnNfY2xpZW50DQo+ICpuZnNfbWF0Y2hfY2xpZW50KGNvbnN0IHN0cnVjdCBuZnNfY2xp
ZW50X2luaXRkYXRhICpkYXQNCj4gIAljb25zdCBzdHJ1Y3Qgc29ja2FkZHIgKnNhcCA9IGRhdGEt
PmFkZHI7DQo+ICAJc3RydWN0IG5mc19uZXQgKm5uID0gbmV0X2dlbmVyaWMoZGF0YS0+bmV0LCBu
ZnNfbmV0X2lkKTsNCj4gIA0KPiArYWdhaW46DQo+ICAJbGlzdF9mb3JfZWFjaF9lbnRyeShjbHAs
ICZubi0+bmZzX2NsaWVudF9saXN0LA0KPiBjbF9zaGFyZV9saW5rKSB7DQo+ICAJICAgICAgICBj
b25zdCBzdHJ1Y3Qgc29ja2FkZHIgKmNsYXAgPSAoc3RydWN0IHNvY2thZGRyDQo+ICopJmNscC0+
Y2xfYWRkcjsNCj4gIAkJLyogRG9uJ3QgbWF0Y2ggY2xpZW50cyB0aGF0IGZhaWxlZCB0byBpbml0
aWFsaXNlDQo+IHByb3Blcmx5ICovDQo+ICAJCWlmIChjbHAtPmNsX2NvbnNfc3RhdGUgPCAwKQ0K
PiAgCQkJY29udGludWU7DQo+ICANCj4gKwkJaWYgKGNscC0+Y2xfbWlub3J2ZXJzaW9uID4gMCAm
Jg0KPiArCQkJCWNscC0+Y2xfY29uc19zdGF0ZSA+IE5GU19DU19SRUFEWSkgew0KPiArCQkJc3Bp
bl91bmxvY2soJm5uLT5uZnNfY2xpZW50X2xvY2spOw0KPiArCQkJbmZzX3dhaXRfY2xpZW50X2lu
aXRfY29tcGxldGUoY2xwKTsNCj4gKwkJCXNwaW5fbG9jaygmbm4tPm5mc19jbGllbnRfbG9jayk7
DQo+ICsJCQlnb3RvIGFnYWluOw0KPiArCQl9DQo+ICsNCj4gIAkJLyogRGlmZmVyZW50IE5GUyB2
ZXJzaW9ucyBjYW5ub3Qgc2hhcmUgdGhlIHNhbWUNCj4gbmZzX2NsaWVudCAqLw0KPiAgCQlpZiAo
Y2xwLT5ycGNfb3BzICE9IGRhdGEtPm5mc19tb2QtPnJwY19vcHMpDQo+ICAJCQljb250aW51ZTsN
Cg0KV2h5IHRoZSB0ZXN0IGZvciBjbHAtPmNsX21pbm9ydmVyc2lvbj8gV2hhdCdzIHNvIG1pbm9y
IHZlcnNpb24gc3BlY2lmaWMNCmFib3V0IGFueSBvZiB0aGlzPw0KDQotLSANClRyb25kIE15a2xl
YnVzdA0KTGludXggTkZTIGNsaWVudCBtYWludGFpbmVyLCBQcmltYXJ5RGF0YQ0KdHJvbmQubXlr
bGVidXN0QHByaW1hcnlkYXRhLmNvbQ0K


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] nfs: fix a deadlock in nfs v4.1 client initialization
  2017-11-07 15:30 ` Trond Myklebust
@ 2017-11-07 18:26   ` Scott Mayhew
  2017-11-07 18:30     ` Trond Myklebust
  0 siblings, 1 reply; 14+ messages in thread
From: Scott Mayhew @ 2017-11-07 18:26 UTC (permalink / raw)
  To: Trond Myklebust; +Cc: anna.schumaker, linux-nfs

On Tue, 07 Nov 2017, Trond Myklebust wrote:

> On Tue, 2017-11-07 at 09:29 -0500, Scott Mayhew wrote:
> > The following deadlock can occur between a process waiting for a
> > client
> > to initialize in while walking the client list and another process
> > waiting for the nfs_clid_init_mutex so it can initialize that client:
> > 
> > Process 1                               Process 2
> > ---------                               ---------
> > spin_lock(&nn->nfs_client_lock);
> > list_add_tail(&CLIENTA->cl_share_link,
> >         &nn->nfs_client_list);
> > spin_unlock(&nn->nfs_client_lock);
> >                                         spin_lock(&nn-
> > >nfs_client_lock);
> >                                         list_add_tail(&CLIENTB-
> > >cl_share_link,
> >                                                 &nn-
> > >nfs_client_list);
> >                                         spin_unlock(&nn-
> > >nfs_client_lock);
> >                                         mutex_lock(&nfs_clid_init_mut
> > ex);
> >                                         nfs41_walk_client_list(clp,
> > result, cred);
> >                                         nfs_wait_client_init_complete
> > (CLIENTA);
> > (waiting for nfs_clid_init_mutex)
> > 
> > Make sure nfs_match_client() only evaluates clients that have
> > completed
> > initialization in order to prevent that deadlock.
> > 
> > Signed-off-by: Scott Mayhew <smayhew@redhat.com>
> > ---
> >  fs/nfs/client.c | 9 +++++++++
> >  1 file changed, 9 insertions(+)
> > 
> > diff --git a/fs/nfs/client.c b/fs/nfs/client.c
> > index 22880ef..8b093994 100644
> > --- a/fs/nfs/client.c
> > +++ b/fs/nfs/client.c
> > @@ -291,12 +291,21 @@ static struct nfs_client
> > *nfs_match_client(const struct nfs_client_initdata *dat
> >  	const struct sockaddr *sap = data->addr;
> >  	struct nfs_net *nn = net_generic(data->net, nfs_net_id);
> >  
> > +again:
> >  	list_for_each_entry(clp, &nn->nfs_client_list,
> > cl_share_link) {
> >  	        const struct sockaddr *clap = (struct sockaddr
> > *)&clp->cl_addr;
> >  		/* Don't match clients that failed to initialise
> > properly */
> >  		if (clp->cl_cons_state < 0)
> >  			continue;
> >  
> > +		if (clp->cl_minorversion > 0 &&
> > +				clp->cl_cons_state > NFS_CS_READY) {
> > +			spin_unlock(&nn->nfs_client_lock);
> > +			nfs_wait_client_init_complete(clp);
> > +			spin_lock(&nn->nfs_client_lock);
> > +			goto again;
> > +		}
> > +
> >  		/* Different NFS versions cannot share the same
> > nfs_client */
> >  		if (clp->rpc_ops != data->nfs_mod->rpc_ops)
> >  			continue;
> 
> Why the test for clp->cl_minorversion? What's so minor version specific
> about any of this?

The deadlock doesn't occur with v4.0 clients because those are being
marked NFS_CS_READY in nfs4_client_client(), before the trunking
detection

        if (!nfs4_has_session(clp))
                nfs_mark_client_ready(clp, NFS_CS_READY);

        error = nfs4_discover_server_trunking(clp, &old);

Now that I think about it, that seems wrong because nfs4_match_client()
could be comparing cl_clientid and cl_owner_id values that haven't been
established yet... in fact when I run my reproducer against two ip
addresses on the same server I wind up with multiple clients with the
same cl_clientid and cl_owner_id  

crash> list -H 0xffff9fc2b6327118 -o nfs_client.cl_share_link -s nfs_client.cl_clientid,cl_owner_id
ffff9fc2b352c800
  cl_clientid = 0xb81ff359309120bb
  cl_owner_id = 0xffff9fc2ae9644c0 "Linux NFSv4.0 fedora26.smayhew.test"
ffff9fc2aded7400
  cl_clientid = 0xb81ff359309120bb
  cl_owner_id = 0xffff9fc2b0584f80 "Linux NFSv4.0 fedora26.smayhew.test"

I'll poke around a bit more.

-Scott
> 
> -- 
> Trond Myklebust
> Linux NFS client maintainer, PrimaryData
> trond.myklebust@primarydata.com

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] nfs: fix a deadlock in nfs v4.1 client initialization
  2017-11-07 18:26   ` Scott Mayhew
@ 2017-11-07 18:30     ` Trond Myklebust
  2017-11-20 21:28       ` Scott Mayhew
  0 siblings, 1 reply; 14+ messages in thread
From: Trond Myklebust @ 2017-11-07 18:30 UTC (permalink / raw)
  To: smayhew; +Cc: anna.schumaker, linux-nfs

T24gVHVlLCAyMDE3LTExLTA3IGF0IDEzOjI2IC0wNTAwLCBTY290dCBNYXloZXcgd3JvdGU6DQo+
IE9uIFR1ZSwgMDcgTm92IDIwMTcsIFRyb25kIE15a2xlYnVzdCB3cm90ZToNCj4gDQo+ID4gT24g
VHVlLCAyMDE3LTExLTA3IGF0IDA5OjI5IC0wNTAwLCBTY290dCBNYXloZXcgd3JvdGU6DQo+ID4g
PiBUaGUgZm9sbG93aW5nIGRlYWRsb2NrIGNhbiBvY2N1ciBiZXR3ZWVuIGEgcHJvY2VzcyB3YWl0
aW5nIGZvciBhDQo+ID4gPiBjbGllbnQNCj4gPiA+IHRvIGluaXRpYWxpemUgaW4gd2hpbGUgd2Fs
a2luZyB0aGUgY2xpZW50IGxpc3QgYW5kIGFub3RoZXINCj4gPiA+IHByb2Nlc3MNCj4gPiA+IHdh
aXRpbmcgZm9yIHRoZSBuZnNfY2xpZF9pbml0X211dGV4IHNvIGl0IGNhbiBpbml0aWFsaXplIHRo
YXQNCj4gPiA+IGNsaWVudDoNCj4gPiA+IA0KPiA+ID4gUHJvY2VzcyAxICAgICAgICAgICAgICAg
ICAgICAgICAgICAgICAgIFByb2Nlc3MgMg0KPiA+ID4gLS0tLS0tLS0tICAgICAgICAgICAgICAg
ICAgICAgICAgICAgICAgIC0tLS0tLS0tLQ0KPiA+ID4gc3Bpbl9sb2NrKCZubi0+bmZzX2NsaWVu
dF9sb2NrKTsNCj4gPiA+IGxpc3RfYWRkX3RhaWwoJkNMSUVOVEEtPmNsX3NoYXJlX2xpbmssDQo+
ID4gPiAgICAgICAgICZubi0+bmZzX2NsaWVudF9saXN0KTsNCj4gPiA+IHNwaW5fdW5sb2NrKCZu
bi0+bmZzX2NsaWVudF9sb2NrKTsNCj4gPiA+ICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg
ICAgICAgICAgICBzcGluX2xvY2soJm5uLQ0KPiA+ID4gPiBuZnNfY2xpZW50X2xvY2spOw0KPiA+
ID4gDQo+ID4gPiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgbGlzdF9h
ZGRfdGFpbCgmQ0xJRU5UQi0NCj4gPiA+ID4gY2xfc2hhcmVfbGluaywNCj4gPiA+IA0KPiA+ID4g
ICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgJm5uLQ0KPiA+
ID4gPiBuZnNfY2xpZW50X2xpc3QpOw0KPiA+ID4gDQo+ID4gPiAgICAgICAgICAgICAgICAgICAg
ICAgICAgICAgICAgICAgICAgICAgc3Bpbl91bmxvY2soJm5uLQ0KPiA+ID4gPiBuZnNfY2xpZW50
X2xvY2spOw0KPiA+ID4gDQo+ID4gPiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg
ICAgICAgbXV0ZXhfbG9jaygmbmZzX2NsaWRfaW5pdA0KPiA+ID4gX211dA0KPiA+ID4gZXgpOw0K
PiA+ID4gICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIG5mczQxX3dhbGtf
Y2xpZW50X2xpc3QoY2wNCj4gPiA+IHAsDQo+ID4gPiByZXN1bHQsIGNyZWQpOw0KPiA+ID4gICAg
ICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIG5mc193YWl0X2NsaWVudF9pbml0
X2NvbXANCj4gPiA+IGxldGUNCj4gPiA+IChDTElFTlRBKTsNCj4gPiA+ICh3YWl0aW5nIGZvciBu
ZnNfY2xpZF9pbml0X211dGV4KQ0KPiA+ID4gDQo+ID4gPiBNYWtlIHN1cmUgbmZzX21hdGNoX2Ns
aWVudCgpIG9ubHkgZXZhbHVhdGVzIGNsaWVudHMgdGhhdCBoYXZlDQo+ID4gPiBjb21wbGV0ZWQN
Cj4gPiA+IGluaXRpYWxpemF0aW9uIGluIG9yZGVyIHRvIHByZXZlbnQgdGhhdCBkZWFkbG9jay4N
Cj4gPiA+IA0KPiA+ID4gU2lnbmVkLW9mZi1ieTogU2NvdHQgTWF5aGV3IDxzbWF5aGV3QHJlZGhh
dC5jb20+DQo+ID4gPiAtLS0NCj4gPiA+ICBmcy9uZnMvY2xpZW50LmMgfCA5ICsrKysrKysrKw0K
PiA+ID4gIDEgZmlsZSBjaGFuZ2VkLCA5IGluc2VydGlvbnMoKykNCj4gPiA+IA0KPiA+ID4gZGlm
ZiAtLWdpdCBhL2ZzL25mcy9jbGllbnQuYyBiL2ZzL25mcy9jbGllbnQuYw0KPiA+ID4gaW5kZXgg
MjI4ODBlZi4uOGIwOTM5OTQgMTAwNjQ0DQo+ID4gPiAtLS0gYS9mcy9uZnMvY2xpZW50LmMNCj4g
PiA+ICsrKyBiL2ZzL25mcy9jbGllbnQuYw0KPiA+ID4gQEAgLTI5MSwxMiArMjkxLDIxIEBAIHN0
YXRpYyBzdHJ1Y3QgbmZzX2NsaWVudA0KPiA+ID4gKm5mc19tYXRjaF9jbGllbnQoY29uc3Qgc3Ry
dWN0IG5mc19jbGllbnRfaW5pdGRhdGEgKmRhdA0KPiA+ID4gIAljb25zdCBzdHJ1Y3Qgc29ja2Fk
ZHIgKnNhcCA9IGRhdGEtPmFkZHI7DQo+ID4gPiAgCXN0cnVjdCBuZnNfbmV0ICpubiA9IG5ldF9n
ZW5lcmljKGRhdGEtPm5ldCwgbmZzX25ldF9pZCk7DQo+ID4gPiAgDQo+ID4gPiArYWdhaW46DQo+
ID4gPiAgCWxpc3RfZm9yX2VhY2hfZW50cnkoY2xwLCAmbm4tPm5mc19jbGllbnRfbGlzdCwNCj4g
PiA+IGNsX3NoYXJlX2xpbmspIHsNCj4gPiA+ICAJICAgICAgICBjb25zdCBzdHJ1Y3Qgc29ja2Fk
ZHIgKmNsYXAgPSAoc3RydWN0IHNvY2thZGRyDQo+ID4gPiAqKSZjbHAtPmNsX2FkZHI7DQo+ID4g
PiAgCQkvKiBEb24ndCBtYXRjaCBjbGllbnRzIHRoYXQgZmFpbGVkIHRvIGluaXRpYWxpc2UNCj4g
PiA+IHByb3Blcmx5ICovDQo+ID4gPiAgCQlpZiAoY2xwLT5jbF9jb25zX3N0YXRlIDwgMCkNCj4g
PiA+ICAJCQljb250aW51ZTsNCj4gPiA+ICANCj4gPiA+ICsJCWlmIChjbHAtPmNsX21pbm9ydmVy
c2lvbiA+IDAgJiYNCj4gPiA+ICsJCQkJY2xwLT5jbF9jb25zX3N0YXRlID4NCj4gPiA+IE5GU19D
U19SRUFEWSkgew0KPiA+ID4gKwkJCXNwaW5fdW5sb2NrKCZubi0+bmZzX2NsaWVudF9sb2NrKTsN
Cj4gPiA+ICsJCQluZnNfd2FpdF9jbGllbnRfaW5pdF9jb21wbGV0ZShjbHApOw0KPiA+ID4gKwkJ
CXNwaW5fbG9jaygmbm4tPm5mc19jbGllbnRfbG9jayk7DQo+ID4gPiArCQkJZ290byBhZ2FpbjsN
Cj4gPiA+ICsJCX0NCj4gPiA+ICsNCj4gPiA+ICAJCS8qIERpZmZlcmVudCBORlMgdmVyc2lvbnMg
Y2Fubm90IHNoYXJlIHRoZSBzYW1lDQo+ID4gPiBuZnNfY2xpZW50ICovDQo+ID4gPiAgCQlpZiAo
Y2xwLT5ycGNfb3BzICE9IGRhdGEtPm5mc19tb2QtPnJwY19vcHMpDQo+ID4gPiAgCQkJY29udGlu
dWU7DQo+ID4gDQo+ID4gV2h5IHRoZSB0ZXN0IGZvciBjbHAtPmNsX21pbm9ydmVyc2lvbj8gV2hh
dCdzIHNvIG1pbm9yIHZlcnNpb24NCj4gPiBzcGVjaWZpYw0KPiA+IGFib3V0IGFueSBvZiB0aGlz
Pw0KPiANCj4gVGhlIGRlYWRsb2NrIGRvZXNuJ3Qgb2NjdXIgd2l0aCB2NC4wIGNsaWVudHMgYmVj
YXVzZSB0aG9zZSBhcmUgYmVpbmcNCj4gbWFya2VkIE5GU19DU19SRUFEWSBpbiBuZnM0X2NsaWVu
dF9jbGllbnQoKSwgYmVmb3JlIHRoZSB0cnVua2luZw0KPiBkZXRlY3Rpb24NCg0KU3VyZSwgYnV0
IHRoZSByb290IGNhdXNlIHlvdSBhcmUgYXNzZXJ0aW5nIGlzIHRoYXQgdGhlIG5mc19jbGllbnQg
aGFzDQpub3QgZmluaXNoZWQgaW5pdGlhbGlzaW5nLiBXaGF0IGlzIG1pbm9ydmVyc2lvbi1zcGVj
aWZpYyAob3IgZXZlbg0KTkZTdjQtc3BlY2lmaWMpIGFib3V0IHRoYXQ/DQoNCi0tIA0KVHJvbmQg
TXlrbGVidXN0DQpMaW51eCBORlMgY2xpZW50IG1haW50YWluZXIsIFByaW1hcnlEYXRhDQp0cm9u
ZC5teWtsZWJ1c3RAcHJpbWFyeWRhdGEuY29tDQo=


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] nfs: fix a deadlock in nfs v4.1 client initialization
  2017-11-07 18:30     ` Trond Myklebust
@ 2017-11-20 21:28       ` Scott Mayhew
  2017-11-20 21:41         ` [PATCH] nfs: fix a deadlock in nfs " Scott Mayhew
  0 siblings, 1 reply; 14+ messages in thread
From: Scott Mayhew @ 2017-11-20 21:28 UTC (permalink / raw)
  To: Trond Myklebust; +Cc: anna.schumaker, linux-nfs

On Tue, 07 Nov 2017, Trond Myklebust wrote:

> On Tue, 2017-11-07 at 13:26 -0500, Scott Mayhew wrote:
> > On Tue, 07 Nov 2017, Trond Myklebust wrote:
> > 
> > > On Tue, 2017-11-07 at 09:29 -0500, Scott Mayhew wrote:
> > > > The following deadlock can occur between a process waiting for a
> > > > client
> > > > to initialize in while walking the client list and another
> > > > process
> > > > waiting for the nfs_clid_init_mutex so it can initialize that
> > > > client:
> > > > 
> > > > Process 1                               Process 2
> > > > ---------                               ---------
> > > > spin_lock(&nn->nfs_client_lock);
> > > > list_add_tail(&CLIENTA->cl_share_link,
> > > >         &nn->nfs_client_list);
> > > > spin_unlock(&nn->nfs_client_lock);
> > > >                                         spin_lock(&nn-
> > > > > nfs_client_lock);
> > > > 
> > > >                                         list_add_tail(&CLIENTB-
> > > > > cl_share_link,
> > > > 
> > > >                                                 &nn-
> > > > > nfs_client_list);
> > > > 
> > > >                                         spin_unlock(&nn-
> > > > > nfs_client_lock);
> > > > 
> > > >                                         mutex_lock(&nfs_clid_init
> > > > _mut
> > > > ex);
> > > >                                         nfs41_walk_client_list(cl
> > > > p,
> > > > result, cred);
> > > >                                         nfs_wait_client_init_comp
> > > > lete
> > > > (CLIENTA);
> > > > (waiting for nfs_clid_init_mutex)
> > > > 
> > > > Make sure nfs_match_client() only evaluates clients that have
> > > > completed
> > > > initialization in order to prevent that deadlock.
> > > > 
> > > > Signed-off-by: Scott Mayhew <smayhew@redhat.com>
> > > > ---
> > > >  fs/nfs/client.c | 9 +++++++++
> > > >  1 file changed, 9 insertions(+)
> > > > 
> > > > diff --git a/fs/nfs/client.c b/fs/nfs/client.c
> > > > index 22880ef..8b093994 100644
> > > > --- a/fs/nfs/client.c
> > > > +++ b/fs/nfs/client.c
> > > > @@ -291,12 +291,21 @@ static struct nfs_client
> > > > *nfs_match_client(const struct nfs_client_initdata *dat
> > > >  	const struct sockaddr *sap = data->addr;
> > > >  	struct nfs_net *nn = net_generic(data->net, nfs_net_id);
> > > >  
> > > > +again:
> > > >  	list_for_each_entry(clp, &nn->nfs_client_list,
> > > > cl_share_link) {
> > > >  	        const struct sockaddr *clap = (struct sockaddr
> > > > *)&clp->cl_addr;
> > > >  		/* Don't match clients that failed to initialise
> > > > properly */
> > > >  		if (clp->cl_cons_state < 0)
> > > >  			continue;
> > > >  
> > > > +		if (clp->cl_minorversion > 0 &&
> > > > +				clp->cl_cons_state >
> > > > NFS_CS_READY) {
> > > > +			spin_unlock(&nn->nfs_client_lock);
> > > > +			nfs_wait_client_init_complete(clp);
> > > > +			spin_lock(&nn->nfs_client_lock);
> > > > +			goto again;
> > > > +		}
> > > > +
> > > >  		/* Different NFS versions cannot share the same
> > > > nfs_client */
> > > >  		if (clp->rpc_ops != data->nfs_mod->rpc_ops)
> > > >  			continue;
> > > 
> > > Why the test for clp->cl_minorversion? What's so minor version
> > > specific
> > > about any of this?
> > 
> > The deadlock doesn't occur with v4.0 clients because those are being
> > marked NFS_CS_READY in nfs4_client_client(), before the trunking
> > detection
> 
> Sure, but the root cause you are asserting is that the nfs_client has
> not finished initialising. What is minorversion-specific (or even
> NFSv4-specific) about that?

It's NFSv4-specific because one of the processes involved in the
deadlock (the one waiting on the nfs_client_active_wq while holding the 
nfs_clid_init_mutex) is performing trunking detection, which is
NFSv4-specific.

Anyways, this patch is no good because it breaks when issuing mounts
against multiple IPs of a multi-homed NFS server.  I don't have a
reference on the the client I'm waiting on, so if the process doing
trunking detection with that client finds and uses an existing client,
then the client I'm waiting on goes away (and even if I had a reference
on it, if the process doing trunking detection finds and uses an
existing client, there's nothing that would mark the client that I'm
waiting for ready).

I have another approach that fixes the issue.  It's kinda ugly but I'm
going to post it anyway because I don't really have any other ideas.

Also I forgot to include the reproducer info, so here it is:

1 nfs client, 2 nfs servers

on nfs servers:
    # for i in $(seq 1 25) ; do mkdir /exports/dir$i ; done
    # echo "/exports *(rw,no_root_squash)" >> /etc/exports
    # exportfs -ar

on nfs client:
    # for i in $(seq 1 25) ; do mkdir -p /mnt/test$i ; done

    # hosts=(server1 server2)
    # for i in $(seq 1 25) ; do
        hostnum=$(($i % 2))
        mount.nfs ${hosts[$hostnum]}:/exports/dir$i /mnt/test$i -o vers=4,minorversion=1 &
      done

-Scott

> 
> -- 
> Trond Myklebust
> Linux NFS client maintainer, PrimaryData
> trond.myklebust@primarydata.com

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH] nfs: fix a deadlock in nfs client initialization
  2017-11-20 21:28       ` Scott Mayhew
@ 2017-11-20 21:41         ` Scott Mayhew
  2017-11-29 20:16           ` Anna Schumaker
  2017-11-29 20:50           ` Trond Myklebust
  0 siblings, 2 replies; 14+ messages in thread
From: Scott Mayhew @ 2017-11-20 21:41 UTC (permalink / raw)
  To: trond.myklebust, anna.schumaker; +Cc: linux-nfs

The following deadlock can occur between a process waiting for a client
to initialize in while walking the client list and another process
waiting for the nfs_clid_init_mutex so it can initialize that client:

Process 1                               Process 2
---------                               ---------
spin_lock(&nn->nfs_client_lock);
list_add_tail(&CLIENTA->cl_share_link,
        &nn->nfs_client_list);
spin_unlock(&nn->nfs_client_lock);
                                        spin_lock(&nn->nfs_client_lock);
                                        list_add_tail(&CLIENTB->cl_share_link,
                                                &nn->nfs_client_list);
                                        spin_unlock(&nn->nfs_client_lock);
                                        mutex_lock(&nfs_clid_init_mutex);
                                        nfs41_walk_client_list(clp, result, cred);
                                        nfs_wait_client_init_complete(CLIENTA);
(waiting for nfs_clid_init_mutex)

Add and initilize the client with the nfs_clid_init_mutex held in
order to prevent that deadlock.

Signed-off-by: Scott Mayhew <smayhew@redhat.com>
---
 fs/nfs/client.c    | 21 +++++++++++++++++++--
 fs/nfs/nfs4state.c |  4 ----
 2 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 0ac2fb1..db38c47 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -60,6 +60,7 @@ static DECLARE_WAIT_QUEUE_HEAD(nfs_client_active_wq);
 static DEFINE_SPINLOCK(nfs_version_lock);
 static DEFINE_MUTEX(nfs_version_mutex);
 static LIST_HEAD(nfs_versions);
+static DEFINE_MUTEX(nfs_clid_init_mutex);
 
 /*
  * RPC cruft for NFS
@@ -386,7 +387,7 @@ nfs_found_client(const struct nfs_client_initdata *cl_init,
  */
 struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_init)
 {
-	struct nfs_client *clp, *new = NULL;
+	struct nfs_client *clp, *new = NULL, *result = NULL;
 	struct nfs_net *nn = net_generic(cl_init->net, nfs_net_id);
 	const struct nfs_rpc_ops *rpc_ops = cl_init->nfs_mod->rpc_ops;
 
@@ -407,11 +408,27 @@ struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_init)
 			return nfs_found_client(cl_init, clp);
 		}
 		if (new) {
+			/* add and initialize the client with the
+			 * nfs_clid_init_mutex held to prevent a deadlock
+			 * with the server trunking detection
+			 */
+			spin_unlock(&nn->nfs_client_lock);
+			mutex_lock(&nfs_clid_init_mutex);
+			spin_lock(&nn->nfs_client_lock);
+			clp = nfs_match_client(cl_init);
+			if (clp) {
+				spin_unlock(&nn->nfs_client_lock);
+				mutex_unlock(&nfs_clid_init_mutex);
+				new->rpc_ops->free_client(new);
+				return nfs_found_client(cl_init, clp);
+			}
 			list_add_tail(&new->cl_share_link,
 					&nn->nfs_client_list);
 			spin_unlock(&nn->nfs_client_lock);
 			new->cl_flags = cl_init->init_flags;
-			return rpc_ops->init_client(new, cl_init);
+			result = rpc_ops->init_client(new, cl_init);
+			mutex_unlock(&nfs_clid_init_mutex);
+			return result;
 		}
 
 		spin_unlock(&nn->nfs_client_lock);
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 54fd56d..668164e 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -77,8 +77,6 @@ const nfs4_stateid invalid_stateid = {
 	.type = NFS4_INVALID_STATEID_TYPE,
 };
 
-static DEFINE_MUTEX(nfs_clid_init_mutex);
-
 int nfs4_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)
 {
 	struct nfs4_setclientid_res clid = {
@@ -2164,7 +2162,6 @@ int nfs4_discover_server_trunking(struct nfs_client *clp,
 	clnt = clp->cl_rpcclient;
 	i = 0;
 
-	mutex_lock(&nfs_clid_init_mutex);
 again:
 	status  = -ENOENT;
 	cred = nfs4_get_clid_cred(clp);
@@ -2232,7 +2229,6 @@ int nfs4_discover_server_trunking(struct nfs_client *clp,
 	}
 
 out_unlock:
-	mutex_unlock(&nfs_clid_init_mutex);
 	dprintk("NFS: %s: status = %d\n", __func__, status);
 	return status;
 }
-- 
2.9.5


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH] nfs: fix a deadlock in nfs client initialization
  2017-11-20 21:41         ` [PATCH] nfs: fix a deadlock in nfs " Scott Mayhew
@ 2017-11-29 20:16           ` Anna Schumaker
  2017-11-29 20:50           ` Trond Myklebust
  1 sibling, 0 replies; 14+ messages in thread
From: Anna Schumaker @ 2017-11-29 20:16 UTC (permalink / raw)
  To: Scott Mayhew, trond.myklebust; +Cc: linux-nfs

Hi Scott,

On 11/20/2017 04:41 PM, Scott Mayhew wrote:
> The following deadlock can occur between a process waiting for a client
> to initialize in while walking the client list and another process
> waiting for the nfs_clid_init_mutex so it can initialize that client:
> 
> Process 1                               Process 2
> ---------                               ---------
> spin_lock(&nn->nfs_client_lock);
> list_add_tail(&CLIENTA->cl_share_link,
>         &nn->nfs_client_list);
> spin_unlock(&nn->nfs_client_lock);
>                                         spin_lock(&nn->nfs_client_lock);
>                                         list_add_tail(&CLIENTB->cl_share_link,
>                                                 &nn->nfs_client_list);
>                                         spin_unlock(&nn->nfs_client_lock);
>                                         mutex_lock(&nfs_clid_init_mutex);
>                                         nfs41_walk_client_list(clp, result, cred);
>                                         nfs_wait_client_init_complete(CLIENTA);
> (waiting for nfs_clid_init_mutex)
> 
> Add and initilize the client with the nfs_clid_init_mutex held in
> order to prevent that deadlock.
> 
> Signed-off-by: Scott Mayhew <smayhew@redhat.com>
> ---
>  fs/nfs/client.c    | 21 +++++++++++++++++++--
>  fs/nfs/nfs4state.c |  4 ----
>  2 files changed, 19 insertions(+), 6 deletions(-)
> 
> diff --git a/fs/nfs/client.c b/fs/nfs/client.c
> index 0ac2fb1..db38c47 100644
> --- a/fs/nfs/client.c
> +++ b/fs/nfs/client.c
> @@ -60,6 +60,7 @@ static DECLARE_WAIT_QUEUE_HEAD(nfs_client_active_wq);
>  static DEFINE_SPINLOCK(nfs_version_lock);
>  static DEFINE_MUTEX(nfs_version_mutex);
>  static LIST_HEAD(nfs_versions);
> +static DEFINE_MUTEX(nfs_clid_init_mutex);
>  
>  /*
>   * RPC cruft for NFS
> @@ -386,7 +387,7 @@ nfs_found_client(const struct nfs_client_initdata *cl_init,
>   */
>  struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_init)
>  {
> -	struct nfs_client *clp, *new = NULL;
> +	struct nfs_client *clp, *new = NULL, *result = NULL;
>  	struct nfs_net *nn = net_generic(cl_init->net, nfs_net_id);
>  	const struct nfs_rpc_ops *rpc_ops = cl_init->nfs_mod->rpc_ops;
>  
> @@ -407,11 +408,27 @@ struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_init)
>  			return nfs_found_client(cl_init, clp);
>  		}
>  		if (new) {
> +			/* add and initialize the client with the
> +			 * nfs_clid_init_mutex held to prevent a deadlock
> +			 * with the server trunking detection
> +			 */
> +			spin_unlock(&nn->nfs_client_lock);
> +			mutex_lock(&nfs_clid_init_mutex);
> +			spin_lock(&nn->nfs_client_lock);
> +			clp = nfs_match_client(cl_init);
> +			if (clp) {
> +				spin_unlock(&nn->nfs_client_lock);
> +				mutex_unlock(&nfs_clid_init_mutex);
> +				new->rpc_ops->free_client(new);
> +				return nfs_found_client(cl_init, clp);
> +			}

Is there any reason you can't just grab the nfs_clid_init_mutex at the very beginning of the do/while loop, right before getting the nn->nfs_client_lock?  Then you wouldn't need to repeat the nfs_match_client() check, which might help clean the code up a bit.

Thanks,
Anna


>  			list_add_tail(&new->cl_share_link,
>  					&nn->nfs_client_list);
>  			spin_unlock(&nn->nfs_client_lock);
>  			new->cl_flags = cl_init->init_flags;
> -			return rpc_ops->init_client(new, cl_init);
> +			result = rpc_ops->init_client(new, cl_init);
> +			mutex_unlock(&nfs_clid_init_mutex);
> +			return result;
>  		}
>  
>  		spin_unlock(&nn->nfs_client_lock);
> diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
> index 54fd56d..668164e 100644
> --- a/fs/nfs/nfs4state.c
> +++ b/fs/nfs/nfs4state.c
> @@ -77,8 +77,6 @@ const nfs4_stateid invalid_stateid = {
>  	.type = NFS4_INVALID_STATEID_TYPE,
>  };
>  
> -static DEFINE_MUTEX(nfs_clid_init_mutex);
> -
>  int nfs4_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)
>  {
>  	struct nfs4_setclientid_res clid = {
> @@ -2164,7 +2162,6 @@ int nfs4_discover_server_trunking(struct nfs_client *clp,
>  	clnt = clp->cl_rpcclient;
>  	i = 0;
>  
> -	mutex_lock(&nfs_clid_init_mutex);
>  again:
>  	status  = -ENOENT;
>  	cred = nfs4_get_clid_cred(clp);
> @@ -2232,7 +2229,6 @@ int nfs4_discover_server_trunking(struct nfs_client *clp,
>  	}
>  
>  out_unlock:
> -	mutex_unlock(&nfs_clid_init_mutex);
>  	dprintk("NFS: %s: status = %d\n", __func__, status);
>  	return status;
>  }
> 

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] nfs: fix a deadlock in nfs client initialization
  2017-11-20 21:41         ` [PATCH] nfs: fix a deadlock in nfs " Scott Mayhew
  2017-11-29 20:16           ` Anna Schumaker
@ 2017-11-29 20:50           ` Trond Myklebust
  2017-11-30 14:46             ` Scott Mayhew
  1 sibling, 1 reply; 14+ messages in thread
From: Trond Myklebust @ 2017-11-29 20:50 UTC (permalink / raw)
  To: anna.schumaker, smayhew; +Cc: linux-nfs

T24gTW9uLCAyMDE3LTExLTIwIGF0IDE2OjQxIC0wNTAwLCBTY290dCBNYXloZXcgd3JvdGU6DQo+
IFRoZSBmb2xsb3dpbmcgZGVhZGxvY2sgY2FuIG9jY3VyIGJldHdlZW4gYSBwcm9jZXNzIHdhaXRp
bmcgZm9yIGENCj4gY2xpZW50DQo+IHRvIGluaXRpYWxpemUgaW4gd2hpbGUgd2Fsa2luZyB0aGUg
Y2xpZW50IGxpc3QgYW5kIGFub3RoZXIgcHJvY2Vzcw0KPiB3YWl0aW5nIGZvciB0aGUgbmZzX2Ns
aWRfaW5pdF9tdXRleCBzbyBpdCBjYW4gaW5pdGlhbGl6ZSB0aGF0IGNsaWVudDoNCj4gDQo+IFBy
b2Nlc3MgMSAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBQcm9jZXNzIDINCj4gLS0tLS0t
LS0tICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIC0tLS0tLS0tLQ0KPiBzcGluX2xvY2so
Jm5uLT5uZnNfY2xpZW50X2xvY2spOw0KPiBsaXN0X2FkZF90YWlsKCZDTElFTlRBLT5jbF9zaGFy
ZV9saW5rLA0KPiAgICAgICAgICZubi0+bmZzX2NsaWVudF9saXN0KTsNCj4gc3Bpbl91bmxvY2so
Jm5uLT5uZnNfY2xpZW50X2xvY2spOw0KPiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg
ICAgICAgICAgc3Bpbl9sb2NrKCZubi0NCj4gPm5mc19jbGllbnRfbG9jayk7DQo+ICAgICAgICAg
ICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBsaXN0X2FkZF90YWlsKCZDTElFTlRCLQ0K
PiA+Y2xfc2hhcmVfbGluaywNCj4gICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg
ICAgICAgICAgICAgJm5uLQ0KPiA+bmZzX2NsaWVudF9saXN0KTsNCj4gICAgICAgICAgICAgICAg
ICAgICAgICAgICAgICAgICAgICAgICAgIHNwaW5fdW5sb2NrKCZubi0NCj4gPm5mc19jbGllbnRf
bG9jayk7DQo+ICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBtdXRleF9s
b2NrKCZuZnNfY2xpZF9pbml0X211dA0KPiBleCk7DQo+ICAgICAgICAgICAgICAgICAgICAgICAg
ICAgICAgICAgICAgICAgICBuZnM0MV93YWxrX2NsaWVudF9saXN0KGNscCwNCj4gcmVzdWx0LCBj
cmVkKTsNCj4gICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIG5mc193YWl0
X2NsaWVudF9pbml0X2NvbXBsZXRlDQo+IChDTElFTlRBKTsNCj4gKHdhaXRpbmcgZm9yIG5mc19j
bGlkX2luaXRfbXV0ZXgpDQo+IA0KPiBBZGQgYW5kIGluaXRpbGl6ZSB0aGUgY2xpZW50IHdpdGgg
dGhlIG5mc19jbGlkX2luaXRfbXV0ZXggaGVsZCBpbg0KPiBvcmRlciB0byBwcmV2ZW50IHRoYXQg
ZGVhZGxvY2suDQo+IA0KPiBTaWduZWQtb2ZmLWJ5OiBTY290dCBNYXloZXcgPHNtYXloZXdAcmVk
aGF0LmNvbT4NCj4gLS0tDQo+ICBmcy9uZnMvY2xpZW50LmMgICAgfCAyMSArKysrKysrKysrKysr
KysrKysrLS0NCj4gIGZzL25mcy9uZnM0c3RhdGUuYyB8ICA0IC0tLS0NCj4gIDIgZmlsZXMgY2hh
bmdlZCwgMTkgaW5zZXJ0aW9ucygrKSwgNiBkZWxldGlvbnMoLSkNCj4gDQo+IGRpZmYgLS1naXQg
YS9mcy9uZnMvY2xpZW50LmMgYi9mcy9uZnMvY2xpZW50LmMNCj4gaW5kZXggMGFjMmZiMS4uZGIz
OGM0NyAxMDA2NDQNCj4gLS0tIGEvZnMvbmZzL2NsaWVudC5jDQo+ICsrKyBiL2ZzL25mcy9jbGll
bnQuYw0KPiBAQCAtNjAsNiArNjAsNyBAQCBzdGF0aWMNCj4gREVDTEFSRV9XQUlUX1FVRVVFX0hF
QUQobmZzX2NsaWVudF9hY3RpdmVfd3EpOw0KPiAgc3RhdGljIERFRklORV9TUElOTE9DSyhuZnNf
dmVyc2lvbl9sb2NrKTsNCj4gIHN0YXRpYyBERUZJTkVfTVVURVgobmZzX3ZlcnNpb25fbXV0ZXgp
Ow0KPiAgc3RhdGljIExJU1RfSEVBRChuZnNfdmVyc2lvbnMpOw0KPiArc3RhdGljIERFRklORV9N
VVRFWChuZnNfY2xpZF9pbml0X211dGV4KTsNCj4gIA0KPiAgLyoNCj4gICAqIFJQQyBjcnVmdCBm
b3IgTkZTDQo+IEBAIC0zODYsNyArMzg3LDcgQEAgbmZzX2ZvdW5kX2NsaWVudChjb25zdCBzdHJ1
Y3QgbmZzX2NsaWVudF9pbml0ZGF0YQ0KPiAqY2xfaW5pdCwNCj4gICAqLw0KPiAgc3RydWN0IG5m
c19jbGllbnQgKm5mc19nZXRfY2xpZW50KGNvbnN0IHN0cnVjdCBuZnNfY2xpZW50X2luaXRkYXRh
DQo+ICpjbF9pbml0KQ0KPiAgew0KPiAtCXN0cnVjdCBuZnNfY2xpZW50ICpjbHAsICpuZXcgPSBO
VUxMOw0KPiArCXN0cnVjdCBuZnNfY2xpZW50ICpjbHAsICpuZXcgPSBOVUxMLCAqcmVzdWx0ID0g
TlVMTDsNCj4gIAlzdHJ1Y3QgbmZzX25ldCAqbm4gPSBuZXRfZ2VuZXJpYyhjbF9pbml0LT5uZXQs
IG5mc19uZXRfaWQpOw0KPiAgCWNvbnN0IHN0cnVjdCBuZnNfcnBjX29wcyAqcnBjX29wcyA9IGNs
X2luaXQtPm5mc19tb2QtDQo+ID5ycGNfb3BzOw0KPiAgDQo+IEBAIC00MDcsMTEgKzQwOCwyNyBA
QCBzdHJ1Y3QgbmZzX2NsaWVudCAqbmZzX2dldF9jbGllbnQoY29uc3Qgc3RydWN0DQo+IG5mc19j
bGllbnRfaW5pdGRhdGEgKmNsX2luaXQpDQo+ICAJCQlyZXR1cm4gbmZzX2ZvdW5kX2NsaWVudChj
bF9pbml0LCBjbHApOw0KPiAgCQl9DQo+ICAJCWlmIChuZXcpIHsNCj4gKwkJCS8qIGFkZCBhbmQg
aW5pdGlhbGl6ZSB0aGUgY2xpZW50IHdpdGggdGhlDQo+ICsJCQkgKiBuZnNfY2xpZF9pbml0X211
dGV4IGhlbGQgdG8gcHJldmVudCBhDQo+IGRlYWRsb2NrDQo+ICsJCQkgKiB3aXRoIHRoZSBzZXJ2
ZXIgdHJ1bmtpbmcgZGV0ZWN0aW9uDQo+ICsJCQkgKi8NCj4gKwkJCXNwaW5fdW5sb2NrKCZubi0+
bmZzX2NsaWVudF9sb2NrKTsNCj4gKwkJCW11dGV4X2xvY2soJm5mc19jbGlkX2luaXRfbXV0ZXgp
Ow0KPiArCQkJc3Bpbl9sb2NrKCZubi0+bmZzX2NsaWVudF9sb2NrKTsNCj4gKwkJCWNscCA9IG5m
c19tYXRjaF9jbGllbnQoY2xfaW5pdCk7DQo+ICsJCQlpZiAoY2xwKSB7DQo+ICsJCQkJc3Bpbl91
bmxvY2soJm5uLT5uZnNfY2xpZW50X2xvY2spOw0KPiArCQkJCW11dGV4X3VubG9jaygmbmZzX2Ns
aWRfaW5pdF9tdXRleCk7DQo+ICsJCQkJbmV3LT5ycGNfb3BzLT5mcmVlX2NsaWVudChuZXcpOw0K
PiArCQkJCXJldHVybiBuZnNfZm91bmRfY2xpZW50KGNsX2luaXQsDQo+IGNscCk7DQo+ICsJCQl9
DQo+ICAJCQlsaXN0X2FkZF90YWlsKCZuZXctPmNsX3NoYXJlX2xpbmssDQo+ICAJCQkJCSZubi0+
bmZzX2NsaWVudF9saXN0KTsNCj4gIAkJCXNwaW5fdW5sb2NrKCZubi0+bmZzX2NsaWVudF9sb2Nr
KTsNCj4gIAkJCW5ldy0+Y2xfZmxhZ3MgPSBjbF9pbml0LT5pbml0X2ZsYWdzOw0KPiAtCQkJcmV0
dXJuIHJwY19vcHMtPmluaXRfY2xpZW50KG5ldywgY2xfaW5pdCk7DQo+ICsJCQlyZXN1bHQgPSBy
cGNfb3BzLT5pbml0X2NsaWVudChuZXcsIGNsX2luaXQpOw0KPiArCQkJbXV0ZXhfdW5sb2NrKCZu
ZnNfY2xpZF9pbml0X211dGV4KTsNCj4gKwkJCXJldHVybiByZXN1bHQ7DQo+ICAJCX0NCj4gIA0K
PiAgCQlzcGluX3VubG9jaygmbm4tPm5mc19jbGllbnRfbG9jayk7DQo+IGRpZmYgLS1naXQgYS9m
cy9uZnMvbmZzNHN0YXRlLmMgYi9mcy9uZnMvbmZzNHN0YXRlLmMNCj4gaW5kZXggNTRmZDU2ZC4u
NjY4MTY0ZSAxMDA2NDQNCj4gLS0tIGEvZnMvbmZzL25mczRzdGF0ZS5jDQo+ICsrKyBiL2ZzL25m
cy9uZnM0c3RhdGUuYw0KPiBAQCAtNzcsOCArNzcsNiBAQCBjb25zdCBuZnM0X3N0YXRlaWQgaW52
YWxpZF9zdGF0ZWlkID0gew0KPiAgCS50eXBlID0gTkZTNF9JTlZBTElEX1NUQVRFSURfVFlQRSwN
Cj4gIH07DQo+ICANCj4gLXN0YXRpYyBERUZJTkVfTVVURVgobmZzX2NsaWRfaW5pdF9tdXRleCk7
DQo+IC0NCj4gIGludCBuZnM0X2luaXRfY2xpZW50aWQoc3RydWN0IG5mc19jbGllbnQgKmNscCwg
c3RydWN0IHJwY19jcmVkDQo+ICpjcmVkKQ0KPiAgew0KPiAgCXN0cnVjdCBuZnM0X3NldGNsaWVu
dGlkX3JlcyBjbGlkID0gew0KPiBAQCAtMjE2NCw3ICsyMTYyLDYgQEAgaW50IG5mczRfZGlzY292
ZXJfc2VydmVyX3RydW5raW5nKHN0cnVjdA0KPiBuZnNfY2xpZW50ICpjbHAsDQo+ICAJY2xudCA9
IGNscC0+Y2xfcnBjY2xpZW50Ow0KPiAgCWkgPSAwOw0KPiAgDQo+IC0JbXV0ZXhfbG9jaygmbmZz
X2NsaWRfaW5pdF9tdXRleCk7DQo+ICBhZ2FpbjoNCj4gIAlzdGF0dXMgID0gLUVOT0VOVDsNCj4g
IAljcmVkID0gbmZzNF9nZXRfY2xpZF9jcmVkKGNscCk7DQo+IEBAIC0yMjMyLDcgKzIyMjksNiBA
QCBpbnQgbmZzNF9kaXNjb3Zlcl9zZXJ2ZXJfdHJ1bmtpbmcoc3RydWN0DQo+IG5mc19jbGllbnQg
KmNscCwNCj4gIAl9DQo+ICANCj4gIG91dF91bmxvY2s6DQo+IC0JbXV0ZXhfdW5sb2NrKCZuZnNf
Y2xpZF9pbml0X211dGV4KTsNCj4gIAlkcHJpbnRrKCJORlM6ICVzOiBzdGF0dXMgPSAlZFxuIiwg
X19mdW5jX18sIHN0YXR1cyk7DQo+ICAJcmV0dXJuIHN0YXR1czsNCj4gIH0NCg0KWW91ciBpbml0
aWFsIGZpeCB3YXMgZmluZS4gSXQganVzdCBuZWVkZWQgMiBjaGFuZ2VzOg0KDQoxKSBSZW1vdmUg
dGhlIHRlc3QgZm9yICdjbHAtPmNsX21pbm9ydmVyc2lvbiA+IDAnLg0KMikgUmVmY291bnQgdGhl
IHN0cnVjdCBuZnNfY2xpZW50IHdoaWxlIHlvdSBhcmUgd2FpdGluZyBmb3IgaXQgdG8gYmUNCmlu
aXRpYWxpc2VkLg0KDQotLSANClRyb25kIE15a2xlYnVzdA0KTGludXggTkZTIGNsaWVudCBtYWlu
dGFpbmVyLCBQcmltYXJ5RGF0YQ0KdHJvbmQubXlrbGVidXN0QHByaW1hcnlkYXRhLmNvbQ0K


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] nfs: fix a deadlock in nfs client initialization
  2017-11-29 20:50           ` Trond Myklebust
@ 2017-11-30 14:46             ` Scott Mayhew
  2017-11-30 22:21               ` [PATCH v2] " Scott Mayhew
  0 siblings, 1 reply; 14+ messages in thread
From: Scott Mayhew @ 2017-11-30 14:46 UTC (permalink / raw)
  To: Trond Myklebust; +Cc: anna.schumaker, linux-nfs

On Wed, 29 Nov 2017, Trond Myklebust wrote:

> On Mon, 2017-11-20 at 16:41 -0500, Scott Mayhew wrote:
> > The following deadlock can occur between a process waiting for a
> > client
> > to initialize in while walking the client list and another process
> > waiting for the nfs_clid_init_mutex so it can initialize that client:
> > 
> > Process 1                               Process 2
> > ---------                               ---------
> > spin_lock(&nn->nfs_client_lock);
> > list_add_tail(&CLIENTA->cl_share_link,
> >         &nn->nfs_client_list);
> > spin_unlock(&nn->nfs_client_lock);
> >                                         spin_lock(&nn-
> > >nfs_client_lock);
> >                                         list_add_tail(&CLIENTB-
> > >cl_share_link,
> >                                                 &nn-
> > >nfs_client_list);
> >                                         spin_unlock(&nn-
> > >nfs_client_lock);
> >                                         mutex_lock(&nfs_clid_init_mut
> > ex);
> >                                         nfs41_walk_client_list(clp,
> > result, cred);
> >                                         nfs_wait_client_init_complete
> > (CLIENTA);
> > (waiting for nfs_clid_init_mutex)
> > 
> > Add and initilize the client with the nfs_clid_init_mutex held in
> > order to prevent that deadlock.
> > 
> > Signed-off-by: Scott Mayhew <smayhew@redhat.com>
> > ---
> >  fs/nfs/client.c    | 21 +++++++++++++++++++--
> >  fs/nfs/nfs4state.c |  4 ----
> >  2 files changed, 19 insertions(+), 6 deletions(-)
> > 
> > diff --git a/fs/nfs/client.c b/fs/nfs/client.c
> > index 0ac2fb1..db38c47 100644
> > --- a/fs/nfs/client.c
> > +++ b/fs/nfs/client.c
> > @@ -60,6 +60,7 @@ static
> > DECLARE_WAIT_QUEUE_HEAD(nfs_client_active_wq);
> >  static DEFINE_SPINLOCK(nfs_version_lock);
> >  static DEFINE_MUTEX(nfs_version_mutex);
> >  static LIST_HEAD(nfs_versions);
> > +static DEFINE_MUTEX(nfs_clid_init_mutex);
> >  
> >  /*
> >   * RPC cruft for NFS
> > @@ -386,7 +387,7 @@ nfs_found_client(const struct nfs_client_initdata
> > *cl_init,
> >   */
> >  struct nfs_client *nfs_get_client(const struct nfs_client_initdata
> > *cl_init)
> >  {
> > -	struct nfs_client *clp, *new = NULL;
> > +	struct nfs_client *clp, *new = NULL, *result = NULL;
> >  	struct nfs_net *nn = net_generic(cl_init->net, nfs_net_id);
> >  	const struct nfs_rpc_ops *rpc_ops = cl_init->nfs_mod-
> > >rpc_ops;
> >  
> > @@ -407,11 +408,27 @@ struct nfs_client *nfs_get_client(const struct
> > nfs_client_initdata *cl_init)
> >  			return nfs_found_client(cl_init, clp);
> >  		}
> >  		if (new) {
> > +			/* add and initialize the client with the
> > +			 * nfs_clid_init_mutex held to prevent a
> > deadlock
> > +			 * with the server trunking detection
> > +			 */
> > +			spin_unlock(&nn->nfs_client_lock);
> > +			mutex_lock(&nfs_clid_init_mutex);
> > +			spin_lock(&nn->nfs_client_lock);
> > +			clp = nfs_match_client(cl_init);
> > +			if (clp) {
> > +				spin_unlock(&nn->nfs_client_lock);
> > +				mutex_unlock(&nfs_clid_init_mutex);
> > +				new->rpc_ops->free_client(new);
> > +				return nfs_found_client(cl_init,
> > clp);
> > +			}
> >  			list_add_tail(&new->cl_share_link,
> >  					&nn->nfs_client_list);
> >  			spin_unlock(&nn->nfs_client_lock);
> >  			new->cl_flags = cl_init->init_flags;
> > -			return rpc_ops->init_client(new, cl_init);
> > +			result = rpc_ops->init_client(new, cl_init);
> > +			mutex_unlock(&nfs_clid_init_mutex);
> > +			return result;
> >  		}
> >  
> >  		spin_unlock(&nn->nfs_client_lock);
> > diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
> > index 54fd56d..668164e 100644
> > --- a/fs/nfs/nfs4state.c
> > +++ b/fs/nfs/nfs4state.c
> > @@ -77,8 +77,6 @@ const nfs4_stateid invalid_stateid = {
> >  	.type = NFS4_INVALID_STATEID_TYPE,
> >  };
> >  
> > -static DEFINE_MUTEX(nfs_clid_init_mutex);
> > -
> >  int nfs4_init_clientid(struct nfs_client *clp, struct rpc_cred
> > *cred)
> >  {
> >  	struct nfs4_setclientid_res clid = {
> > @@ -2164,7 +2162,6 @@ int nfs4_discover_server_trunking(struct
> > nfs_client *clp,
> >  	clnt = clp->cl_rpcclient;
> >  	i = 0;
> >  
> > -	mutex_lock(&nfs_clid_init_mutex);
> >  again:
> >  	status  = -ENOENT;
> >  	cred = nfs4_get_clid_cred(clp);
> > @@ -2232,7 +2229,6 @@ int nfs4_discover_server_trunking(struct
> > nfs_client *clp,
> >  	}
> >  
> >  out_unlock:
> > -	mutex_unlock(&nfs_clid_init_mutex);
> >  	dprintk("NFS: %s: status = %d\n", __func__, status);
> >  	return status;
> >  }
> 
> Your initial fix was fine. It just needed 2 changes:
> 
> 1) Remove the test for 'clp->cl_minorversion > 0'.
> 2) Refcount the struct nfs_client while you are waiting for it to be
> initialised.

I guess I didn't explain it well enough in the email but I actually did
try that already and I had problems with a multi homed NFS server.
Once the first process finds and uses an existing client, who's going to
mark the old client ready so that the other processes finish waiting? 
Maybe what I need is to mark the old client ready with an error like
-EPERM or something, i.e.

---8<---
        error = nfs4_discover_server_trunking(clp, &old);
        if (error < 0)
                goto error;

        if (clp != old)
                clp->cl_preserve_clid = true;
        nfs_mark_client_ready(clp, -EPERM);
        nfs_put_client(clp);
        clear_bit(NFS_CS_TSM_POSSIBLE, &clp->cl_flags);
        return old;
---8<---

I'll test that to see if that makes any difference.

-Scott
> 
> -- 
> Trond Myklebust
> Linux NFS client maintainer, PrimaryData
> trond.myklebust@primarydata.com

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH v2] nfs: fix a deadlock in nfs client initialization
  2017-11-30 14:46             ` Scott Mayhew
@ 2017-11-30 22:21               ` Scott Mayhew
  2017-12-01  2:36                 ` Trond Myklebust
  0 siblings, 1 reply; 14+ messages in thread
From: Scott Mayhew @ 2017-11-30 22:21 UTC (permalink / raw)
  To: trond.myklebust, anna.schumaker; +Cc: linux-nfs

The following deadlock can occur between a process waiting for a client
to initialize in while walking the client list during nfsv4 server trunking
detection and another process waiting for the nfs_clid_init_mutex so it
can initialize that client:

Process 1                               Process 2
---------                               ---------
spin_lock(&nn->nfs_client_lock);
list_add_tail(&CLIENTA->cl_share_link,
        &nn->nfs_client_list);
spin_unlock(&nn->nfs_client_lock);
                                        spin_lock(&nn->nfs_client_lock);
                                        list_add_tail(&CLIENTB->cl_share_link,
                                                &nn->nfs_client_list);
                                        spin_unlock(&nn->nfs_client_lock);
                                        mutex_lock(&nfs_clid_init_mutex);
                                        nfs41_walk_client_list(clp, result, cred);
                                        nfs_wait_client_init_complete(CLIENTA);
(waiting for nfs_clid_init_mutex)

Make sure nfs_match_client() only evaluates clients that have completed
initialization in order to prevent that deadlock.

Signed-off-by: Scott Mayhew <smayhew@redhat.com>
---
 fs/nfs/client.c     | 11 +++++++++++
 fs/nfs/nfs4client.c |  9 ++++++++-
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 0ac2fb1..7e42380 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -291,12 +291,23 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat
 	const struct sockaddr *sap = data->addr;
 	struct nfs_net *nn = net_generic(data->net, nfs_net_id);
 
+again:
 	list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) {
 	        const struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr;
 		/* Don't match clients that failed to initialise properly */
 		if (clp->cl_cons_state < 0)
 			continue;
 
+		/* If a client is still initializing then we need to wait */
+		if (clp->cl_cons_state > NFS_CS_READY) {
+			spin_unlock(&nn->nfs_client_lock);
+			refcount_inc(&clp->cl_count);
+			nfs_wait_client_init_complete(clp);
+			nfs_put_client(clp);
+			spin_lock(&nn->nfs_client_lock);
+			goto again;
+		}
+
 		/* Different NFS versions cannot share the same nfs_client */
 		if (clp->rpc_ops != data->nfs_mod->rpc_ops)
 			continue;
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 12bbab0..39dd39c 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -411,8 +411,15 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp,
 	if (error < 0)
 		goto error;
 
-	if (clp != old)
+	if (clp != old) {
 		clp->cl_preserve_clid = true;
+		/*
+		 * Mark the client as having failed initialization so other
+		 * processes walking the nfs_client_list in nfs_match_client()
+		 * won't try to use it.
+		 */
+		nfs_mark_client_ready(clp, -EPERM);
+	}
 	nfs_put_client(clp);
 	clear_bit(NFS_CS_TSM_POSSIBLE, &clp->cl_flags);
 	return old;
-- 
2.9.5


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH v2] nfs: fix a deadlock in nfs client initialization
  2017-11-30 22:21               ` [PATCH v2] " Scott Mayhew
@ 2017-12-01  2:36                 ` Trond Myklebust
  2017-12-01 13:10                   ` Scott Mayhew
  0 siblings, 1 reply; 14+ messages in thread
From: Trond Myklebust @ 2017-12-01  2:36 UTC (permalink / raw)
  To: anna.schumaker, smayhew; +Cc: linux-nfs

T24gVGh1LCAyMDE3LTExLTMwIGF0IDE3OjIxIC0wNTAwLCBTY290dCBNYXloZXcgd3JvdGU6DQo+
IFRoZSBmb2xsb3dpbmcgZGVhZGxvY2sgY2FuIG9jY3VyIGJldHdlZW4gYSBwcm9jZXNzIHdhaXRp
bmcgZm9yIGENCj4gY2xpZW50DQo+IHRvIGluaXRpYWxpemUgaW4gd2hpbGUgd2Fsa2luZyB0aGUg
Y2xpZW50IGxpc3QgZHVyaW5nIG5mc3Y0IHNlcnZlcg0KPiB0cnVua2luZw0KPiBkZXRlY3Rpb24g
YW5kIGFub3RoZXIgcHJvY2VzcyB3YWl0aW5nIGZvciB0aGUgbmZzX2NsaWRfaW5pdF9tdXRleCBz
bw0KPiBpdA0KPiBjYW4gaW5pdGlhbGl6ZSB0aGF0IGNsaWVudDoNCj4gDQo+IFByb2Nlc3MgMSAg
ICAgICAgICAgICAgICAgICAgICAgICAgICAgICBQcm9jZXNzIDINCj4gLS0tLS0tLS0tICAgICAg
ICAgICAgICAgICAgICAgICAgICAgICAgIC0tLS0tLS0tLQ0KPiBzcGluX2xvY2soJm5uLT5uZnNf
Y2xpZW50X2xvY2spOw0KPiBsaXN0X2FkZF90YWlsKCZDTElFTlRBLT5jbF9zaGFyZV9saW5rLA0K
PiAgICAgICAgICZubi0+bmZzX2NsaWVudF9saXN0KTsNCj4gc3Bpbl91bmxvY2soJm5uLT5uZnNf
Y2xpZW50X2xvY2spOw0KPiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg
c3Bpbl9sb2NrKCZubi0NCj4gPm5mc19jbGllbnRfbG9jayk7DQo+ICAgICAgICAgICAgICAgICAg
ICAgICAgICAgICAgICAgICAgICAgICBsaXN0X2FkZF90YWlsKCZDTElFTlRCLQ0KPiA+Y2xfc2hh
cmVfbGluaywNCj4gICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg
ICAgJm5uLQ0KPiA+bmZzX2NsaWVudF9saXN0KTsNCj4gICAgICAgICAgICAgICAgICAgICAgICAg
ICAgICAgICAgICAgICAgIHNwaW5fdW5sb2NrKCZubi0NCj4gPm5mc19jbGllbnRfbG9jayk7DQo+
ICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBtdXRleF9sb2NrKCZuZnNf
Y2xpZF9pbml0X211dA0KPiBleCk7DQo+ICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg
ICAgICAgICBuZnM0MV93YWxrX2NsaWVudF9saXN0KGNscCwNCj4gcmVzdWx0LCBjcmVkKTsNCj4g
ICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIG5mc193YWl0X2NsaWVudF9p
bml0X2NvbXBsZXRlDQo+IChDTElFTlRBKTsNCj4gKHdhaXRpbmcgZm9yIG5mc19jbGlkX2luaXRf
bXV0ZXgpDQo+IA0KPiBNYWtlIHN1cmUgbmZzX21hdGNoX2NsaWVudCgpIG9ubHkgZXZhbHVhdGVz
IGNsaWVudHMgdGhhdCBoYXZlDQo+IGNvbXBsZXRlZA0KPiBpbml0aWFsaXphdGlvbiBpbiBvcmRl
ciB0byBwcmV2ZW50IHRoYXQgZGVhZGxvY2suDQo+IA0KPiBTaWduZWQtb2ZmLWJ5OiBTY290dCBN
YXloZXcgPHNtYXloZXdAcmVkaGF0LmNvbT4NCj4gLS0tDQo+ICBmcy9uZnMvY2xpZW50LmMgICAg
IHwgMTEgKysrKysrKysrKysNCj4gIGZzL25mcy9uZnM0Y2xpZW50LmMgfCAgOSArKysrKysrKy0N
Cj4gIDIgZmlsZXMgY2hhbmdlZCwgMTkgaW5zZXJ0aW9ucygrKSwgMSBkZWxldGlvbigtKQ0KPiAN
Cj4gZGlmZiAtLWdpdCBhL2ZzL25mcy9jbGllbnQuYyBiL2ZzL25mcy9jbGllbnQuYw0KPiBpbmRl
eCAwYWMyZmIxLi43ZTQyMzgwIDEwMDY0NA0KPiAtLS0gYS9mcy9uZnMvY2xpZW50LmMNCj4gKysr
IGIvZnMvbmZzL2NsaWVudC5jDQo+IEBAIC0yOTEsMTIgKzI5MSwyMyBAQCBzdGF0aWMgc3RydWN0
IG5mc19jbGllbnQNCj4gKm5mc19tYXRjaF9jbGllbnQoY29uc3Qgc3RydWN0IG5mc19jbGllbnRf
aW5pdGRhdGEgKmRhdA0KPiAgCWNvbnN0IHN0cnVjdCBzb2NrYWRkciAqc2FwID0gZGF0YS0+YWRk
cjsNCj4gIAlzdHJ1Y3QgbmZzX25ldCAqbm4gPSBuZXRfZ2VuZXJpYyhkYXRhLT5uZXQsIG5mc19u
ZXRfaWQpOw0KPiAgDQo+ICthZ2FpbjoNCj4gIAlsaXN0X2Zvcl9lYWNoX2VudHJ5KGNscCwgJm5u
LT5uZnNfY2xpZW50X2xpc3QsDQo+IGNsX3NoYXJlX2xpbmspIHsNCj4gIAkgICAgICAgIGNvbnN0
IHN0cnVjdCBzb2NrYWRkciAqY2xhcCA9IChzdHJ1Y3Qgc29ja2FkZHINCj4gKikmY2xwLT5jbF9h
ZGRyOw0KPiAgCQkvKiBEb24ndCBtYXRjaCBjbGllbnRzIHRoYXQgZmFpbGVkIHRvIGluaXRpYWxp
c2UNCj4gcHJvcGVybHkgKi8NCj4gIAkJaWYgKGNscC0+Y2xfY29uc19zdGF0ZSA8IDApDQo+ICAJ
CQljb250aW51ZTsNCj4gIA0KPiArCQkvKiBJZiBhIGNsaWVudCBpcyBzdGlsbCBpbml0aWFsaXpp
bmcgdGhlbiB3ZSBuZWVkIHRvDQo+IHdhaXQgKi8NCj4gKwkJaWYgKGNscC0+Y2xfY29uc19zdGF0
ZSA+IE5GU19DU19SRUFEWSkgew0KPiArCQkJc3Bpbl91bmxvY2soJm5uLT5uZnNfY2xpZW50X2xv
Y2spOw0KPiArCQkJcmVmY291bnRfaW5jKCZjbHAtPmNsX2NvdW50KTsNCg0KVGhlIHJlZmNvdW50
IG5lZWRzIHRvIGJlIGJ1bXBlZCBiZWZvcmUgZHJvcHBpbmcgdGhlIHNwaW5sb2NrIGFib3ZlLg0K
DQo+ICsJCQluZnNfd2FpdF9jbGllbnRfaW5pdF9jb21wbGV0ZShjbHApOw0KPiArCQkJbmZzX3B1
dF9jbGllbnQoY2xwKTsNCj4gKwkJCXNwaW5fbG9jaygmbm4tPm5mc19jbGllbnRfbG9jayk7DQo+
ICsJCQlnb3RvIGFnYWluOw0KPiArCQl9DQo+ICsNCj4gIAkJLyogRGlmZmVyZW50IE5GUyB2ZXJz
aW9ucyBjYW5ub3Qgc2hhcmUgdGhlIHNhbWUNCj4gbmZzX2NsaWVudCAqLw0KPiAgCQlpZiAoY2xw
LT5ycGNfb3BzICE9IGRhdGEtPm5mc19tb2QtPnJwY19vcHMpDQo+ICAJCQljb250aW51ZTsNCj4g
ZGlmZiAtLWdpdCBhL2ZzL25mcy9uZnM0Y2xpZW50LmMgYi9mcy9uZnMvbmZzNGNsaWVudC5jDQo+
IGluZGV4IDEyYmJhYjAuLjM5ZGQzOWMgMTAwNjQ0DQo+IC0tLSBhL2ZzL25mcy9uZnM0Y2xpZW50
LmMNCj4gKysrIGIvZnMvbmZzL25mczRjbGllbnQuYw0KPiBAQCAtNDExLDggKzQxMSwxNSBAQCBz
dHJ1Y3QgbmZzX2NsaWVudCAqbmZzNF9pbml0X2NsaWVudChzdHJ1Y3QNCj4gbmZzX2NsaWVudCAq
Y2xwLA0KPiAgCWlmIChlcnJvciA8IDApDQo+ICAJCWdvdG8gZXJyb3I7DQo+ICANCj4gLQlpZiAo
Y2xwICE9IG9sZCkNCj4gKwlpZiAoY2xwICE9IG9sZCkgew0KPiAgCQljbHAtPmNsX3ByZXNlcnZl
X2NsaWQgPSB0cnVlOw0KPiArCQkvKg0KPiArCQkgKiBNYXJrIHRoZSBjbGllbnQgYXMgaGF2aW5n
IGZhaWxlZCBpbml0aWFsaXphdGlvbg0KPiBzbyBvdGhlcg0KPiArCQkgKiBwcm9jZXNzZXMgd2Fs
a2luZyB0aGUgbmZzX2NsaWVudF9saXN0IGluDQo+IG5mc19tYXRjaF9jbGllbnQoKQ0KPiArCQkg
KiB3b24ndCB0cnkgdG8gdXNlIGl0Lg0KPiArCQkgKi8NCj4gKwkJbmZzX21hcmtfY2xpZW50X3Jl
YWR5KGNscCwgLUVQRVJNKTsNCg0KWW91IGNhbid0IGRvIHRoaXMgaWYgdGhlIGNsaWVudCB3YXMg
YWxyZWFkeSBtYXJrZWQgYXMgTkZTX0NTX1JFQURZLCBzbw0KeW91IG5lZWQgYXQgbGVhc3QgdG8g
bW92ZSB0aGUgIW5mczRfaGFzX3Nlc3Npb24oKSBjb25kaXRpb24gNCBsaW5lcw0KYWJvdmUgdGhp
cy4NCg0KPiArCX0NCj4gIAluZnNfcHV0X2NsaWVudChjbHApOw0KPiAgCWNsZWFyX2JpdChORlNf
Q1NfVFNNX1BPU1NJQkxFLCAmY2xwLT5jbF9mbGFncyk7DQo+ICAJcmV0dXJuIG9sZDsNCi0tIA0K
VHJvbmQgTXlrbGVidXN0DQpMaW51eCBORlMgY2xpZW50IG1haW50YWluZXIsIFByaW1hcnlEYXRh
DQp0cm9uZC5teWtsZWJ1c3RAcHJpbWFyeWRhdGEuY29tDQo=


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v2] nfs: fix a deadlock in nfs client initialization
  2017-12-01  2:36                 ` Trond Myklebust
@ 2017-12-01 13:10                   ` Scott Mayhew
  2017-12-01 14:42                     ` Trond Myklebust
  0 siblings, 1 reply; 14+ messages in thread
From: Scott Mayhew @ 2017-12-01 13:10 UTC (permalink / raw)
  To: Trond Myklebust; +Cc: anna.schumaker, linux-nfs

On Fri, 01 Dec 2017, Trond Myklebust wrote:

> On Thu, 2017-11-30 at 17:21 -0500, Scott Mayhew wrote:
> > The following deadlock can occur between a process waiting for a
> > client
> > to initialize in while walking the client list during nfsv4 server
> > trunking
> > detection and another process waiting for the nfs_clid_init_mutex so
> > it
> > can initialize that client:
> > 
> > Process 1                               Process 2
> > ---------                               ---------
> > spin_lock(&nn->nfs_client_lock);
> > list_add_tail(&CLIENTA->cl_share_link,
> >         &nn->nfs_client_list);
> > spin_unlock(&nn->nfs_client_lock);
> >                                         spin_lock(&nn-
> > >nfs_client_lock);
> >                                         list_add_tail(&CLIENTB-
> > >cl_share_link,
> >                                                 &nn-
> > >nfs_client_list);
> >                                         spin_unlock(&nn-
> > >nfs_client_lock);
> >                                         mutex_lock(&nfs_clid_init_mut
> > ex);
> >                                         nfs41_walk_client_list(clp,
> > result, cred);
> >                                         nfs_wait_client_init_complete
> > (CLIENTA);
> > (waiting for nfs_clid_init_mutex)
> > 
> > Make sure nfs_match_client() only evaluates clients that have
> > completed
> > initialization in order to prevent that deadlock.
> > 
> > Signed-off-by: Scott Mayhew <smayhew@redhat.com>
> > ---
> >  fs/nfs/client.c     | 11 +++++++++++
> >  fs/nfs/nfs4client.c |  9 ++++++++-
> >  2 files changed, 19 insertions(+), 1 deletion(-)
> > 
> > diff --git a/fs/nfs/client.c b/fs/nfs/client.c
> > index 0ac2fb1..7e42380 100644
> > --- a/fs/nfs/client.c
> > +++ b/fs/nfs/client.c
> > @@ -291,12 +291,23 @@ static struct nfs_client
> > *nfs_match_client(const struct nfs_client_initdata *dat
> >  	const struct sockaddr *sap = data->addr;
> >  	struct nfs_net *nn = net_generic(data->net, nfs_net_id);
> >  
> > +again:
> >  	list_for_each_entry(clp, &nn->nfs_client_list,
> > cl_share_link) {
> >  	        const struct sockaddr *clap = (struct sockaddr
> > *)&clp->cl_addr;
> >  		/* Don't match clients that failed to initialise
> > properly */
> >  		if (clp->cl_cons_state < 0)
> >  			continue;
> >  
> > +		/* If a client is still initializing then we need to
> > wait */
> > +		if (clp->cl_cons_state > NFS_CS_READY) {
> > +			spin_unlock(&nn->nfs_client_lock);
> > +			refcount_inc(&clp->cl_count);
> 
> The refcount needs to be bumped before dropping the spinlock above.

Okay.
> 
> > +			nfs_wait_client_init_complete(clp);
> > +			nfs_put_client(clp);
> > +			spin_lock(&nn->nfs_client_lock);
> > +			goto again;
> > +		}
> > +
> >  		/* Different NFS versions cannot share the same
> > nfs_client */
> >  		if (clp->rpc_ops != data->nfs_mod->rpc_ops)
> >  			continue;
> > diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
> > index 12bbab0..39dd39c 100644
> > --- a/fs/nfs/nfs4client.c
> > +++ b/fs/nfs/nfs4client.c
> > @@ -411,8 +411,15 @@ struct nfs_client *nfs4_init_client(struct
> > nfs_client *clp,
> >  	if (error < 0)
> >  		goto error;
> >  
> > -	if (clp != old)
> > +	if (clp != old) {
> >  		clp->cl_preserve_clid = true;
> > +		/*
> > +		 * Mark the client as having failed initialization
> > so other
> > +		 * processes walking the nfs_client_list in
> > nfs_match_client()
> > +		 * won't try to use it.
> > +		 */
> > +		nfs_mark_client_ready(clp, -EPERM);
> 
> You can't do this if the client was already marked as NFS_CS_READY, so
> you need at least to move the !nfs4_has_session() condition 4 lines
> above this.

How about if I change it to this:

                if (!nfs_client_init_is_complete(clp))
                        nfs_mark_client_ready(clp, -EPERM);

In my earlier testing I had problems whenever I tried moving that
!nfs4_has_session() condition.  

-Scott
> 
> > +	}
> >  	nfs_put_client(clp);
> >  	clear_bit(NFS_CS_TSM_POSSIBLE, &clp->cl_flags);
> >  	return old;
> -- 
> Trond Myklebust
> Linux NFS client maintainer, PrimaryData
> trond.myklebust@primarydata.com

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v2] nfs: fix a deadlock in nfs client initialization
  2017-12-01 13:10                   ` Scott Mayhew
@ 2017-12-01 14:42                     ` Trond Myklebust
  2017-12-05 18:55                       ` [PATCH v3] " Scott Mayhew
  0 siblings, 1 reply; 14+ messages in thread
From: Trond Myklebust @ 2017-12-01 14:42 UTC (permalink / raw)
  To: smayhew; +Cc: anna.schumaker, linux-nfs

T24gRnJpLCAyMDE3LTEyLTAxIGF0IDA4OjEwIC0wNTAwLCBTY290dCBNYXloZXcgd3JvdGU6DQo+
IE9uIEZyaSwgMDEgRGVjIDIwMTcsIFRyb25kIE15a2xlYnVzdCB3cm90ZToNCj4gDQo+ID4gT24g
VGh1LCAyMDE3LTExLTMwIGF0IDE3OjIxIC0wNTAwLCBTY290dCBNYXloZXcgd3JvdGU6DQo+ID4g
PiBUaGUgZm9sbG93aW5nIGRlYWRsb2NrIGNhbiBvY2N1ciBiZXR3ZWVuIGEgcHJvY2VzcyB3YWl0
aW5nIGZvciBhDQo+ID4gPiBjbGllbnQNCj4gPiA+IHRvIGluaXRpYWxpemUgaW4gd2hpbGUgd2Fs
a2luZyB0aGUgY2xpZW50IGxpc3QgZHVyaW5nIG5mc3Y0DQo+ID4gPiBzZXJ2ZXINCj4gPiA+IHRy
dW5raW5nDQo+ID4gPiBkZXRlY3Rpb24gYW5kIGFub3RoZXIgcHJvY2VzcyB3YWl0aW5nIGZvciB0
aGUgbmZzX2NsaWRfaW5pdF9tdXRleA0KPiA+ID4gc28NCj4gPiA+IGl0DQo+ID4gPiBjYW4gaW5p
dGlhbGl6ZSB0aGF0IGNsaWVudDoNCj4gPiA+IA0KPiA+ID4gUHJvY2VzcyAxICAgICAgICAgICAg
ICAgICAgICAgICAgICAgICAgIFByb2Nlc3MgMg0KPiA+ID4gLS0tLS0tLS0tICAgICAgICAgICAg
ICAgICAgICAgICAgICAgICAgIC0tLS0tLS0tLQ0KPiA+ID4gc3Bpbl9sb2NrKCZubi0+bmZzX2Ns
aWVudF9sb2NrKTsNCj4gPiA+IGxpc3RfYWRkX3RhaWwoJkNMSUVOVEEtPmNsX3NoYXJlX2xpbmss
DQo+ID4gPiAgICAgICAgICZubi0+bmZzX2NsaWVudF9saXN0KTsNCj4gPiA+IHNwaW5fdW5sb2Nr
KCZubi0+bmZzX2NsaWVudF9sb2NrKTsNCj4gPiA+ICAgICAgICAgICAgICAgICAgICAgICAgICAg
ICAgICAgICAgICAgICBzcGluX2xvY2soJm5uLQ0KPiA+ID4gPiBuZnNfY2xpZW50X2xvY2spOw0K
PiA+ID4gDQo+ID4gPiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgbGlz
dF9hZGRfdGFpbCgmQ0xJRU5UQi0NCj4gPiA+ID4gY2xfc2hhcmVfbGluaywNCj4gPiA+IA0KPiA+
ID4gICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgJm5uLQ0K
PiA+ID4gPiBuZnNfY2xpZW50X2xpc3QpOw0KPiA+ID4gDQo+ID4gPiAgICAgICAgICAgICAgICAg
ICAgICAgICAgICAgICAgICAgICAgICAgc3Bpbl91bmxvY2soJm5uLQ0KPiA+ID4gPiBuZnNfY2xp
ZW50X2xvY2spOw0KPiA+ID4gDQo+ID4gPiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg
ICAgICAgICAgbXV0ZXhfbG9jaygmbmZzX2NsaWRfaW5pdA0KPiA+ID4gX211dA0KPiA+ID4gZXgp
Ow0KPiA+ID4gICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIG5mczQxX3dh
bGtfY2xpZW50X2xpc3QoY2wNCj4gPiA+IHAsDQo+ID4gPiByZXN1bHQsIGNyZWQpOw0KPiA+ID4g
ICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIG5mc193YWl0X2NsaWVudF9p
bml0X2NvbXANCj4gPiA+IGxldGUNCj4gPiA+IChDTElFTlRBKTsNCj4gPiA+ICh3YWl0aW5nIGZv
ciBuZnNfY2xpZF9pbml0X211dGV4KQ0KPiA+ID4gDQo+ID4gPiBNYWtlIHN1cmUgbmZzX21hdGNo
X2NsaWVudCgpIG9ubHkgZXZhbHVhdGVzIGNsaWVudHMgdGhhdCBoYXZlDQo+ID4gPiBjb21wbGV0
ZWQNCj4gPiA+IGluaXRpYWxpemF0aW9uIGluIG9yZGVyIHRvIHByZXZlbnQgdGhhdCBkZWFkbG9j
ay4NCj4gPiA+IA0KPiA+ID4gU2lnbmVkLW9mZi1ieTogU2NvdHQgTWF5aGV3IDxzbWF5aGV3QHJl
ZGhhdC5jb20+DQo+ID4gPiAtLS0NCj4gPiA+ICBmcy9uZnMvY2xpZW50LmMgICAgIHwgMTEgKysr
KysrKysrKysNCj4gPiA+ICBmcy9uZnMvbmZzNGNsaWVudC5jIHwgIDkgKysrKysrKystDQo+ID4g
PiAgMiBmaWxlcyBjaGFuZ2VkLCAxOSBpbnNlcnRpb25zKCspLCAxIGRlbGV0aW9uKC0pDQo+ID4g
PiANCj4gPiA+IGRpZmYgLS1naXQgYS9mcy9uZnMvY2xpZW50LmMgYi9mcy9uZnMvY2xpZW50LmMN
Cj4gPiA+IGluZGV4IDBhYzJmYjEuLjdlNDIzODAgMTAwNjQ0DQo+ID4gPiAtLS0gYS9mcy9uZnMv
Y2xpZW50LmMNCj4gPiA+ICsrKyBiL2ZzL25mcy9jbGllbnQuYw0KPiA+ID4gQEAgLTI5MSwxMiAr
MjkxLDIzIEBAIHN0YXRpYyBzdHJ1Y3QgbmZzX2NsaWVudA0KPiA+ID4gKm5mc19tYXRjaF9jbGll
bnQoY29uc3Qgc3RydWN0IG5mc19jbGllbnRfaW5pdGRhdGEgKmRhdA0KPiA+ID4gIAljb25zdCBz
dHJ1Y3Qgc29ja2FkZHIgKnNhcCA9IGRhdGEtPmFkZHI7DQo+ID4gPiAgCXN0cnVjdCBuZnNfbmV0
ICpubiA9IG5ldF9nZW5lcmljKGRhdGEtPm5ldCwgbmZzX25ldF9pZCk7DQo+ID4gPiAgDQo+ID4g
PiArYWdhaW46DQo+ID4gPiAgCWxpc3RfZm9yX2VhY2hfZW50cnkoY2xwLCAmbm4tPm5mc19jbGll
bnRfbGlzdCwNCj4gPiA+IGNsX3NoYXJlX2xpbmspIHsNCj4gPiA+ICAJICAgICAgICBjb25zdCBz
dHJ1Y3Qgc29ja2FkZHIgKmNsYXAgPSAoc3RydWN0IHNvY2thZGRyDQo+ID4gPiAqKSZjbHAtPmNs
X2FkZHI7DQo+ID4gPiAgCQkvKiBEb24ndCBtYXRjaCBjbGllbnRzIHRoYXQgZmFpbGVkIHRvIGlu
aXRpYWxpc2UNCj4gPiA+IHByb3Blcmx5ICovDQo+ID4gPiAgCQlpZiAoY2xwLT5jbF9jb25zX3N0
YXRlIDwgMCkNCj4gPiA+ICAJCQljb250aW51ZTsNCj4gPiA+ICANCj4gPiA+ICsJCS8qIElmIGEg
Y2xpZW50IGlzIHN0aWxsIGluaXRpYWxpemluZyB0aGVuIHdlDQo+ID4gPiBuZWVkIHRvDQo+ID4g
PiB3YWl0ICovDQo+ID4gPiArCQlpZiAoY2xwLT5jbF9jb25zX3N0YXRlID4gTkZTX0NTX1JFQURZ
KSB7DQo+ID4gPiArCQkJc3Bpbl91bmxvY2soJm5uLT5uZnNfY2xpZW50X2xvY2spOw0KPiA+ID4g
KwkJCXJlZmNvdW50X2luYygmY2xwLT5jbF9jb3VudCk7DQo+ID4gDQo+ID4gVGhlIHJlZmNvdW50
IG5lZWRzIHRvIGJlIGJ1bXBlZCBiZWZvcmUgZHJvcHBpbmcgdGhlIHNwaW5sb2NrIGFib3ZlLg0K
PiANCj4gT2theS4NCj4gPiANCj4gPiA+ICsJCQluZnNfd2FpdF9jbGllbnRfaW5pdF9jb21wbGV0
ZShjbHApOw0KPiA+ID4gKwkJCW5mc19wdXRfY2xpZW50KGNscCk7DQo+ID4gPiArCQkJc3Bpbl9s
b2NrKCZubi0+bmZzX2NsaWVudF9sb2NrKTsNCj4gPiA+ICsJCQlnb3RvIGFnYWluOw0KPiA+ID4g
KwkJfQ0KPiA+ID4gKw0KPiA+ID4gIAkJLyogRGlmZmVyZW50IE5GUyB2ZXJzaW9ucyBjYW5ub3Qg
c2hhcmUgdGhlIHNhbWUNCj4gPiA+IG5mc19jbGllbnQgKi8NCj4gPiA+ICAJCWlmIChjbHAtPnJw
Y19vcHMgIT0gZGF0YS0+bmZzX21vZC0+cnBjX29wcykNCj4gPiA+ICAJCQljb250aW51ZTsNCj4g
PiA+IGRpZmYgLS1naXQgYS9mcy9uZnMvbmZzNGNsaWVudC5jIGIvZnMvbmZzL25mczRjbGllbnQu
Yw0KPiA+ID4gaW5kZXggMTJiYmFiMC4uMzlkZDM5YyAxMDA2NDQNCj4gPiA+IC0tLSBhL2ZzL25m
cy9uZnM0Y2xpZW50LmMNCj4gPiA+ICsrKyBiL2ZzL25mcy9uZnM0Y2xpZW50LmMNCj4gPiA+IEBA
IC00MTEsOCArNDExLDE1IEBAIHN0cnVjdCBuZnNfY2xpZW50ICpuZnM0X2luaXRfY2xpZW50KHN0
cnVjdA0KPiA+ID4gbmZzX2NsaWVudCAqY2xwLA0KPiA+ID4gIAlpZiAoZXJyb3IgPCAwKQ0KPiA+
ID4gIAkJZ290byBlcnJvcjsNCj4gPiA+ICANCj4gPiA+IC0JaWYgKGNscCAhPSBvbGQpDQo+ID4g
PiArCWlmIChjbHAgIT0gb2xkKSB7DQo+ID4gPiAgCQljbHAtPmNsX3ByZXNlcnZlX2NsaWQgPSB0
cnVlOw0KPiA+ID4gKwkJLyoNCj4gPiA+ICsJCSAqIE1hcmsgdGhlIGNsaWVudCBhcyBoYXZpbmcg
ZmFpbGVkDQo+ID4gPiBpbml0aWFsaXphdGlvbg0KPiA+ID4gc28gb3RoZXINCj4gPiA+ICsJCSAq
IHByb2Nlc3NlcyB3YWxraW5nIHRoZSBuZnNfY2xpZW50X2xpc3QgaW4NCj4gPiA+IG5mc19tYXRj
aF9jbGllbnQoKQ0KPiA+ID4gKwkJICogd29uJ3QgdHJ5IHRvIHVzZSBpdC4NCj4gPiA+ICsJCSAq
Lw0KPiA+ID4gKwkJbmZzX21hcmtfY2xpZW50X3JlYWR5KGNscCwgLUVQRVJNKTsNCj4gPiANCj4g
PiBZb3UgY2FuJ3QgZG8gdGhpcyBpZiB0aGUgY2xpZW50IHdhcyBhbHJlYWR5IG1hcmtlZCBhcyBO
RlNfQ1NfUkVBRFksDQo+ID4gc28NCj4gPiB5b3UgbmVlZCBhdCBsZWFzdCB0byBtb3ZlIHRoZSAh
bmZzNF9oYXNfc2Vzc2lvbigpIGNvbmRpdGlvbiA0IGxpbmVzDQo+ID4gYWJvdmUgdGhpcy4NCj4g
DQo+IEhvdyBhYm91dCBpZiBJIGNoYW5nZSBpdCB0byB0aGlzOg0KPiANCj4gICAgICAgICAgICAg
ICAgIGlmICghbmZzX2NsaWVudF9pbml0X2lzX2NvbXBsZXRlKGNscCkpDQo+ICAgICAgICAgICAg
ICAgICAgICAgICAgIG5mc19tYXJrX2NsaWVudF9yZWFkeShjbHAsIC1FUEVSTSk7DQo+IA0KPiBJ
biBteSBlYXJsaWVyIHRlc3RpbmcgSSBoYWQgcHJvYmxlbXMgd2hlbmV2ZXIgSSB0cmllZCBtb3Zp
bmcgdGhhdA0KPiAhbmZzNF9oYXNfc2Vzc2lvbigpIGNvbmRpdGlvbi4gIA0KPiANCg0KV291bGRu
J3QgdGhhdCBtZWFuIHlvdSB3b3VsZCBlbmQgdXAgd2l0aCBkaWZmZXJlbnQgYmVoYXZpb3VyIGZv
ciBORlN2NA0KYW5kIE5GU3Y0LjEgdy5yLnQuIHRydW5raW5nLCB3aXRoIHRoZSBmb3JtZXIgb2Jz
ZXJ2aW5nIHRoZSBjdXJyZW50DQpiZWhhdmlvdXIsIGFuZCBvbmx5IHRoZSBsYXR0ZXIgd29ya2lu
ZyBjb3JyZWN0bHk/DQoNCi0tIA0KVHJvbmQgTXlrbGVidXN0DQpMaW51eCBORlMgY2xpZW50IG1h
aW50YWluZXIsIFByaW1hcnlEYXRhDQp0cm9uZC5teWtsZWJ1c3RAcHJpbWFyeWRhdGEuY29tDQo=


^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH v3] nfs: fix a deadlock in nfs client initialization
  2017-12-01 14:42                     ` Trond Myklebust
@ 2017-12-05 18:55                       ` Scott Mayhew
  0 siblings, 0 replies; 14+ messages in thread
From: Scott Mayhew @ 2017-12-05 18:55 UTC (permalink / raw)
  To: trond.myklebust, anna.schumaker; +Cc: linux-nfs

The following deadlock can occur between a process waiting for a client
to initialize in while walking the client list during nfsv4 server trunking
detection and another process waiting for the nfs_clid_init_mutex so it
can initialize that client:

Process 1                               Process 2
---------                               ---------
spin_lock(&nn->nfs_client_lock);
list_add_tail(&CLIENTA->cl_share_link,
        &nn->nfs_client_list);
spin_unlock(&nn->nfs_client_lock);
                                        spin_lock(&nn->nfs_client_lock);
                                        list_add_tail(&CLIENTB->cl_share_link,
                                                &nn->nfs_client_list);
                                        spin_unlock(&nn->nfs_client_lock);
                                        mutex_lock(&nfs_clid_init_mutex);
                                        nfs41_walk_client_list(clp, result, cred);
                                        nfs_wait_client_init_complete(CLIENTA);
(waiting for nfs_clid_init_mutex)

Make sure nfs_match_client() only evaluates clients that have completed
initialization in order to prevent that deadlock.

This patch also fixes v4.0 trunking behavior by not marking the client
NFS_CS_READY until the clientid has been confirmed.

Signed-off-by: Scott Mayhew <smayhew@redhat.com>
---
 fs/nfs/client.c     | 11 +++++++++++
 fs/nfs/nfs4client.c | 17 +++++++++++++----
 2 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 0ac2fb1..b9129e2 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -291,12 +291,23 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat
 	const struct sockaddr *sap = data->addr;
 	struct nfs_net *nn = net_generic(data->net, nfs_net_id);
 
+again:
 	list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) {
 	        const struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr;
 		/* Don't match clients that failed to initialise properly */
 		if (clp->cl_cons_state < 0)
 			continue;
 
+		/* If a client is still initializing then we need to wait */
+		if (clp->cl_cons_state > NFS_CS_READY) {
+			refcount_inc(&clp->cl_count);
+			spin_unlock(&nn->nfs_client_lock);
+			nfs_wait_client_init_complete(clp);
+			nfs_put_client(clp);
+			spin_lock(&nn->nfs_client_lock);
+			goto again;
+		}
+
 		/* Different NFS versions cannot share the same nfs_client */
 		if (clp->rpc_ops != data->nfs_mod->rpc_ops)
 			continue;
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 12bbab0..65a7e5d 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -404,15 +404,19 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp,
 	if (error < 0)
 		goto error;
 
-	if (!nfs4_has_session(clp))
-		nfs_mark_client_ready(clp, NFS_CS_READY);
-
 	error = nfs4_discover_server_trunking(clp, &old);
 	if (error < 0)
 		goto error;
 
-	if (clp != old)
+	if (clp != old) {
 		clp->cl_preserve_clid = true;
+		/*
+		 * Mark the client as having failed initialization so other
+		 * processes walking the nfs_client_list in nfs_match_client()
+		 * won't try to use it.
+		 */
+		nfs_mark_client_ready(clp, -EPERM);
+	}
 	nfs_put_client(clp);
 	clear_bit(NFS_CS_TSM_POSSIBLE, &clp->cl_flags);
 	return old;
@@ -539,6 +543,9 @@ int nfs40_walk_client_list(struct nfs_client *new,
 	spin_lock(&nn->nfs_client_lock);
 	list_for_each_entry(pos, &nn->nfs_client_list, cl_share_link) {
 
+		if (pos == new)
+			goto found;
+
 		status = nfs4_match_client(pos, new, &prev, nn);
 		if (status < 0)
 			goto out_unlock;
@@ -559,6 +566,7 @@ int nfs40_walk_client_list(struct nfs_client *new,
 		 * way that a SETCLIENTID_CONFIRM to pos can succeed is
 		 * if new and pos point to the same server:
 		 */
+found:
 		refcount_inc(&pos->cl_count);
 		spin_unlock(&nn->nfs_client_lock);
 
@@ -572,6 +580,7 @@ int nfs40_walk_client_list(struct nfs_client *new,
 		case 0:
 			nfs4_swap_callback_idents(pos, new);
 			pos->cl_confirm = new->cl_confirm;
+			nfs_mark_client_ready(pos, NFS_CS_READY);
 
 			prev = NULL;
 			*result = pos;
-- 
2.9.5


^ permalink raw reply related	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2017-12-05 18:55 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-11-07 14:29 [PATCH] nfs: fix a deadlock in nfs v4.1 client initialization Scott Mayhew
2017-11-07 15:30 ` Trond Myklebust
2017-11-07 18:26   ` Scott Mayhew
2017-11-07 18:30     ` Trond Myklebust
2017-11-20 21:28       ` Scott Mayhew
2017-11-20 21:41         ` [PATCH] nfs: fix a deadlock in nfs " Scott Mayhew
2017-11-29 20:16           ` Anna Schumaker
2017-11-29 20:50           ` Trond Myklebust
2017-11-30 14:46             ` Scott Mayhew
2017-11-30 22:21               ` [PATCH v2] " Scott Mayhew
2017-12-01  2:36                 ` Trond Myklebust
2017-12-01 13:10                   ` Scott Mayhew
2017-12-01 14:42                     ` Trond Myklebust
2017-12-05 18:55                       ` [PATCH v3] " Scott Mayhew

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.