Linux-NFS Archive on lore.kernel.org
 help / Atom feed
* [PATCH] svc_run: make sure only one svc_run loop runs in one process
@ 2019-04-09 11:37 xiubli
  2019-05-16  2:55 ` Xiubo Li
  0 siblings, 1 reply; 6+ messages in thread
From: xiubli @ 2019-04-09 11:37 UTC (permalink / raw)
  To: libtirpc-devel; +Cc: linux-nfs, Xiubo Li

From: Xiubo Li <xiubli@redhat.com>

In the gluster-block project there are 2 separate threads, both
of which will run the svc_run loop. This could work well with the glibc
version, but with libtirpc we are hitting random crash and stuck
issues.

For more detail, please see:
https://github.com/gluster/gluster-block/pull/182

Signed-off-by: Xiubo Li <xiubli@redhat.com>
---
 src/svc_run.c | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/src/svc_run.c b/src/svc_run.c
index f40314b..b295755 100644
--- a/src/svc_run.c
+++ b/src/svc_run.c
@@ -38,12 +38,17 @@
 #include <string.h>
 #include <unistd.h>
 #include <sys/poll.h>
+#include <syslog.h>
+#include <stdbool.h>
 
 
 #include <rpc/rpc.h>
 #include "rpc_com.h"
 #include <sys/select.h>
 
+static bool svc_loop_running = false;
+static pthread_mutex_t svc_run_lock = PTHREAD_MUTEX_INITIALIZER;
+
 void
 svc_run()
 {
@@ -51,6 +56,16 @@ svc_run()
   struct pollfd *my_pollfd = NULL;
   int last_max_pollfd = 0;
 
+  pthread_mutex_lock(&svc_run_lock);
+  if (svc_loop_running) {
+    pthread_mutex_unlock(&svc_run_lock);
+    syslog (LOG_ERR, "svc_run: svc loop is already running in current process %d", getpid());
+    return;
+  }
+
+  svc_loop_running = true;
+  pthread_mutex_unlock(&svc_run_lock);
+
   for (;;) {
     int max_pollfd = svc_max_pollfd;
     if (max_pollfd == 0 && svc_pollfd == NULL)
@@ -111,4 +126,8 @@ svc_exit()
 	svc_pollfd = NULL;
 	svc_max_pollfd = 0;
 	rwlock_unlock(&svc_fd_lock);
+
+    pthread_mutex_lock(&svc_run_lock);
+    svc_loop_running = false;
+    pthread_mutex_unlock(&svc_run_lock);
 }
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] svc_run: make sure only one svc_run loop runs in one process
  2019-04-09 11:37 [PATCH] svc_run: make sure only one svc_run loop runs in one process xiubli
@ 2019-05-16  2:55 ` Xiubo Li
  2019-06-11 14:54   ` Steve Dickson
  0 siblings, 1 reply; 6+ messages in thread
From: Xiubo Li @ 2019-05-16  2:55 UTC (permalink / raw)
  To: libtirpc-devel; +Cc: linux-nfs

Hey ping.

What's the state of this patch and will it make sense here?

Thanks
BRs

On 2019/4/9 19:37, xiubli@redhat.com wrote:
> From: Xiubo Li <xiubli@redhat.com>
>
> In gluster-block project and there are 2 separate threads, both
> of which will run the svc_run loop, this could work well in glibc
> version, but in libtirpc we are hitting the random crash and stuck
> issues.
>
> More detail please see:
> https://github.com/gluster/gluster-block/pull/182
>
> Signed-off-by: Xiubo Li <xiubli@redhat.com>
> ---
>   src/svc_run.c | 19 +++++++++++++++++++
>   1 file changed, 19 insertions(+)
>
> diff --git a/src/svc_run.c b/src/svc_run.c
> index f40314b..b295755 100644
> --- a/src/svc_run.c
> +++ b/src/svc_run.c
> @@ -38,12 +38,17 @@
>   #include <string.h>
>   #include <unistd.h>
>   #include <sys/poll.h>
> +#include <syslog.h>
> +#include <stdbool.h>
>   
>   
>   #include <rpc/rpc.h>
>   #include "rpc_com.h"
>   #include <sys/select.h>
>   
> +static bool svc_loop_running = false;
> +static pthread_mutex_t svc_run_lock = PTHREAD_MUTEX_INITIALIZER;
> +
>   void
>   svc_run()
>   {
> @@ -51,6 +56,16 @@ svc_run()
>     struct pollfd *my_pollfd = NULL;
>     int last_max_pollfd = 0;
>   
> +  pthread_mutex_lock(&svc_run_lock);
> +  if (svc_loop_running) {
> +    pthread_mutex_unlock(&svc_run_lock);
> +    syslog (LOG_ERR, "svc_run: svc loop is already running in current process %d", getpid());
> +    return;
> +  }
> +
> +  svc_loop_running = true;
> +  pthread_mutex_unlock(&svc_run_lock);
> +
>     for (;;) {
>       int max_pollfd = svc_max_pollfd;
>       if (max_pollfd == 0 && svc_pollfd == NULL)
> @@ -111,4 +126,8 @@ svc_exit()
>   	svc_pollfd = NULL;
>   	svc_max_pollfd = 0;
>   	rwlock_unlock(&svc_fd_lock);
> +
> +    pthread_mutex_lock(&svc_run_lock);
> +    svc_loop_running = false;
> +    pthread_mutex_unlock(&svc_run_lock);
>   }



^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] svc_run: make sure only one svc_run loop runs in one process
  2019-05-16  2:55 ` Xiubo Li
@ 2019-06-11 14:54   ` Steve Dickson
  2019-06-12  3:32     ` Xiubo Li
  0 siblings, 1 reply; 6+ messages in thread
From: Steve Dickson @ 2019-06-11 14:54 UTC (permalink / raw)
  To: Xiubo Li, libtirpc-devel; +Cc: linux-nfs

Sorry for the delay.... 

On 5/15/19 10:55 PM, Xiubo Li wrote:
> Hey ping.
> 
> What's the state of this patch and will it make sense here?
I'm not sure it does make sense.... Shouldn't the mutex lock
be in the call of svc_run()?

steved.

> 
> Thanks
> BRs
> 
> On 2019/4/9 19:37, xiubli@redhat.com wrote:
>> From: Xiubo Li <xiubli@redhat.com>
>>
>> In gluster-block project and there are 2 separate threads, both
>> of which will run the svc_run loop, this could work well in glibc
>> version, but in libtirpc we are hitting the random crash and stuck
>> issues.
>>
>> More detail please see:
>> https://github.com/gluster/gluster-block/pull/182
>>
>> Signed-off-by: Xiubo Li <xiubli@redhat.com>
>> ---
>>   src/svc_run.c | 19 +++++++++++++++++++
>>   1 file changed, 19 insertions(+)
>>
>> diff --git a/src/svc_run.c b/src/svc_run.c
>> index f40314b..b295755 100644
>> --- a/src/svc_run.c
>> +++ b/src/svc_run.c
>> @@ -38,12 +38,17 @@
>>   #include <string.h>
>>   #include <unistd.h>
>>   #include <sys/poll.h>
>> +#include <syslog.h>
>> +#include <stdbool.h>
>>       #include <rpc/rpc.h>
>>   #include "rpc_com.h"
>>   #include <sys/select.h>
>>   +static bool svc_loop_running = false;
>> +static pthread_mutex_t svc_run_lock = PTHREAD_MUTEX_INITIALIZER;
>> +
>>   void
>>   svc_run()
>>   {
>> @@ -51,6 +56,16 @@ svc_run()
>>     struct pollfd *my_pollfd = NULL;
>>     int last_max_pollfd = 0;
>>   +  pthread_mutex_lock(&svc_run_lock);
>> +  if (svc_loop_running) {
>> +    pthread_mutex_unlock(&svc_run_lock);
>> +    syslog (LOG_ERR, "svc_run: svc loop is already running in current process %d", getpid());
>> +    return;
>> +  }
>> +
>> +  svc_loop_running = true;
>> +  pthread_mutex_unlock(&svc_run_lock);
>> +
>>     for (;;) {
>>       int max_pollfd = svc_max_pollfd;
>>       if (max_pollfd == 0 && svc_pollfd == NULL)
>> @@ -111,4 +126,8 @@ svc_exit()
>>       svc_pollfd = NULL;
>>       svc_max_pollfd = 0;
>>       rwlock_unlock(&svc_fd_lock);
>> +
>> +    pthread_mutex_lock(&svc_run_lock);
>> +    svc_loop_running = false;
>> +    pthread_mutex_unlock(&svc_run_lock);
>>   }
> 
> 
> 
> 
> _______________________________________________
> Libtirpc-devel mailing list
> Libtirpc-devel@lists.sourceforge.net
> https://lists.sourceforge.net/lists/listinfo/libtirpc-devel

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] svc_run: make sure only one svc_run loop runs in one process
  2019-06-11 14:54   ` Steve Dickson
@ 2019-06-12  3:32     ` Xiubo Li
  2019-06-12 16:46       ` Olga Kornievskaia
  0 siblings, 1 reply; 6+ messages in thread
From: Xiubo Li @ 2019-06-12  3:32 UTC (permalink / raw)
  To: Steve Dickson, libtirpc-devel; +Cc: linux-nfs

On 2019/6/11 22:54, Steve Dickson wrote:
> Sorry for the delay....
>
> On 5/15/19 10:55 PM, Xiubo Li wrote:
>> Hey ping.
>>
>> What's the state of this patch and will it make sense here?
> I'm not sure it does make sense.... Shouldn't the mutex lock
> be in the call of svc_run()?

Hi Steve,

Yeah, mutex lock should be in the call of svc_run(). This is exactly 
what I do in this change.

If libtirpc is meant to allow only one svc_run() loop in each process, 
then IMO this change is needed. Or, if we will allow more than one, like the 
glibc version does, then this would be a bug in libtirpc.

Thanks.
BRs
Xiubo


> steved.
>
>> Thanks
>> BRs
>>
>> On 2019/4/9 19:37, xiubli@redhat.com wrote:
>>> From: Xiubo Li <xiubli@redhat.com>
>>>
>>> In gluster-block project and there are 2 separate threads, both
>>> of which will run the svc_run loop, this could work well in glibc
>>> version, but in libtirpc we are hitting the random crash and stuck
>>> issues.
>>>
>>> More detail please see:
>>> https://github.com/gluster/gluster-block/pull/182
>>>
>>> Signed-off-by: Xiubo Li <xiubli@redhat.com>
>>> ---
>>>    src/svc_run.c | 19 +++++++++++++++++++
>>>    1 file changed, 19 insertions(+)
>>>
>>> diff --git a/src/svc_run.c b/src/svc_run.c
>>> index f40314b..b295755 100644
>>> --- a/src/svc_run.c
>>> +++ b/src/svc_run.c
>>> @@ -38,12 +38,17 @@
>>>    #include <string.h>
>>>    #include <unistd.h>
>>>    #include <sys/poll.h>
>>> +#include <syslog.h>
>>> +#include <stdbool.h>
>>>        #include <rpc/rpc.h>
>>>    #include "rpc_com.h"
>>>    #include <sys/select.h>
>>>    +static bool svc_loop_running = false;
>>> +static pthread_mutex_t svc_run_lock = PTHREAD_MUTEX_INITIALIZER;
>>> +
>>>    void
>>>    svc_run()
>>>    {
>>> @@ -51,6 +56,16 @@ svc_run()
>>>      struct pollfd *my_pollfd = NULL;
>>>      int last_max_pollfd = 0;
>>>    +  pthread_mutex_lock(&svc_run_lock);
>>> +  if (svc_loop_running) {
>>> +    pthread_mutex_unlock(&svc_run_lock);
>>> +    syslog (LOG_ERR, "svc_run: svc loop is already running in current process %d", getpid());
>>> +    return;
>>> +  }
>>> +
>>> +  svc_loop_running = true;
>>> +  pthread_mutex_unlock(&svc_run_lock);
>>> +
>>>      for (;;) {
>>>        int max_pollfd = svc_max_pollfd;
>>>        if (max_pollfd == 0 && svc_pollfd == NULL)
>>> @@ -111,4 +126,8 @@ svc_exit()
>>>        svc_pollfd = NULL;
>>>        svc_max_pollfd = 0;
>>>        rwlock_unlock(&svc_fd_lock);
>>> +
>>> +    pthread_mutex_lock(&svc_run_lock);
>>> +    svc_loop_running = false;
>>> +    pthread_mutex_unlock(&svc_run_lock);
>>>    }
>>
>>
>>
>> _______________________________________________
>> Libtirpc-devel mailing list
>> Libtirpc-devel@lists.sourceforge.net
>> https://lists.sourceforge.net/lists/listinfo/libtirpc-devel



^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] svc_run: make sure only one svc_run loop runs in one process
  2019-06-12  3:32     ` Xiubo Li
@ 2019-06-12 16:46       ` Olga Kornievskaia
  2019-06-13  0:50         ` Xiubo Li
  0 siblings, 1 reply; 6+ messages in thread
From: Olga Kornievskaia @ 2019-06-12 16:46 UTC (permalink / raw)
  To: Xiubo Li; +Cc: Steve Dickson, libtirpc-devel, linux-nfs

On Wed, Jun 12, 2019 at 3:45 AM Xiubo Li <xiubli@redhat.com> wrote:
>
> On 2019/6/11 22:54, Steve Dickson wrote:
> > Sorry for the delay....
> >
> > On 5/15/19 10:55 PM, Xiubo Li wrote:
> >> Hey ping.
> >>
> >> What's the state of this patch and will it make sense here?
> > I'm not sure it does make sense.... Shouldn't the mutex lock
> > be in the call of svc_run()?
>
> Hi Steve,
>
> Yeah, mutex lock should be in the call of svc_run(). This is exactly
> what I do in this change.
>
> If the libtirpc means to allow only one svc_run() loop in each process,
> so IMO this change is needed. Or if we will allow more than one like the
> glibc version does, so this should be one bug in libtirpc.

Has there been any effort made into investigating what's causing the
crashes? We perhaps should make an effort to see if svc_run() is
thread-safe and examine which functions it uses and which might not be
thread-safe. You might be able to allow greater parallelism than 1
thread in a svc_run() function by just wrapping some non-thread-safe
functions in pthread locks.

>
> Thanks.
> BRs
> Xiubo
>
>
> > steved.
> >
> >> Thanks
> >> BRs
> >>
> >> On 2019/4/9 19:37, xiubli@redhat.com wrote:
> >>> From: Xiubo Li <xiubli@redhat.com>
> >>>
> >>> In gluster-block project and there are 2 separate threads, both
> >>> of which will run the svc_run loop, this could work well in glibc
> >>> version, but in libtirpc we are hitting the random crash and stuck
> >>> issues.
> >>>
> >>> More detail please see:
> >>> https://github.com/gluster/gluster-block/pull/182
> >>>
> >>> Signed-off-by: Xiubo Li <xiubli@redhat.com>
> >>> ---
> >>>    src/svc_run.c | 19 +++++++++++++++++++
> >>>    1 file changed, 19 insertions(+)
> >>>
> >>> diff --git a/src/svc_run.c b/src/svc_run.c
> >>> index f40314b..b295755 100644
> >>> --- a/src/svc_run.c
> >>> +++ b/src/svc_run.c
> >>> @@ -38,12 +38,17 @@
> >>>    #include <string.h>
> >>>    #include <unistd.h>
> >>>    #include <sys/poll.h>
> >>> +#include <syslog.h>
> >>> +#include <stdbool.h>
> >>>        #include <rpc/rpc.h>
> >>>    #include "rpc_com.h"
> >>>    #include <sys/select.h>
> >>>    +static bool svc_loop_running = false;
> >>> +static pthread_mutex_t svc_run_lock = PTHREAD_MUTEX_INITIALIZER;
> >>> +
> >>>    void
> >>>    svc_run()
> >>>    {
> >>> @@ -51,6 +56,16 @@ svc_run()
> >>>      struct pollfd *my_pollfd = NULL;
> >>>      int last_max_pollfd = 0;
> >>>    +  pthread_mutex_lock(&svc_run_lock);
> >>> +  if (svc_loop_running) {
> >>> +    pthread_mutex_unlock(&svc_run_lock);
> >>> +    syslog (LOG_ERR, "svc_run: svc loop is already running in current process %d", getpid());
> >>> +    return;
> >>> +  }
> >>> +
> >>> +  svc_loop_running = true;
> >>> +  pthread_mutex_unlock(&svc_run_lock);
> >>> +
> >>>      for (;;) {
> >>>        int max_pollfd = svc_max_pollfd;
> >>>        if (max_pollfd == 0 && svc_pollfd == NULL)
> >>> @@ -111,4 +126,8 @@ svc_exit()
> >>>        svc_pollfd = NULL;
> >>>        svc_max_pollfd = 0;
> >>>        rwlock_unlock(&svc_fd_lock);
> >>> +
> >>> +    pthread_mutex_lock(&svc_run_lock);
> >>> +    svc_loop_running = false;
> >>> +    pthread_mutex_unlock(&svc_run_lock);
> >>>    }
> >>
> >>
> >>
> >> _______________________________________________
> >> Libtirpc-devel mailing list
> >> Libtirpc-devel@lists.sourceforge.net
> >> https://lists.sourceforge.net/lists/listinfo/libtirpc-devel
>
>

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] svc_run: make sure only one svc_run loop runs in one process
  2019-06-12 16:46       ` Olga Kornievskaia
@ 2019-06-13  0:50         ` Xiubo Li
  0 siblings, 0 replies; 6+ messages in thread
From: Xiubo Li @ 2019-06-13  0:50 UTC (permalink / raw)
  To: Olga Kornievskaia; +Cc: Steve Dickson, libtirpc-devel, linux-nfs

On 2019/6/13 0:46, Olga Kornievskaia wrote:
> On Wed, Jun 12, 2019 at 3:45 AM Xiubo Li <xiubli@redhat.com> wrote:
>> On 2019/6/11 22:54, Steve Dickson wrote:
>>> Sorry for the delay....
>>>
>>> On 5/15/19 10:55 PM, Xiubo Li wrote:
>>>> Hey ping.
>>>>
>>>> What's the state of this patch and will it make sense here?
>>> I'm not sure it does make sense.... Shouldn't the mutex lock
>>> be in the call of svc_run()?
>> Hi Steve,
>>
>> Yeah, mutex lock should be in the call of svc_run(). This is exactly
>> what I do in this change.
>>
>> If the libtirpc means to allow only one svc_run() loop in each process,
>> so IMO this change is needed. Or if we will allow more than one like the
>> glibc version does, so this should be one bug in libtirpc.
> Has there been effort into made into investigating what's causing the
> crashes?

From our earlier investigation and testing, it appeared that if we ran two 
svc_run() loops in one process, such as in pthread1 and pthread2, then 
pthread1 would receive the RPC connection/request which should have been 
handled by pthread2's svc_run loop, and vice versa.

Then we could see many random crashes for tons of different reasons, like 
use-after-free and double-free..., and almost every time the crash would 
occur randomly in different places and different libraries, such as 
libtirpc, glusterfs and gluster-block...

After switching to multiple processes instead of running two svc_run loops 
in multiple pthreads, this issue was resolved, so we didn't dig into it further.


>   We perhaps should make an effort to see if svc_run() is
> thread-safe and examine which functions it uses and which might not be
> thread safe. You might be able to allow greater parallelism then 1
> thread in a svc_run() function by just making some not-thread safe
> functions wrapped in pthread locks.

Yeah, make sense.

Thanks.

BRs


>> Thanks.
>> BRs
>> Xiubo
>>
>>
>>> steved.
>>>
>>>> Thanks
>>>> BRs
>>>>
>>>> On 2019/4/9 19:37, xiubli@redhat.com wrote:
>>>>> From: Xiubo Li <xiubli@redhat.com>
>>>>>
>>>>> In gluster-block project and there are 2 separate threads, both
>>>>> of which will run the svc_run loop, this could work well in glibc
>>>>> version, but in libtirpc we are hitting the random crash and stuck
>>>>> issues.
>>>>>
>>>>> More detail please see:
>>>>> https://github.com/gluster/gluster-block/pull/182
>>>>>
>>>>> Signed-off-by: Xiubo Li <xiubli@redhat.com>
>>>>> ---
>>>>>     src/svc_run.c | 19 +++++++++++++++++++
>>>>>     1 file changed, 19 insertions(+)
>>>>>
>>>>> diff --git a/src/svc_run.c b/src/svc_run.c
>>>>> index f40314b..b295755 100644
>>>>> --- a/src/svc_run.c
>>>>> +++ b/src/svc_run.c
>>>>> @@ -38,12 +38,17 @@
>>>>>     #include <string.h>
>>>>>     #include <unistd.h>
>>>>>     #include <sys/poll.h>
>>>>> +#include <syslog.h>
>>>>> +#include <stdbool.h>
>>>>>         #include <rpc/rpc.h>
>>>>>     #include "rpc_com.h"
>>>>>     #include <sys/select.h>
>>>>>     +static bool svc_loop_running = false;
>>>>> +static pthread_mutex_t svc_run_lock = PTHREAD_MUTEX_INITIALIZER;
>>>>> +
>>>>>     void
>>>>>     svc_run()
>>>>>     {
>>>>> @@ -51,6 +56,16 @@ svc_run()
>>>>>       struct pollfd *my_pollfd = NULL;
>>>>>       int last_max_pollfd = 0;
>>>>>     +  pthread_mutex_lock(&svc_run_lock);
>>>>> +  if (svc_loop_running) {
>>>>> +    pthread_mutex_unlock(&svc_run_lock);
>>>>> +    syslog (LOG_ERR, "svc_run: svc loop is already running in current process %d", getpid());
>>>>> +    return;
>>>>> +  }
>>>>> +
>>>>> +  svc_loop_running = true;
>>>>> +  pthread_mutex_unlock(&svc_run_lock);
>>>>> +
>>>>>       for (;;) {
>>>>>         int max_pollfd = svc_max_pollfd;
>>>>>         if (max_pollfd == 0 && svc_pollfd == NULL)
>>>>> @@ -111,4 +126,8 @@ svc_exit()
>>>>>         svc_pollfd = NULL;
>>>>>         svc_max_pollfd = 0;
>>>>>         rwlock_unlock(&svc_fd_lock);
>>>>> +
>>>>> +    pthread_mutex_lock(&svc_run_lock);
>>>>> +    svc_loop_running = false;
>>>>> +    pthread_mutex_unlock(&svc_run_lock);
>>>>>     }
>>>>
>>>>
>>>> _______________________________________________
>>>> Libtirpc-devel mailing list
>>>> Libtirpc-devel@lists.sourceforge.net
>>>> https://lists.sourceforge.net/lists/listinfo/libtirpc-devel
>>


^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, back to index

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-04-09 11:37 [PATCH] svc_run: make sure only one svc_run loop runs in one process xiubli
2019-05-16  2:55 ` Xiubo Li
2019-06-11 14:54   ` Steve Dickson
2019-06-12  3:32     ` Xiubo Li
2019-06-12 16:46       ` Olga Kornievskaia
2019-06-13  0:50         ` Xiubo Li

Linux-NFS Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-nfs/0 linux-nfs/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-nfs linux-nfs/ https://lore.kernel.org/linux-nfs \
		linux-nfs@vger.kernel.org linux-nfs@archiver.kernel.org
	public-inbox-index linux-nfs


Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-nfs


AGPL code for this site: git clone https://public-inbox.org/ public-inbox