linux-nfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* decant_cull_table intermittently aborting cachefilesd
@ 2023-02-03 11:17 Chris Chilvers
  2023-03-28 17:22 ` Chris Chilvers
  0 siblings, 1 reply; 3+ messages in thread
From: Chris Chilvers @ 2023-02-03 11:17 UTC (permalink / raw)
  To: linux-cachefs, linux-nfs, benmaynard, brennandoyle, tom, daire,
	Chris Chilvers

I have been having an issue where cachefilesd will randomly crash causing the
cache to be withdrawn. The crash is intermittent and can sometimes happen
within minutes, other times it can take hours, or never.

Fortunately it has produced a crash dump so I've been able to analyse what
happened.

From the stack trace (and debug logging) the last operation it was running is
the decant_cull_table. The code fails in the check block at the end of the
function when it calls abort().

    (gdb) bt
    #0  __pthread_kill_implementation (no_tid=0, signo=6,
threadid=140614334650176) at ./nptl/pthread_kill.c:44
    #1  __pthread_kill_internal (signo=6, threadid=140614334650176) at
./nptl/pthread_kill.c:78
    #2  __GI___pthread_kill (threadid=140614334650176,
signo=signo@entry=6) at ./nptl/pthread_kill.c:89
    #3  0x00007fe353442476 in __GI_raise (sig=sig@entry=6) at
../sysdeps/posix/raise.c:26
    #4  0x00007fe3534287f3 in __GI_abort () at ./stdlib/abort.c:79
    #5  0x0000556d6c9f0965 in decant_cull_table () at cachefilesd.c:1571
    #6  cachefilesd () at cachefilesd.c:780
    #7  0x0000556d6c9f140b in main (argc=<optimized out>,
argv=<optimized out>) at cachefilesd.c:581

For reference the code at frame 5 from the decant_cull_table function is:

    check:
        for (loop = 0; loop < nr_in_ready_table; loop++)
            if (((long)cullready[loop] & 0xf0000000) == 0x60000000)
                abort();

Checking the cull table, the first object in the cull table appears to be
valid.

    (gdb) p nr_in_ready_table
    $1 = 242

    (gdb) p cullready[0]
    $2 = (struct object *) 0x556d6d7382a0

    (gdb) p -pretty -- *cullready[0]
    $3 = {
        parent = 0x556d6d7352b0,
        children = 0x0,
        next = 0x0,
        prev = 0x0,
        dir = 0x0,
        ino = 13631753,
        usage = 1,
        empty = false,
        new = false,
        cullable = true,
        type = OBJTYPE_DATA,
        atime = 1675349423,
        name = "E"
    }

The inode number from the struct matches a file in the fscache.

    $ sudo find /var/cache/fscache -inum 13631753
    /var/cache/fscache/cache/Infs,3.0,2,,300000a,e5e9b1269df2b0d,,,d0,100000,100000,249f0,249f0,249f0,249f0,1/@00/E210w114Hg92Az0HAMYCClFMVmkMY050002w1qO200

However, the memory address of the struct matches (fails) the check.

    (gdb) p (((long)cullready[0] & 0xf0000000) == 0x60000000)
    $4 = 1

      0000 556d 6d73 82a0
    & 0000 0000 f000 0000
    = 0000 0000 6000 0000

    $ file /sbin/cachefilesd
    /sbin/cachefilesd: ELF 64-bit LSB pie executable, x86-64

Looking at the code, I suspect that this magic 0x60000000 number is supposed
to be some kind of sentinel value that's used as a bug check for errors such
as use after free? This would make sense when the application was 32 bit, as
address pattern 0110 in the highest nibble either cannot occur, or lies within
the kernel address space. However, when compiled as 64 bit this assumption is
no longer true and the bit pattern can appear in perfectly valid addresses.

This would also explain the random nature of the crashes, as the cachefilesd
is at the whims of the OS and calloc function.

--
Chris

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: decant_cull_table intermittently aborting cachefilesd
  2023-02-03 11:17 decant_cull_table intermittently aborting cachefilesd Chris Chilvers
@ 2023-03-28 17:22 ` Chris Chilvers
  2024-01-21 20:57   ` [PATCH] Fix random abort() Hristo Venev
  0 siblings, 1 reply; 3+ messages in thread
From: Chris Chilvers @ 2023-03-28 17:22 UTC (permalink / raw)
  To: linux-cachefs, linux-nfs, benmaynard, brennandoyle, tom, daire,
	Chris Chilvers

On Fri, 3 Feb 2023 at 11:17, Chris Chilvers <chilversc@gmail.com> wrote:
>
> I have been having an issue where cachefilesd will randomly crash causing the
> cache to be withdrawn. The crash is intermittent and can sometimes happen
> within minutes, other times it can take hours, or never.
>
> Fortunately it has produced a crash dump so I've been able to analyse what
> happened.
>
> From the stack trace (and debug logging) the last operation it was running is
> the decant_cull_table. The code fails in the check block at the end of the
> function when it calls abort().
>
>     (gdb) bt
>     #0  __pthread_kill_implementation (no_tid=0, signo=6,
> threadid=140614334650176) at ./nptl/pthread_kill.c:44
>     #1  __pthread_kill_internal (signo=6, threadid=140614334650176) at
> ./nptl/pthread_kill.c:78
>     #2  __GI___pthread_kill (threadid=140614334650176,
> signo=signo@entry=6) at ./nptl/pthread_kill.c:89
>     #3  0x00007fe353442476 in __GI_raise (sig=sig@entry=6) at
> ../sysdeps/posix/raise.c:26
>     #4  0x00007fe3534287f3 in __GI_abort () at ./stdlib/abort.c:79
>     #5  0x0000556d6c9f0965 in decant_cull_table () at cachefilesd.c:1571
>     #6  cachefilesd () at cachefilesd.c:780
>     #7  0x0000556d6c9f140b in main (argc=<optimized out>,
> argv=<optimized out>) at cachefilesd.c:581
>
> For reference the code at frame 5 from the decant_cull_table function is:
>
>     check:
>         for (loop = 0; loop < nr_in_ready_table; loop++)
>             if (((long)cullready[loop] & 0xf0000000) == 0x60000000)
>                 abort();
>
> Checking the cull table, the first object in the cull table appears to be
> valid.
>
>     (gdb) p nr_in_ready_table
>     $1 = 242
>
>     (gdb) p cullready[0]
>     $2 = (struct object *) 0x556d6d7382a0
>
>     (gdb) p -pretty -- *cullready[0]
>     $3 = {
>         parent = 0x556d6d7352b0,
>         children = 0x0,
>         next = 0x0,
>         prev = 0x0,
>         dir = 0x0,
>         ino = 13631753,
>         usage = 1,
>         empty = false,
>         new = false,
>         cullable = true,
>         type = OBJTYPE_DATA,
>         atime = 1675349423,
>         name = "E"
>     }
>
> The inode number from the struct matches a file in the fscache.
>
>     $ sudo find /var/cache/fscache -inum 13631753
>     /var/cache/fscache/cache/Infs,3.0,2,,300000a,e5e9b1269df2b0d,,,d0,100000,100000,249f0,249f0,249f0,249f0,1/@00/E210w114Hg92Az0HAMYCClFMVmkMY050002w1qO200
>
> However, the memory address of the struct matches (fails) the check.
>
>     (gdb) p (((long)cullready[0] & 0xf0000000) == 0x60000000)
>     $4 = 1
>
>       0000 556d 6d73 82a0
>     & 0000 0000 f000 0000
>     = 0000 0000 6000 0000
>
>     $ file /sbin/cachefilesd
>     /sbin/cachefilesd: ELF 64-bit LSB pie executable, x86-64
>
> Looking at the code, I suspect that this magic 0x60000000 number is supposed
> to be some kind of sentinel value that's used as a bug check for errors such
> as use after free? This would make sense when the application was 32 bit, as
> address pattern 0110 in the highest nibble either cannot occur, or lies within
> the kernel address space. However, when compiled as 64 bit this assumption is
> no longer true and the bit pattern can appear in perfectly valid addresses.
>
> This would also explain the random nature of the crashes, as the cachefilesd
> is at the whims of the OS and calloc function.
>
> --
> Chris

Any thoughts on this issue? I think the main question to be answered is if the
debug checks such as "(0x6b000000 | __LINE__)" still have any value. If not
this can be simplified by simply setting the pointer to null, and updating
the check to look for nulls.

If __LINE__ still has value then there are two questions to answer:

1. How to make this safe for 64 bit architectures?
2. Should __LINE__ only be included in debug builds, and null used normally?

^ permalink raw reply	[flat|nested] 3+ messages in thread

* [PATCH] Fix random abort()
  2023-03-28 17:22 ` Chris Chilvers
@ 2024-01-21 20:57   ` Hristo Venev
  0 siblings, 0 replies; 3+ messages in thread
From: Hristo Venev @ 2024-01-21 20:57 UTC (permalink / raw)
  To: David Howells, linux-cachefs
  Cc: Chris Chilvers, linux-nfs, benmaynard, brennandoyle, tom, daire,
	Chris Chilvers, Hristo Venev

It is perfectly fine for the high nibble of the low 32 bits of a pointer
to be 6. Use NULL as a marker for missing entries.

Signed-off-by: Hristo Venev <hristo@venev.name>
---
 cachefilesd.c | 30 ++++++++++++------------------
 1 file changed, 12 insertions(+), 18 deletions(-)

diff --git a/cachefilesd.c b/cachefilesd.c
index d4d236f..851815d 100644
--- a/cachefilesd.c
+++ b/cachefilesd.c
@@ -30,6 +30,7 @@
 #define CACHEFILESD_VERSION "0.10.10"
 
 #define _GNU_SOURCE
+#include <assert.h>
 #include <stdarg.h>
 #include <stdbool.h>
 #include <stdio.h>
@@ -1092,7 +1093,6 @@ static void put_object(struct object *object)
 
 	parent = object->parent;
 
-	memset(object, 0x6d, sizeof(struct object));
 	free(object);
 
 	if (parent)
@@ -1213,7 +1213,7 @@ static void insert_into_cull_table(struct object *object)
 
 	/* newest object in table will be displaced by this one */
 	put_object(cullbuild[0]);
-	cullbuild[0] = (void *)(0x6b000000 | __LINE__);
+	cullbuild[0] = NULL;
 	object->usage++;
 
 	/* place directly in first slot if second is older */
@@ -1391,7 +1391,7 @@ next:
 
 			if (loop == nr_in_ready_table - 1) {
 				/* child was oldest object */
-				cullready[--nr_in_ready_table] = (void *)(0x6b000000 | __LINE__);
+				cullready[--nr_in_ready_table] = NULL;
 				put_object(child);
 				goto removed;
 			}
@@ -1400,7 +1400,7 @@ next:
 				memmove(&cullready[loop],
 					&cullready[loop + 1],
 					(nr_in_ready_table - (loop + 1)) * sizeof(cullready[0]));
-				cullready[--nr_in_ready_table] = (void *)(0x6b000000 | __LINE__);
+				cullready[--nr_in_ready_table] = NULL;
 				put_object(child);
 				goto removed;
 			}
@@ -1409,17 +1409,11 @@ next:
 				if (cullbuild[loop] == child)
 					break;
 
-			if (loop == nr_in_build_table - 1) {
-				/* child was oldest object */
-				cullbuild[--nr_in_build_table] = (void *)(0x6b000000 | __LINE__);
-				put_object(child);
-			}
-			else if (loop < nr_in_build_table - 1) {
-				/* child was somewhere in between */
+			if (loop < nr_in_build_table) {
 				memmove(&cullbuild[loop],
 					&cullbuild[loop + 1],
 					(nr_in_build_table - (loop + 1)) * sizeof(cullbuild[0]));
-				cullbuild[--nr_in_build_table] = (void *)(0x6b000000 | __LINE__);
+				cullbuild[--nr_in_build_table] = NULL;
 				put_object(child);
 			}
 
@@ -1531,7 +1525,7 @@ static void decant_cull_table(void)
 
 		n = copy * sizeof(cullready[0]);
 		memcpy(cullready, cullbuild, n);
-		memset(cullbuild, 0x6e, n);
+		memset(cullbuild, 0, n);
 		nr_in_ready_table = nr_in_build_table;
 		nr_in_build_table = 0;
 		goto check;
@@ -1558,8 +1552,9 @@ static void decant_cull_table(void)
 	memmove(&cullready[copy], &cullready[0], nr_in_ready_table * sizeof(cullready[0]));
 	nr_in_ready_table += copy;
 
-	memcpy(&cullready[0], &cullbuild[leave], copy * sizeof(cullready[0]));
-	memset(&cullbuild[leave], 0x6b, copy * sizeof(cullbuild[0]));
+	n = copy * sizeof(cullready[0]);
+	memcpy(&cullready[0], &cullbuild[leave], n);
+	memset(&cullbuild[leave], 0, n);
 	nr_in_build_table = leave;
 
 	if (copy + leave > culltable_size)
@@ -1567,8 +1562,7 @@ static void decant_cull_table(void)
 
 check:
 	for (loop = 0; loop < nr_in_ready_table; loop++)
-		if (((long)cullready[loop] & 0xf0000000) == 0x60000000)
-			abort();
+		assert(cullready[loop]);
 }
 
 /*****************************************************************************/
@@ -1645,6 +1639,6 @@ static void cull_objects(void)
 
 	if (cullready[nr_in_ready_table - 1]->cullable) {
 		cull_object(cullready[nr_in_ready_table - 1]);
-		cullready[--nr_in_ready_table] = (void *)(0x6b000000 | __LINE__);
+		cullready[--nr_in_ready_table] = NULL;
 	}
 }
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2024-01-21 21:17 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-02-03 11:17 decant_cull_table intermittently aborting cachefilesd Chris Chilvers
2023-03-28 17:22 ` Chris Chilvers
2024-01-21 20:57   ` [PATCH] Fix random abort() Hristo Venev

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).