linux-kernel.vger.kernel.org archive mirror
* XFS memory allocation deadlock in 2.6.38
@ 2011-03-21 16:19 Sean Noonan
  2011-03-23 19:39 ` Sean Noonan
  2011-03-27 18:11 ` Maciej Rutecki
  0 siblings, 2 replies; 27+ messages in thread
From: Sean Noonan @ 2011-03-21 16:19 UTC (permalink / raw)
  To: 'linux-kernel@vger.kernel.org'
  Cc: Trammell Hudson, Martin Bligh, Stephen Degler, Christos Zoulas

[-- Attachment #1: Type: text/plain, Size: 2185 bytes --]

This message was originally posted to the XFS mailing list, but received no responses.  Thus, I am sending it to LKML on the advice of Martin.

Using the attached program, we are able to reproduce this bug reliably.
$ make vmtest
$ ./vmtest /xfs/hugefile.dat $(( 16 * 1024 * 1024 * 1024 )) # vmtest <path_to_file> <size_in_bytes>
/xfs/hugefile.dat: mapped 17179869184 bytes in 33822066943 ticks
749660: avg 13339 max 234667 ticks
371945: avg 26885 max 281616 ticks
---
At this point, we see the following on the console:
[593492.694806] XFS: possible memory allocation deadlock in kmem_alloc (mode:0x250)
[593506.724367] XFS: possible memory allocation deadlock in kmem_alloc (mode:0x250)
[593524.837717] XFS: possible memory allocation deadlock in kmem_alloc (mode:0x250)
[593556.742386] XFS: possible memory allocation deadlock in kmem_alloc (mode:0x250)

This is the same message presented in
http://oss.sgi.com/bugzilla/show_bug.cgi?id=410

We started testing with 2.6.38-rc7 and have seen this bug through to the .0 release.  This does not appear to be present in 2.6.33, but we have not done testing in between.  We have tested with ext4 and do not encounter this bug.
CONFIG_XFS_FS=y
CONFIG_XFS_QUOTA=y
CONFIG_XFS_POSIX_ACL=y
CONFIG_XFS_RT=y
# CONFIG_XFS_DEBUG is not set
# CONFIG_VXFS_FS is not set

Here is the stack from the process:
[<ffffffff81357553>] call_rwsem_down_write_failed+0x13/0x20
[<ffffffff812ddf1e>] xfs_ilock+0x7e/0x110
[<ffffffff8130132f>] __xfs_get_blocks+0x8f/0x4e0
[<ffffffff813017b1>] xfs_get_blocks+0x11/0x20
[<ffffffff8114ba3e>] __block_write_begin+0x1ee/0x5b0
[<ffffffff8114be9d>] block_page_mkwrite+0x9d/0xf0
[<ffffffff81307e05>] xfs_vm_page_mkwrite+0x15/0x20
[<ffffffff810f2ddb>] do_wp_page+0x54b/0x820
[<ffffffff810f347c>] handle_pte_fault+0x3cc/0x820
[<ffffffff810f5145>] handle_mm_fault+0x175/0x2f0
[<ffffffff8102e399>] do_page_fault+0x159/0x470
[<ffffffff816cf6cf>] page_fault+0x1f/0x30
[<ffffffffffffffff>] 0xffffffffffffffff

# uname -a
Linux testhost 2.6.38 #2 SMP PREEMPT Fri Mar 18 15:00:59 GMT 2011 x86_64 GNU/Linux

Please let me know if additional information is required.

Thanks!

Sean

[-- Attachment #2: vmtest.c --]
[-- Type: text/plain, Size: 2185 bytes --]

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <inttypes.h>
#include <errno.h>
#include <fcntl.h>
#include <err.h>


static inline uint64_t
rdtsc(void)
{
    uint32_t low, high;
    __asm__ __volatile__("rdtsc" : "=a"(low), "=d"(high));
    return low | ((uint64_t) high) << 32;
}


void *
mmapfile(
    const char * filename,
    uint64_t len
)
{
    int perms = 0666;
    int open_flags = O_RDWR | O_CREAT;
    int prot = PROT_READ | PROT_WRITE;	// page protections (third mmap argument), not mmap flags

    const int fd = open(filename, open_flags, perms);
    if (fd < 0)
	goto fail;

    // Ensure that the file is empty and the right size
    if (ftruncate(fd, 0) < 0)
	goto fail;

    if (ftruncate(fd, len) < 0)
	goto fail;

    // Map the entire actual length of the file
    void * const base = mmap(
	NULL,
	len,
	prot,
	MAP_SHARED | MAP_POPULATE,
	fd,
	0
    );
    if (base == MAP_FAILED)
	goto fail;

    close(fd);
    return base;

fail:
    err(1, "%s: Unable to map %"PRIu64" bytes", filename, len);
}


int main(
    int argc,
    char ** argv
)
{
    if (argc < 2)
	errx(1, "usage: %s <path_to_file> [size_in_bytes]", argv[0]);
    const char * filename = argv[1];
    const uint64_t len = argc > 2 ? strtoull(argv[2], NULL, 0) : (5ul << 30);
    const uint64_t max_index = len / sizeof(uint64_t);

    uint64_t mmap_time = -rdtsc();
    uint64_t * const buf = mmapfile(filename, len);
    mmap_time += rdtsc();
    fprintf(stderr, "%s: mapped %"PRIu64" bytes in %"PRIu64" ticks\n",
	filename,
	len,
	mmap_time
    );

    while (1)
    {
	uint64_t max = 0;
	uint64_t sum = 0;
	uint64_t i;
	const uint64_t loop_start = rdtsc();
	const uint64_t iters = 1 << 30;

	uint64_t start = loop_start;
	for (i = 0 ; i < iters ; i++)
	{
	    const uint64_t idx = lrand48() % max_index;	// renamed to avoid shadowing the loop counter
	    buf[idx] += start;

	    uint64_t end = rdtsc();
	    const uint64_t delta = end - start;
	    start = end;

	    sum += delta;
	    if (delta > max)
		max = delta;

	    // Force a report every 10 billion ticks ~= 3 seconds
	    if (end - loop_start > 10e9)
		break;
	}

	printf("%"PRIu64": avg %"PRIu64" max %"PRIu64" ticks\n",
	    i,
	    i ? sum / i : 0,
	    max
	);
    }

    return 0;
}


* RE: XFS memory allocation deadlock in 2.6.38
  2011-03-21 16:19 XFS memory allocation deadlock in 2.6.38 Sean Noonan
@ 2011-03-23 19:39 ` Sean Noonan
  2011-03-24 17:43   ` Christoph Hellwig
  2011-03-27 18:11 ` Maciej Rutecki
  1 sibling, 1 reply; 27+ messages in thread
From: Sean Noonan @ 2011-03-23 19:39 UTC (permalink / raw)
  To: Sean Noonan, 'linux-kernel@vger.kernel.org'
  Cc: Trammell Hudson, Martin Bligh, Stephen Degler, Christos Zoulas,
	'linux-xfs@oss.sgi.com'

I believe this patch fixes the behavior:
diff --git a/mm/memory.c b/mm/memory.c
index e48945a..740d5ab 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3461,7 +3461,9 @@ int make_pages_present(unsigned long addr, unsigned long end)
         * to break COW, except for shared mappings because these don't COW
         * and we would not want to dirty them for nothing.
         */
-       write = (vma->vm_flags & (VM_WRITE | VM_SHARED)) == VM_WRITE;
+       write = (vma->vm_flags & VM_WRITE) != 0;
+       if (write && ((vma->vm_flags & VM_SHARED) !=0) && (vma->vm_file == NULL))
+               write = 0;
        BUG_ON(addr >= end);
        BUG_ON(end > vma->vm_end);
        len = DIV_ROUND_UP(end, PAGE_SIZE) - addr/PAGE_SIZE;


This was traced to the following commit:
5ecfda041e4b4bd858d25bbf5a16c2a6c06d7272 is the first bad commit
commit 5ecfda041e4b4bd858d25bbf5a16c2a6c06d7272
Author: Michel Lespinasse <walken@google.com>
Date:   Thu Jan 13 15:46:09 2011 -0800

    mlock: avoid dirtying pages and triggering writeback
    
    When faulting in pages for mlock(), we want to break COW for anonymous or
    file pages within VM_WRITABLE, non-VM_SHARED vmas.  However, there is no
    need to write-fault into VM_SHARED vmas since shared file pages can be
    mlocked first and dirtied later, when/if they actually get written to.
    Skipping the write fault is desirable, as we don't want to unnecessarily
    cause these pages to be dirtied and queued for writeback.
    
    Signed-off-by: Michel Lespinasse <walken@google.com>
    Cc: Hugh Dickins <hughd@google.com>
    Cc: Rik van Riel <riel@redhat.com>
    Cc: Kosaki Motohiro <kosaki.motohiro@jp.fujitsu.com>
    Cc: Peter Zijlstra <peterz@infradead.org>
    Cc: Nick Piggin <npiggin@kernel.dk>
    Cc: Theodore Tso <tytso@google.com>
    Cc: Michael Rubin <mrubin@google.com>
    Cc: Suleiman Souhlal <suleiman@google.com>
    Cc: Dave Chinner <david@fromorbit.com>
    Cc: Christoph Hellwig <hch@infradead.org>
    Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
    Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

:040000 040000 604eede2f45b7e5276ce9725b715ed15a868861d 3c175eadf4cf33d4f78d4d455c9a04f3df2c199e M	mm


-----Original Message-----
From: Sean Noonan 
Sent: Monday, March 21, 2011 12:20
To: 'linux-kernel@vger.kernel.org'
Cc: Trammell Hudson; Martin Bligh; Stephen Degler; Christos Zoulas
Subject: XFS memory allocation deadlock in 2.6.38

This message was originally posted to the XFS mailing list, but received no responses.  Thus, I am sending it to LKML on the advice of Martin.

Using the attached program, we are able to reproduce this bug reliably.
$ make vmtest
$ ./vmtest /xfs/hugefile.dat $(( 16 * 1024 * 1024 * 1024 )) # vmtest <path_to_file> <size_in_bytes>
/xfs/hugefile.dat: mapped 17179869184 bytes in 33822066943 ticks
749660: avg 13339 max 234667 ticks
371945: avg 26885 max 281616 ticks
---
At this point, we see the following on the console:
[593492.694806] XFS: possible memory allocation deadlock in kmem_alloc (mode:0x250)
[593506.724367] XFS: possible memory allocation deadlock in kmem_alloc (mode:0x250)
[593524.837717] XFS: possible memory allocation deadlock in kmem_alloc (mode:0x250)
[593556.742386] XFS: possible memory allocation deadlock in kmem_alloc (mode:0x250)

This is the same message presented in
http://oss.sgi.com/bugzilla/show_bug.cgi?id=410

We started testing with 2.6.38-rc7 and have seen this bug through to the .0 release.  This does not appear to be present in 2.6.33, but we have not done testing in between.  We have tested with ext4 and do not encounter this bug.
CONFIG_XFS_FS=y
CONFIG_XFS_QUOTA=y
CONFIG_XFS_POSIX_ACL=y
CONFIG_XFS_RT=y
# CONFIG_XFS_DEBUG is not set
# CONFIG_VXFS_FS is not set

Here is the stack from the process:
[<ffffffff81357553>] call_rwsem_down_write_failed+0x13/0x20
[<ffffffff812ddf1e>] xfs_ilock+0x7e/0x110
[<ffffffff8130132f>] __xfs_get_blocks+0x8f/0x4e0
[<ffffffff813017b1>] xfs_get_blocks+0x11/0x20
[<ffffffff8114ba3e>] __block_write_begin+0x1ee/0x5b0
[<ffffffff8114be9d>] block_page_mkwrite+0x9d/0xf0
[<ffffffff81307e05>] xfs_vm_page_mkwrite+0x15/0x20
[<ffffffff810f2ddb>] do_wp_page+0x54b/0x820
[<ffffffff810f347c>] handle_pte_fault+0x3cc/0x820
[<ffffffff810f5145>] handle_mm_fault+0x175/0x2f0
[<ffffffff8102e399>] do_page_fault+0x159/0x470
[<ffffffff816cf6cf>] page_fault+0x1f/0x30
[<ffffffffffffffff>] 0xffffffffffffffff

# uname -a
Linux testhost 2.6.38 #2 SMP PREEMPT Fri Mar 18 15:00:59 GMT 2011 x86_64 GNU/Linux

Please let me know if additional information is required.

Thanks!

Sean


* Re: XFS memory allocation deadlock in 2.6.38
  2011-03-23 19:39 ` Sean Noonan
@ 2011-03-24 17:43   ` Christoph Hellwig
  2011-03-24 23:45     ` Michel Lespinasse
  0 siblings, 1 reply; 27+ messages in thread
From: Christoph Hellwig @ 2011-03-24 17:43 UTC (permalink / raw)
  To: Sean Noonan
  Cc: 'linux-kernel@vger.kernel.org',
	Martin Bligh, Trammell Hudson, Christos Zoulas,
	'linux-xfs@oss.sgi.com',
	Stephen Degler, walken, linux-mm

Michel,

can you take a look at this bug report?  It looks like a regression
in your mlock handling changes.


On Wed, Mar 23, 2011 at 03:39:05PM -0400, Sean Noonan wrote:
> I believe this patch fixes the behavior:
> diff --git a/mm/memory.c b/mm/memory.c
> index e48945a..740d5ab 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -3461,7 +3461,9 @@ int make_pages_present(unsigned long addr, unsigned long end)
>          * to break COW, except for shared mappings because these don't COW
>          * and we would not want to dirty them for nothing.
>          */
> -       write = (vma->vm_flags & (VM_WRITE | VM_SHARED)) == VM_WRITE;
> +       write = (vma->vm_flags & VM_WRITE) != 0;
> +       if (write && ((vma->vm_flags & VM_SHARED) !=0) && (vma->vm_file == NULL))
> +               write = 0;
>         BUG_ON(addr >= end);
>         BUG_ON(end > vma->vm_end);
>         len = DIV_ROUND_UP(end, PAGE_SIZE) - addr/PAGE_SIZE;
> 
> 
> This was traced to the following commit:
> 5ecfda041e4b4bd858d25bbf5a16c2a6c06d7272 is the first bad commit
> commit 5ecfda041e4b4bd858d25bbf5a16c2a6c06d7272
> Author: Michel Lespinasse <walken@google.com>
> Date:   Thu Jan 13 15:46:09 2011 -0800
> 
>     mlock: avoid dirtying pages and triggering writeback
>     
>     When faulting in pages for mlock(), we want to break COW for anonymous or
>     file pages within VM_WRITABLE, non-VM_SHARED vmas.  However, there is no
>     need to write-fault into VM_SHARED vmas since shared file pages can be
>     mlocked first and dirtied later, when/if they actually get written to.
>     Skipping the write fault is desirable, as we don't want to unnecessarily
>     cause these pages to be dirtied and queued for writeback.
>     
>     Signed-off-by: Michel Lespinasse <walken@google.com>
>     Cc: Hugh Dickins <hughd@google.com>
>     Cc: Rik van Riel <riel@redhat.com>
>     Cc: Kosaki Motohiro <kosaki.motohiro@jp.fujitsu.com>
>     Cc: Peter Zijlstra <peterz@infradead.org>
>     Cc: Nick Piggin <npiggin@kernel.dk>
>     Cc: Theodore Tso <tytso@google.com>
>     Cc: Michael Rubin <mrubin@google.com>
>     Cc: Suleiman Souhlal <suleiman@google.com>
>     Cc: Dave Chinner <david@fromorbit.com>
>     Cc: Christoph Hellwig <hch@infradead.org>
>     Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
>     Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
> 
> :040000 040000 604eede2f45b7e5276ce9725b715ed15a868861d 3c175eadf4cf33d4f78d4d455c9a04f3df2c199e M	mm
> 
> 
> -----Original Message-----
> From: Sean Noonan 
> Sent: Monday, March 21, 2011 12:20
> To: 'linux-kernel@vger.kernel.org'
> Cc: Trammell Hudson; Martin Bligh; Stephen Degler; Christos Zoulas
> Subject: XFS memory allocation deadlock in 2.6.38
> 
> This message was originally posted to the XFS mailing list, but received no responses.  Thus, I am sending it to LKML on the advice of Martin.
> 
> Using the attached program, we are able to reproduce this bug reliably.
> $ make vmtest
> $ ./vmtest /xfs/hugefile.dat $(( 16 * 1024 * 1024 * 1024 )) # vmtest <path_to_file> <size_in_bytes>
> /xfs/hugefile.dat: mapped 17179869184 bytes in 33822066943 ticks
> 749660: avg 13339 max 234667 ticks
> 371945: avg 26885 max 281616 ticks
> ---
> At this point, we see the following on the console:
> [593492.694806] XFS: possible memory allocation deadlock in kmem_alloc (mode:0x250)
> [593506.724367] XFS: possible memory allocation deadlock in kmem_alloc (mode:0x250)
> [593524.837717] XFS: possible memory allocation deadlock in kmem_alloc (mode:0x250)
> [593556.742386] XFS: possible memory allocation deadlock in kmem_alloc (mode:0x250)
> 
> This is the same message presented in
> http://oss.sgi.com/bugzilla/show_bug.cgi?id=410
> 
> We started testing with 2.6.38-rc7 and have seen this bug through to the .0 release.  This does not appear to be present in 2.6.33, but we have not done testing in between.  We have tested with ext4 and do not encounter this bug.
> CONFIG_XFS_FS=y
> CONFIG_XFS_QUOTA=y
> CONFIG_XFS_POSIX_ACL=y
> CONFIG_XFS_RT=y
> # CONFIG_XFS_DEBUG is not set
> # CONFIG_VXFS_FS is not set
> 
> Here is the stack from the process:
> [<ffffffff81357553>] call_rwsem_down_write_failed+0x13/0x20
> [<ffffffff812ddf1e>] xfs_ilock+0x7e/0x110
> [<ffffffff8130132f>] __xfs_get_blocks+0x8f/0x4e0
> [<ffffffff813017b1>] xfs_get_blocks+0x11/0x20
> [<ffffffff8114ba3e>] __block_write_begin+0x1ee/0x5b0
> [<ffffffff8114be9d>] block_page_mkwrite+0x9d/0xf0
> [<ffffffff81307e05>] xfs_vm_page_mkwrite+0x15/0x20
> [<ffffffff810f2ddb>] do_wp_page+0x54b/0x820
> [<ffffffff810f347c>] handle_pte_fault+0x3cc/0x820
> [<ffffffff810f5145>] handle_mm_fault+0x175/0x2f0
> [<ffffffff8102e399>] do_page_fault+0x159/0x470
> [<ffffffff816cf6cf>] page_fault+0x1f/0x30
> [<ffffffffffffffff>] 0xffffffffffffffff
> 
> # uname -a
> Linux testhost 2.6.38 #2 SMP PREEMPT Fri Mar 18 15:00:59 GMT 2011 x86_64 GNU/Linux
> 
> Please let me know if additional information is required.
> 
> Thanks!
> 
> Sean
> 
> _______________________________________________
> xfs mailing list
> xfs@oss.sgi.com
> http://oss.sgi.com/mailman/listinfo/xfs
---end quoted text---


* Re: XFS memory allocation deadlock in 2.6.38
  2011-03-24 17:43   ` Christoph Hellwig
@ 2011-03-24 23:45     ` Michel Lespinasse
  2011-03-28 14:58       ` Sean Noonan
  0 siblings, 1 reply; 27+ messages in thread
From: Michel Lespinasse @ 2011-03-24 23:45 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Sean Noonan, linux-kernel, Martin Bligh, Trammell Hudson,
	Christos Zoulas, linux-xfs, Stephen Degler, linux-mm

On Thu, Mar 24, 2011 at 10:43 AM, Christoph Hellwig <hch@infradead.org> wrote:
> Michel,
>
> can you take a look at this bug report?  It looks like a regression
> in your mlock handling changes.

I had a quick look and at this point I can describe how the patch will
affect behavior of this test, but not why this causes a deadlock with
xfs.

The test creates a writable, shared mapping of a file that does not
have data blocks allocated on disk, and also uses the MAP_POPULATE
flag.

Before 5ecfda041e4b4bd858d25bbf5a16c2a6c06d7272, make_pages_present
during the mmap would cause data blocks to get allocated on disk with
an xfs_vm_page_mkwrite call, and then the file pages would get mapped
as writable ptes.

After 5ecfda041e4b4bd858d25bbf5a16c2a6c06d7272, make_pages_present
does NOT cause data blocks to get allocated on disk. Instead,
xfs_vm_readpages is called, which (I suppose) does not allocate the
data blocks and returns zero filled pages instead, which get mapped as
readonly ptes. Later, the test tries writing into the mmap'ed block,
causing minor page faults, xfs_vm_page_mkwrite calls and data block
allocations to occur.


Regarding the deadlock: I am curious to see if it could be made to
happen before 5ecfda041e4b4bd858d25bbf5a16c2a6c06d7272. Could you test
what happens if you remove the MAP_POPULATE flag from your mmap call,
and instead read all pages from userspace right after the mmap ? I
expect you would then be able to trigger the deadlock before
5ecfda041e4b4bd858d25bbf5a16c2a6c06d7272.


This leaves the issue of the change of behavior for MAP_POPULATE on
ftruncated file holes. I'm not sure what to say there though, because
MAP_POPULATE is documented to cause file read-ahead (and it still does
after 5ecfda041e4b4bd858d25bbf5a16c2a6c06d7272), but that doesn't say
anything about block allocation.


Hope this helps,

-- 
Michel "Walken" Lespinasse
A program is never fully debugged until the last user dies.


* Re: XFS memory allocation deadlock in 2.6.38
  2011-03-21 16:19 XFS memory allocation deadlock in 2.6.38 Sean Noonan
  2011-03-23 19:39 ` Sean Noonan
@ 2011-03-27 18:11 ` Maciej Rutecki
  1 sibling, 0 replies; 27+ messages in thread
From: Maciej Rutecki @ 2011-03-27 18:11 UTC (permalink / raw)
  To: Sean Noonan
  Cc: 'linux-kernel@vger.kernel.org',
	Trammell Hudson, Martin Bligh, Stephen Degler, Christos Zoulas

I created a Bugzilla entry at 
https://bugzilla.kernel.org/show_bug.cgi?id=31982
for your bug report, please add your address to the CC list in there, thanks!

On poniedziałek, 21 marca 2011 o 17:19:44 Sean Noonan wrote:
> This message was originally posted to the XFS mailing list, but received no
> responses.  Thus, I am sending it to LKML on the advice of Martin.
> 
> Using the attached program, we are able to reproduce this bug reliably.
> $ make vmtest
> $ ./vmtest /xfs/hugefile.dat $(( 16 * 1024 * 1024 * 1024 )) # vmtest <path_to_file> <size_in_bytes>
> /xfs/hugefile.dat: mapped 17179869184 bytes in 33822066943 ticks
> 749660: avg 13339 max 234667 ticks
> 371945: avg 26885 max 281616 ticks
> ---
> At this point, we see the following on the console:
> [593492.694806] XFS: possible memory allocation deadlock in kmem_alloc (mode:0x250)
> [593506.724367] XFS: possible memory allocation deadlock in kmem_alloc (mode:0x250)
> [593524.837717] XFS: possible memory allocation deadlock in kmem_alloc (mode:0x250)
> [593556.742386] XFS: possible memory allocation deadlock in kmem_alloc (mode:0x250)
> 
> This is the same message presented in
> http://oss.sgi.com/bugzilla/show_bug.cgi?id=410
> 
> We started testing with 2.6.38-rc7 and have seen this bug through to the .0
> release.  This does not appear to be present in 2.6.33, but we have not
> done testing in between.  We have tested with ext4 and do not encounter
> this bug.
> CONFIG_XFS_FS=y
> CONFIG_XFS_QUOTA=y
> CONFIG_XFS_POSIX_ACL=y
> CONFIG_XFS_RT=y
> # CONFIG_XFS_DEBUG is not set
> # CONFIG_VXFS_FS is not set
> 
> Here is the stack from the process:
> [<ffffffff81357553>] call_rwsem_down_write_failed+0x13/0x20
> [<ffffffff812ddf1e>] xfs_ilock+0x7e/0x110
> [<ffffffff8130132f>] __xfs_get_blocks+0x8f/0x4e0
> [<ffffffff813017b1>] xfs_get_blocks+0x11/0x20
> [<ffffffff8114ba3e>] __block_write_begin+0x1ee/0x5b0
> [<ffffffff8114be9d>] block_page_mkwrite+0x9d/0xf0
> [<ffffffff81307e05>] xfs_vm_page_mkwrite+0x15/0x20
> [<ffffffff810f2ddb>] do_wp_page+0x54b/0x820
> [<ffffffff810f347c>] handle_pte_fault+0x3cc/0x820
> [<ffffffff810f5145>] handle_mm_fault+0x175/0x2f0
> [<ffffffff8102e399>] do_page_fault+0x159/0x470
> [<ffffffff816cf6cf>] page_fault+0x1f/0x30
> [<ffffffffffffffff>] 0xffffffffffffffff
> 
> # uname -a
> Linux testhost 2.6.38 #2 SMP PREEMPT Fri Mar 18 15:00:59 GMT 2011 x86_64 GNU/Linux
> 
> Please let me know if additional information is required.
> 
> Thanks!
> 
> Sean

-- 
Maciej Rutecki
http://www.maciek.unixy.pl


* RE: XFS memory allocation deadlock in 2.6.38
  2011-03-24 23:45     ` Michel Lespinasse
@ 2011-03-28 14:58       ` Sean Noonan
  2011-03-28 21:06         ` Michel Lespinasse
  0 siblings, 1 reply; 27+ messages in thread
From: Sean Noonan @ 2011-03-28 14:58 UTC (permalink / raw)
  To: 'Michel Lespinasse', Christoph Hellwig
  Cc: linux-kernel, Martin Bligh, Trammell Hudson, Christos Zoulas,
	linux-xfs, Stephen Degler, linux-mm

> Regarding the deadlock: I am curious to see if it could be made to
> happen before 5ecfda041e4b4bd858d25bbf5a16c2a6c06d7272. Could you test
> what happens if you remove the MAP_POPULATE flag from your mmap call,
> and instead read all pages from userspace right after the mmap ? I
> expect you would then be able to trigger the deadlock before
> 5ecfda041e4b4bd858d25bbf5a16c2a6c06d7272.

I still see the deadlock without MAP_POPULATE.

Sean


* Re: XFS memory allocation deadlock in 2.6.38
  2011-03-28 14:58       ` Sean Noonan
@ 2011-03-28 21:06         ` Michel Lespinasse
  2011-03-28 21:34           ` Sean Noonan
  0 siblings, 1 reply; 27+ messages in thread
From: Michel Lespinasse @ 2011-03-28 21:06 UTC (permalink / raw)
  To: Sean Noonan
  Cc: Christoph Hellwig, linux-kernel, Martin Bligh, Trammell Hudson,
	Christos Zoulas, linux-xfs, Stephen Degler, linux-mm

On Mon, Mar 28, 2011 at 7:58 AM, Sean Noonan <Sean.Noonan@twosigma.com> wrote:
>> Regarding the deadlock: I am curious to see if it could be made to
>> happen before 5ecfda041e4b4bd858d25bbf5a16c2a6c06d7272. Could you test
>> what happens if you remove the MAP_POPULATE flag from your mmap call,
>> and instead read all pages from userspace right after the mmap ? I
>> expect you would then be able to trigger the deadlock before
>> 5ecfda041e4b4bd858d25bbf5a16c2a6c06d7272.
>
> I still see the deadlock without MAP_POPULATE

Could you test if you see the deadlock before
5ecfda041e4b4bd858d25bbf5a16c2a6c06d7272 without MAP_POPULATE ?

-- 
Michel "Walken" Lespinasse
A program is never fully debugged until the last user dies.


* RE: XFS memory allocation deadlock in 2.6.38
  2011-03-28 21:06         ` Michel Lespinasse
@ 2011-03-28 21:34           ` Sean Noonan
  2011-03-29  0:25             ` Michel Lespinasse
                               ` (2 more replies)
  0 siblings, 3 replies; 27+ messages in thread
From: Sean Noonan @ 2011-03-28 21:34 UTC (permalink / raw)
  To: 'Michel Lespinasse'
  Cc: Christoph Hellwig, linux-kernel, Martin Bligh, Trammell Hudson,
	Christos Zoulas, linux-xfs, Stephen Degler, linux-mm

> Could you test if you see the deadlock before
> 5ecfda041e4b4bd858d25bbf5a16c2a6c06d7272 without MAP_POPULATE ?

Built and tested 72ddc8f72270758951ccefb7d190f364d20215ab.
Confirmed that the original bug does not present in this version.
Confirmed that removing MAP_POPULATE does cause the deadlock to occur.

Here is the stack of the test:
# cat /proc/3846/stack
[<ffffffff812e8a64>] call_rwsem_down_read_failed+0x14/0x30
[<ffffffff81271c1d>] xfs_ilock+0x9d/0x110
[<ffffffff81271cae>] xfs_ilock_map_shared+0x1e/0x50
[<ffffffff81294985>] __xfs_get_blocks+0xc5/0x4e0
[<ffffffff81294dcc>] xfs_get_blocks+0xc/0x10
[<ffffffff811322c2>] do_mpage_readpage+0x462/0x660
[<ffffffff8113250a>] mpage_readpage+0x4a/0x60
[<ffffffff81295433>] xfs_vm_readpage+0x13/0x20
[<ffffffff810bb850>] filemap_fault+0x2d0/0x4e0
[<ffffffff810d8680>] __do_fault+0x50/0x510
[<ffffffff810da542>] handle_mm_fault+0x1a2/0xe60
[<ffffffff8102a466>] do_page_fault+0x146/0x440
[<ffffffff8164e6cf>] page_fault+0x1f/0x30
[<ffffffffffffffff>] 0xffffffffffffffff

xfssyncd is stuck in D state.
# cat /proc/2484/stack
[<ffffffff8106ee1c>] down+0x3c/0x50
[<ffffffff81297802>] xfs_buf_lock+0x72/0x170
[<ffffffff8128762d>] xfs_getsb+0x1d/0x50
[<ffffffff8128e6af>] xfs_trans_getsb+0x5f/0x150
[<ffffffff8128821e>] xfs_mod_sb+0x4e/0xe0
[<ffffffff8126e4ea>] xfs_fs_log_dummy+0x5a/0xb0
[<ffffffff812a2a13>] xfs_sync_worker+0x83/0x90
[<ffffffff812a28e2>] xfssyncd+0x172/0x220
[<ffffffff81069576>] kthread+0x96/0xa0
[<ffffffff81003354>] kernel_thread_helper+0x4/0x10
[<ffffffffffffffff>] 0xffffffffffffffff

Sean


* Re: XFS memory allocation deadlock in 2.6.38
  2011-03-28 21:34           ` Sean Noonan
@ 2011-03-29  0:25             ` Michel Lespinasse
  2011-03-29  1:51             ` Dave Chinner
  2011-03-29 19:05             ` Sean Noonan
  2 siblings, 0 replies; 27+ messages in thread
From: Michel Lespinasse @ 2011-03-29  0:25 UTC (permalink / raw)
  To: Sean Noonan
  Cc: Christoph Hellwig, linux-kernel, Martin Bligh, Trammell Hudson,
	Christos Zoulas, linux-xfs, Stephen Degler, linux-mm,
	Dave Chinner

On Mon, Mar 28, 2011 at 2:34 PM, Sean Noonan <Sean.Noonan@twosigma.com> wrote:
>> Could you test if you see the deadlock before
>> 5ecfda041e4b4bd858d25bbf5a16c2a6c06d7272 without MAP_POPULATE ?
>
> Built and tested 72ddc8f72270758951ccefb7d190f364d20215ab.
> Confirmed that the original bug does not present in this version.
> Confirmed that removing MAP_POPULATE does cause the deadlock to occur.

It seems that the test (without MAP_POPULATE) reveals that the root
cause is an xfs bug, which had been hidden up to now by MAP_POPULATE
preallocating disk blocks (but could always be triggered by the same
test without the MAP_POPULATE flag). I'm not sure how to go about
debugging the xfs deadlock; it would probably be best if an xfs person
could have a look ?

-- 
Michel "Walken" Lespinasse
A program is never fully debugged until the last user dies.


* Re: XFS memory allocation deadlock in 2.6.38
  2011-03-28 21:34           ` Sean Noonan
  2011-03-29  0:25             ` Michel Lespinasse
@ 2011-03-29  1:51             ` Dave Chinner
  2011-03-29  2:49               ` Sean Noonan
  2011-03-29 19:05             ` Sean Noonan
  2 siblings, 1 reply; 27+ messages in thread
From: Dave Chinner @ 2011-03-29  1:51 UTC (permalink / raw)
  To: Sean Noonan
  Cc: 'Michel Lespinasse',
	Christoph Hellwig, linux-kernel, Martin Bligh, Trammell Hudson,
	Christos Zoulas, linux-xfs, Stephen Degler, linux-mm

On Mon, Mar 28, 2011 at 05:34:09PM -0400, Sean Noonan wrote:
> > Could you test if you see the deadlock before
> > 5ecfda041e4b4bd858d25bbf5a16c2a6c06d7272 without MAP_POPULATE ?
> 
> Built and tested 72ddc8f72270758951ccefb7d190f364d20215ab.
> Confirmed that the original bug does not present in this version.
> Confirmed that removing MAP_POPULATE does cause the deadlock to occur.
> 
> Here is the stack of the test:
> # cat /proc/3846/stack
> [<ffffffff812e8a64>] call_rwsem_down_read_failed+0x14/0x30
> [<ffffffff81271c1d>] xfs_ilock+0x9d/0x110
> [<ffffffff81271cae>] xfs_ilock_map_shared+0x1e/0x50
> [<ffffffff81294985>] __xfs_get_blocks+0xc5/0x4e0
> [<ffffffff81294dcc>] xfs_get_blocks+0xc/0x10
> [<ffffffff811322c2>] do_mpage_readpage+0x462/0x660
> [<ffffffff8113250a>] mpage_readpage+0x4a/0x60
> [<ffffffff81295433>] xfs_vm_readpage+0x13/0x20
> [<ffffffff810bb850>] filemap_fault+0x2d0/0x4e0
> [<ffffffff810d8680>] __do_fault+0x50/0x510
> [<ffffffff810da542>] handle_mm_fault+0x1a2/0xe60
> [<ffffffff8102a466>] do_page_fault+0x146/0x440
> [<ffffffff8164e6cf>] page_fault+0x1f/0x30
> [<ffffffffffffffff>] 0xffffffffffffffff

Something else is holding the inode locked here.

> xfssyncd is stuck in D state.
> # cat /proc/2484/stack
> [<ffffffff8106ee1c>] down+0x3c/0x50
> [<ffffffff81297802>] xfs_buf_lock+0x72/0x170
> [<ffffffff8128762d>] xfs_getsb+0x1d/0x50
> [<ffffffff8128e6af>] xfs_trans_getsb+0x5f/0x150
> [<ffffffff8128821e>] xfs_mod_sb+0x4e/0xe0
> [<ffffffff8126e4ea>] xfs_fs_log_dummy+0x5a/0xb0
> [<ffffffff812a2a13>] xfs_sync_worker+0x83/0x90
> [<ffffffff812a28e2>] xfssyncd+0x172/0x220
> [<ffffffff81069576>] kthread+0x96/0xa0
> [<ffffffff81003354>] kernel_thread_helper+0x4/0x10
> [<ffffffffffffffff>] 0xffffffffffffffff

And this is indicating that something else is holding the superblock
locked here. IOWs, whatever thread is having trouble with memory
allocation is causing these threads to block and so they can be
ignored. What's the stack trace of the thread that is throwing the
"I can't allocating a page" errors?

As it is, the question I'd really like answered is how a machine with
48GB RAM can possibly be short of memory when running mmap() on a
16GB file.  The error that XFS is throwing indicates that the
machine cannot allocate a single page of memory, so where has all
your memory gone, and why hasn't the OOM killer been let off the
leash?  What is consuming the other 32GB of RAM or preventing it
from being allocated? 

Also, I was unable to reproduce this at all on a machine with only
2GB of RAM, regardless of the kernel version and/or MAP_POPULATE, so
I'm left to wonder what is special about your test system...

Perhaps the output of xfs_bmap -vvp <file> after a successful vs
deadlocked run would be instructive....

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com


* RE: XFS memory allocation deadlock in 2.6.38
  2011-03-29  1:51             ` Dave Chinner
@ 2011-03-29  2:49               ` Sean Noonan
  0 siblings, 0 replies; 27+ messages in thread
From: Sean Noonan @ 2011-03-29  2:49 UTC (permalink / raw)
  To: 'Dave Chinner'
  Cc: 'Michel Lespinasse',
	Christoph Hellwig, linux-kernel, Martin Bligh, Trammell Hudson,
	Christos Zoulas, linux-xfs, Stephen Degler, linux-mm

> As it is, the question I'd really like answered is how a machine with
> 48GB RAM can possibly be short of memory when running mmap() on a
> 16GB file.  The error that XFS is throwing indicates that the
> machine cannot allocate a single page of memory, so where has all
> your memory gone, and why hasn't the OOM killer been let off the
> leash?  What is consuming the other 32GB of RAM or preventing it
> from being allocated? 
Here's meminfo while a test was deadlocking.  As you can see, we certainly aren't running out of RAM.
# cat /proc/meminfo 
MemTotal:       49551548 kB
MemFree:        44139876 kB
Buffers:            5324 kB
Cached:          4970552 kB
SwapCached:            0 kB
Active:            52772 kB
Inactive:        4960624 kB
Active(anon):      37864 kB
Inactive(anon):        0 kB
Active(file):      14908 kB
Inactive(file):  4960624 kB
Unevictable:           0 kB
Mlocked:               0 kB
SwapTotal:             0 kB
SwapFree:              0 kB
Dirty:           4914084 kB
Writeback:             0 kB
AnonPages:         37636 kB
Mapped:          4925460 kB
Shmem:               280 kB
Slab:             223212 kB
SReclaimable:     176280 kB
SUnreclaim:        46932 kB
KernelStack:        3968 kB
PageTables:        35228 kB
NFS_Unstable:          0 kB
Bounce:                0 kB
WritebackTmp:          0 kB
CommitLimit:    47073968 kB
Committed_AS:      86556 kB
VmallocTotal:   34359738367 kB
VmallocUsed:      380892 kB
VmallocChunk:   34331773836 kB
HugePages_Total:       0
HugePages_Free:        0
HugePages_Rsvd:        0
HugePages_Surp:        0
Hugepagesize:       2048 kB
DirectMap4k:        2048 kB
DirectMap2M:     2086912 kB
DirectMap1G:    48234496 kB
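As a quick sanity check on the snapshot above, the headline numbers can be pulled out and compared (an illustrative sketch; the values are copied from the paste, and the kB fields are treated as KiB as /proc/meminfo does):

```python
# Headline numbers from the /proc/meminfo snapshot above, in kB (KiB).
meminfo_kb = {
    "MemTotal": 49551548,
    "MemFree": 44139876,
    "Cached": 4970552,
    "Dirty": 4914084,
    "Mapped": 4925460,
}
free_gib = meminfo_kb["MemFree"] / 2**20    # KiB -> GiB
dirty_gib = meminfo_kb["Dirty"] / 2**20
# ~42 GiB free versus ~4.7 GiB of dirty page cache from the 16 GiB mapping:
# the box is nowhere near out of pages when XFS starts complaining.
print(round(free_gib, 1), round(dirty_gib, 1))
```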


> Perhaps the output of xfs_bmap -vvp <file> after a successful vs
> deadlocked run would be instructive....

I will try to get this tomorrow.

Sean

^ permalink raw reply	[flat|nested] 27+ messages in thread

* RE: XFS memory allocation deadlock in 2.6.38
  2011-03-28 21:34           ` Sean Noonan
  2011-03-29  0:25             ` Michel Lespinasse
  2011-03-29  1:51             ` Dave Chinner
@ 2011-03-29 19:05             ` Sean Noonan
  2011-03-29 19:24               ` 'Christoph Hellwig'
  2 siblings, 1 reply; 27+ messages in thread
From: Sean Noonan @ 2011-03-29 19:05 UTC (permalink / raw)
  To: Sean Noonan, 'Michel Lespinasse'
  Cc: 'Christoph Hellwig',
	'linux-kernel@vger.kernel.org',
	Martin Bligh, Trammell Hudson, Christos Zoulas,
	'linux-xfs@oss.sgi.com',
	Stephen Degler, 'linux-mm@kvack.org'

>> Could you test if you see the deadlock before
>> 5ecfda041e4b4bd858d25bbf5a16c2a6c06d7272 without MAP_POPULATE ?

> Built and tested 72ddc8f72270758951ccefb7d190f364d20215ab.
> Confirmed that the original bug does not present in this version.
> Confirmed that removing MAP_POPULATE does cause the deadlock to occur.

git bisect leads to this:

bdfb04301fa5fdd95f219539a9a5b9663b1e5fc2 is the first bad commit
commit bdfb04301fa5fdd95f219539a9a5b9663b1e5fc2
Author: Christoph Hellwig <hch@infradead.org>
Date:   Wed Jan 20 21:55:30 2010 +0000

    xfs: replace KM_LARGE with explicit vmalloc use
    
    We use the KM_LARGE flag to make kmem_alloc and friends use vmalloc
    if necessary.  As we only need this for a few boot/mount time
    allocations just switch to explicit vmalloc calls there.
    
    Signed-off-by: Christoph Hellwig <hch@lst.de>
    Signed-off-by: Alex Elder <aelder@sgi.com>

:040000 040000 1eed68ced17d8794fa842396c01c3b9677c6e709 d462932a318f8c823fa2a73156e980a688968cb2 M	fs
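As an aside on the method: git bisect converges on a single bad commit in logarithmically many build-and-test cycles. A rough sketch of the arithmetic (the commit count for the v2.6.33..v2.6.38 range is a guess for illustration, not a figure from the thread):

```python
import math

def bisect_steps(n_commits: int) -> int:
    """Upper bound on the number of build-and-test cycles git bisect needs
    to narrow a range of n_commits down to one first-bad commit."""
    return math.ceil(math.log2(n_commits)) if n_commits > 1 else 0

# Several kernel releases' worth of history; ~40k commits is a rough guess.
print(bisect_steps(40_000))  # about 16 kernel builds to reach one bad commit
```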

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: XFS memory allocation deadlock in 2.6.38
  2011-03-29 19:05             ` Sean Noonan
@ 2011-03-29 19:24               ` 'Christoph Hellwig'
  2011-03-29 19:39                 ` Johannes Weiner
                                   ` (2 more replies)
  0 siblings, 3 replies; 27+ messages in thread
From: 'Christoph Hellwig' @ 2011-03-29 19:24 UTC (permalink / raw)
  To: Sean Noonan
  Cc: 'Michel Lespinasse', 'Christoph Hellwig',
	'linux-kernel@vger.kernel.org',
	Martin Bligh, Trammell Hudson, Christos Zoulas,
	'linux-xfs@oss.sgi.com',
	Stephen Degler, 'linux-mm@kvack.org'

Can you check if the brute force patch below helps?  If it does I
still need to refine it a bit, but it could be that we are doing
an allocation under an xfs lock that could recurse back into the
filesystem.  We have a per-process flag to disable that for normal
kmalloc allocation, but we lost it for vmalloc in the commit you
bisected the regression to.


Index: xfs/fs/xfs/linux-2.6/kmem.h
===================================================================
--- xfs.orig/fs/xfs/linux-2.6/kmem.h	2011-03-29 21:16:58.039224236 +0200
+++ xfs/fs/xfs/linux-2.6/kmem.h	2011-03-29 21:17:08.368223598 +0200
@@ -63,7 +63,7 @@ static inline void *kmem_zalloc_large(si
 {
 	void *ptr;
 
-	ptr = vmalloc(size);
+	ptr = __vmalloc(size, GFP_NOFS | __GFP_HIGHMEM, PAGE_KERNEL);
 	if (ptr)
 		memset(ptr, 0, size);
 	return ptr;

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: XFS memory allocation deadlock in 2.6.38
  2011-03-29 19:24               ` 'Christoph Hellwig'
@ 2011-03-29 19:39                 ` Johannes Weiner
  2011-03-29 19:43                   ` 'Christoph Hellwig'
  2011-03-29 19:46                 ` Sean Noonan
  2011-03-29 19:54                 ` Sean Noonan
  2 siblings, 1 reply; 27+ messages in thread
From: Johannes Weiner @ 2011-03-29 19:39 UTC (permalink / raw)
  To: 'Christoph Hellwig'
  Cc: Sean Noonan, 'Michel Lespinasse',
	'linux-kernel@vger.kernel.org',
	Martin Bligh, Trammell Hudson, Christos Zoulas,
	'linux-xfs@oss.sgi.com',
	Stephen Degler, 'linux-mm@kvack.org'

On Tue, Mar 29, 2011 at 03:24:34PM -0400, 'Christoph Hellwig' wrote:
> Can you check if the brute force patch below helps?  If it does I
> still need to refine it a bit, but it could be that we are doing
> an allocation under an xfs lock that could recurse back into the
> filesystem.  We have a per-process flag to disable that for normal
> kmalloc allocation, but we lost it for vmalloc in the commit you
> bisected the regression to.
> 
> 
> Index: xfs/fs/xfs/linux-2.6/kmem.h
> ===================================================================
> --- xfs.orig/fs/xfs/linux-2.6/kmem.h	2011-03-29 21:16:58.039224236 +0200
> +++ xfs/fs/xfs/linux-2.6/kmem.h	2011-03-29 21:17:08.368223598 +0200
> @@ -63,7 +63,7 @@ static inline void *kmem_zalloc_large(si
>  {
>  	void *ptr;
>  
> -	ptr = vmalloc(size);
> +	ptr = __vmalloc(size, GFP_NOFS | __GFP_HIGHMEM, PAGE_KERNEL);
>  	if (ptr)
>  		memset(ptr, 0, size);
>  	return ptr;

Note that vmalloc is currently broken in that it does a GFP_KERNEL
allocation if it has to allocate page table pages, even when invoked
with GFP_NOFS:

	http://marc.info/?l=linux-mm&m=128942194520631&w=4


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: XFS memory allocation deadlock in 2.6.38
  2011-03-29 19:39                 ` Johannes Weiner
@ 2011-03-29 19:43                   ` 'Christoph Hellwig'
  0 siblings, 0 replies; 27+ messages in thread
From: 'Christoph Hellwig' @ 2011-03-29 19:43 UTC (permalink / raw)
  To: Johannes Weiner
  Cc: 'Christoph Hellwig',
	Sean Noonan, 'Michel Lespinasse',
	'linux-kernel@vger.kernel.org',
	Martin Bligh, Trammell Hudson, Christos Zoulas,
	'linux-xfs@oss.sgi.com',
	Stephen Degler, 'linux-mm@kvack.org'

On Tue, Mar 29, 2011 at 09:39:07PM +0200, Johannes Weiner wrote:
> > -	ptr = vmalloc(size);
> > +	ptr = __vmalloc(size, GFP_NOFS | __GFP_HIGHMEM, PAGE_KERNEL);
> >  	if (ptr)
> >  		memset(ptr, 0, size);
> >  	return ptr;
> 
> Note that vmalloc is currently broken in that it does a GFP_KERNEL
> allocation if it has to allocate page table pages, even when invoked
> with GFP_NOFS:
> 
> 	http://marc.info/?l=linux-mm&m=128942194520631&w=4

Oh great.  In that case we had a chance to hit the deadlock even before
the offending commit, just a much smaller one.


^ permalink raw reply	[flat|nested] 27+ messages in thread

* RE: XFS memory allocation deadlock in 2.6.38
  2011-03-29 19:24               ` 'Christoph Hellwig'
  2011-03-29 19:39                 ` Johannes Weiner
@ 2011-03-29 19:46                 ` Sean Noonan
  2011-03-29 20:02                   ` 'Christoph Hellwig'
  2011-03-29 19:54                 ` Sean Noonan
  2 siblings, 1 reply; 27+ messages in thread
From: Sean Noonan @ 2011-03-29 19:46 UTC (permalink / raw)
  To: 'Christoph Hellwig'
  Cc: 'Michel Lespinasse',
	'linux-kernel@vger.kernel.org',
	Martin Bligh, Trammell Hudson, Christos Zoulas,
	'linux-xfs@oss.sgi.com',
	Stephen Degler, 'linux-mm@kvack.org'

> Can you check if the brute force patch below helps?

No such luck.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* RE: XFS memory allocation deadlock in 2.6.38
  2011-03-29 19:24               ` 'Christoph Hellwig'
  2011-03-29 19:39                 ` Johannes Weiner
  2011-03-29 19:46                 ` Sean Noonan
@ 2011-03-29 19:54                 ` Sean Noonan
  2011-03-30  0:09                   ` Dave Chinner
  2 siblings, 1 reply; 27+ messages in thread
From: Sean Noonan @ 2011-03-29 19:54 UTC (permalink / raw)
  To: 'Christoph Hellwig'
  Cc: 'Michel Lespinasse',
	'linux-kernel@vger.kernel.org',
	Martin Bligh, Trammell Hudson, Christos Zoulas,
	'linux-xfs@oss.sgi.com',
	Stephen Degler, 'linux-mm@kvack.org'

> Can you check if the brute force patch below helps?  

Not sure if this helps at all, but here is the stack from all three processes involved.  This is without MAP_POPULATE and with the patch you just sent.

# ps aux | grep 'D[+]*[[:space:]]'
USER       PID %CPU %MEM    VSZ   RSS TTY      STAT START   TIME COMMAND
root      2314  0.2  0.0      0     0 ?        D    19:44   0:00 [flush-8:0]
root      2402  0.0  0.0      0     0 ?        D    19:44   0:00 [xfssyncd/sda9]
root      3861  2.6  9.9 16785280 4912848 pts/0 D+  19:45   0:07 ./vmtest /xfs/hugefile.dat 17179869184

# for p in 2314 2402 3861; do echo $p; cat /proc/$p/stack; done
2314
[<ffffffff810d634a>] congestion_wait+0x7a/0x130
[<ffffffff8129721c>] kmem_alloc+0x6c/0xf0
[<ffffffff8127c07e>] xfs_inode_item_format+0x36e/0x3b0
[<ffffffff8128401f>] xfs_log_commit_cil+0x4f/0x3b0
[<ffffffff8128ff31>] _xfs_trans_commit+0x1f1/0x2b0
[<ffffffff8127c716>] xfs_iomap_write_allocate+0x1a6/0x340
[<ffffffff81298883>] xfs_map_blocks+0x193/0x2c0
[<ffffffff812992fa>] xfs_vm_writepage+0x1ca/0x520
[<ffffffff810c4bd2>] __writepage+0x12/0x40
[<ffffffff810c53dd>] write_cache_pages+0x1dd/0x4f0
[<ffffffff810c573c>] generic_writepages+0x4c/0x70
[<ffffffff812986b8>] xfs_vm_writepages+0x58/0x70
[<ffffffff810c577c>] do_writepages+0x1c/0x40
[<ffffffff811247d1>] writeback_single_inode+0xf1/0x240
[<ffffffff81124edd>] writeback_sb_inodes+0xdd/0x1b0
[<ffffffff81125966>] writeback_inodes_wb+0x76/0x160
[<ffffffff81125d93>] wb_writeback+0x343/0x550
[<ffffffff81126126>] wb_do_writeback+0x186/0x2e0
[<ffffffff81126342>] bdi_writeback_thread+0xc2/0x310
[<ffffffff81067846>] kthread+0x96/0xa0
[<ffffffff8165a414>] kernel_thread_helper+0x4/0x10
[<ffffffffffffffff>] 0xffffffffffffffff
2402
[<ffffffff8106d0ec>] down+0x3c/0x50
[<ffffffff8129a7bd>] xfs_buf_lock+0x5d/0x170
[<ffffffff8128a87d>] xfs_getsb+0x1d/0x50
[<ffffffff81291bcf>] xfs_trans_getsb+0x5f/0x150
[<ffffffff8128b80e>] xfs_mod_sb+0x4e/0xe0
[<ffffffff81271dbf>] xfs_fs_log_dummy+0x4f/0x90
[<ffffffff812a61c1>] xfs_sync_worker+0x81/0x90
[<ffffffff812a6092>] xfssyncd+0x172/0x220
[<ffffffff81067846>] kthread+0x96/0xa0
[<ffffffff8165a414>] kernel_thread_helper+0x4/0x10
[<ffffffffffffffff>] 0xffffffffffffffff
3861
[<ffffffff812ec744>] call_rwsem_down_read_failed+0x14/0x30
[<ffffffff812754dd>] xfs_ilock+0x9d/0x110
[<ffffffff8127556e>] xfs_ilock_map_shared+0x1e/0x50
[<ffffffff81297c45>] __xfs_get_blocks+0xc5/0x4e0
[<ffffffff8129808c>] xfs_get_blocks+0xc/0x10
[<ffffffff81135ca2>] do_mpage_readpage+0x462/0x660
[<ffffffff81135eea>] mpage_readpage+0x4a/0x60
[<ffffffff812986e3>] xfs_vm_readpage+0x13/0x20
[<ffffffff810bd150>] filemap_fault+0x2d0/0x4e0
[<ffffffff810db0a0>] __do_fault+0x50/0x4f0
[<ffffffff810db85e>] handle_pte_fault+0x7e/0xc90
[<ffffffff810ddbf8>] handle_mm_fault+0x138/0x230
[<ffffffff8102b37c>] do_page_fault+0x12c/0x420
[<ffffffff81658fcf>] page_fault+0x1f/0x30
[<ffffffffffffffff>] 0xffffffffffffffff


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: XFS memory allocation deadlock in 2.6.38
  2011-03-29 19:46                 ` Sean Noonan
@ 2011-03-29 20:02                   ` 'Christoph Hellwig'
  2011-03-29 20:23                     ` Sean Noonan
  2011-03-29 22:42                     ` Dave Chinner
  0 siblings, 2 replies; 27+ messages in thread
From: 'Christoph Hellwig' @ 2011-03-29 20:02 UTC (permalink / raw)
  To: Sean Noonan
  Cc: 'Christoph Hellwig', 'Michel Lespinasse',
	'linux-kernel@vger.kernel.org',
	Martin Bligh, Trammell Hudson, Christos Zoulas,
	'linux-xfs@oss.sgi.com',
	Stephen Degler, 'linux-mm@kvack.org'

On Tue, Mar 29, 2011 at 03:46:21PM -0400, Sean Noonan wrote:
> > Can you check if the brute force patch below helps?
> 
> No such luck.

Actually thinking about it - we never do the vmalloc under any fs lock,
so this can't be the reason.  But nothing else in the patch springs to
mind either, so to narrow this down does reverting the patch on
2.6.38 also fix it?  The revert isn't quite trivial due to changes
since then, so here's the patch I came up with:


Index: xfs/fs/xfs/linux-2.6/kmem.c
===================================================================
--- xfs.orig/fs/xfs/linux-2.6/kmem.c	2011-03-29 21:55:12.871726512 +0200
+++ xfs/fs/xfs/linux-2.6/kmem.c	2011-03-29 21:55:31.648723706 +0200
@@ -16,6 +16,7 @@
  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #include <linux/mm.h>
+#include <linux/vmalloc.h>
 #include <linux/highmem.h>
 #include <linux/slab.h>
 #include <linux/swap.h>
@@ -25,25 +26,8 @@
 #include "kmem.h"
 #include "xfs_message.h"
 
-/*
- * Greedy allocation.  May fail and may return vmalloced memory.
- *
- * Must be freed using kmem_free_large.
- */
-void *
-kmem_zalloc_greedy(size_t *size, size_t minsize, size_t maxsize)
-{
-	void		*ptr;
-	size_t		kmsize = maxsize;
-
-	while (!(ptr = kmem_zalloc_large(kmsize))) {
-		if ((kmsize >>= 1) <= minsize)
-			kmsize = minsize;
-	}
-	if (ptr)
-		*size = kmsize;
-	return ptr;
-}
+#define MAX_VMALLOCS	6
+#define MAX_SLAB_SIZE	0x20000
 
 void *
 kmem_alloc(size_t size, unsigned int __nocast flags)
@@ -52,8 +36,19 @@ kmem_alloc(size_t size, unsigned int __n
 	gfp_t	lflags = kmem_flags_convert(flags);
 	void	*ptr;
 
+#ifdef DEBUG
+	if (unlikely(!(flags & KM_LARGE) && (size > PAGE_SIZE))) {
+		printk(KERN_WARNING "Large %s attempt, size=%ld\n",
+			__func__, (long)size);
+		dump_stack();
+	}
+#endif
+
 	do {
-		ptr = kmalloc(size, lflags);
+		if (size < MAX_SLAB_SIZE || retries > MAX_VMALLOCS)
+			ptr = kmalloc(size, lflags);
+		else
+			ptr = __vmalloc(size, lflags, PAGE_KERNEL);
 		if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP)))
 			return ptr;
 		if (!(++retries % 100))
@@ -75,6 +70,27 @@ kmem_zalloc(size_t size, unsigned int __
 	return ptr;
 }
 
+void *
+kmem_zalloc_greedy(size_t *size, size_t minsize, size_t maxsize,
+		   unsigned int __nocast flags)
+{
+	void		*ptr;
+	size_t		kmsize = maxsize;
+	unsigned int	kmflags = (flags & ~KM_SLEEP) | KM_NOSLEEP;
+
+	while (!(ptr = kmem_zalloc(kmsize, kmflags))) {
+		if ((kmsize <= minsize) && (flags & KM_NOSLEEP))
+			break;
+		if ((kmsize >>= 1) <= minsize) {
+			kmsize = minsize;
+			kmflags = flags;
+		}
+	}
+	if (ptr)
+		*size = kmsize;
+	return ptr;
+}
+
 void
 kmem_free(const void *ptr)
 {
Index: xfs/fs/xfs/linux-2.6/kmem.h
===================================================================
--- xfs.orig/fs/xfs/linux-2.6/kmem.h	2011-03-29 21:55:12.879725146 +0200
+++ xfs/fs/xfs/linux-2.6/kmem.h	2011-03-29 21:55:31.652725467 +0200
@@ -21,7 +21,6 @@
 #include <linux/slab.h>
 #include <linux/sched.h>
 #include <linux/mm.h>
-#include <linux/vmalloc.h>
 
 /*
  * General memory allocation interfaces
@@ -31,6 +30,7 @@
 #define KM_NOSLEEP	0x0002u
 #define KM_NOFS		0x0004u
 #define KM_MAYFAIL	0x0008u
+#define KM_LARGE	0x0010u
 
 /*
  * We use a special process flag to avoid recursive callbacks into
@@ -42,7 +42,7 @@ kmem_flags_convert(unsigned int __nocast
 {
 	gfp_t	lflags;
 
-	BUG_ON(flags & ~(KM_SLEEP|KM_NOSLEEP|KM_NOFS|KM_MAYFAIL));
+	BUG_ON(flags & ~(KM_SLEEP|KM_NOSLEEP|KM_NOFS|KM_MAYFAIL|KM_LARGE));
 
 	if (flags & KM_NOSLEEP) {
 		lflags = GFP_ATOMIC | __GFP_NOWARN;
@@ -56,25 +56,10 @@ kmem_flags_convert(unsigned int __nocast
 
 extern void *kmem_alloc(size_t, unsigned int __nocast);
 extern void *kmem_zalloc(size_t, unsigned int __nocast);
+extern void *kmem_zalloc_greedy(size_t *, size_t, size_t, unsigned int __nocast);
 extern void *kmem_realloc(const void *, size_t, size_t, unsigned int __nocast);
 extern void  kmem_free(const void *);
 
-static inline void *kmem_zalloc_large(size_t size)
-{
-	void *ptr;
-
-	ptr = vmalloc(size);
-	if (ptr)
-		memset(ptr, 0, size);
-	return ptr;
-}
-static inline void kmem_free_large(void *ptr)
-{
-	vfree(ptr);
-}
-
-extern void *kmem_zalloc_greedy(size_t *, size_t, size_t);
-
 /*
  * Zone interfaces
  */
Index: xfs/fs/xfs/quota/xfs_qm.c
===================================================================
--- xfs.orig/fs/xfs/quota/xfs_qm.c	2011-03-29 21:55:12.859726589 +0200
+++ xfs/fs/xfs/quota/xfs_qm.c	2011-03-29 21:55:41.387278609 +0200
@@ -110,11 +110,12 @@ xfs_Gqm_init(void)
 	 */
 	udqhash = kmem_zalloc_greedy(&hsize,
 				     XFS_QM_HASHSIZE_LOW * sizeof(xfs_dqhash_t),
-				     XFS_QM_HASHSIZE_HIGH * sizeof(xfs_dqhash_t));
+				     XFS_QM_HASHSIZE_HIGH * sizeof(xfs_dqhash_t),
+				     KM_SLEEP | KM_MAYFAIL | KM_LARGE);
 	if (!udqhash)
 		goto out;
 
-	gdqhash = kmem_zalloc_large(hsize);
+	gdqhash = kmem_zalloc(hsize, KM_SLEEP | KM_LARGE);
 	if (!gdqhash)
 		goto out_free_udqhash;
 
@@ -171,7 +172,7 @@ xfs_Gqm_init(void)
 	return xqm;
 
  out_free_udqhash:
-	kmem_free_large(udqhash);
+	kmem_free(udqhash);
  out:
 	return NULL;
 }
@@ -194,8 +195,8 @@ xfs_qm_destroy(
 		xfs_qm_list_destroy(&(xqm->qm_usr_dqhtable[i]));
 		xfs_qm_list_destroy(&(xqm->qm_grp_dqhtable[i]));
 	}
-	kmem_free_large(xqm->qm_usr_dqhtable);
-	kmem_free_large(xqm->qm_grp_dqhtable);
+	kmem_free(xqm->qm_usr_dqhtable);
+	kmem_free(xqm->qm_grp_dqhtable);
 	xqm->qm_usr_dqhtable = NULL;
 	xqm->qm_grp_dqhtable = NULL;
 	xqm->qm_dqhashmask = 0;
Index: xfs/fs/xfs/xfs_itable.c
===================================================================
--- xfs.orig/fs/xfs/xfs_itable.c	2011-03-29 21:55:12.851725366 +0200
+++ xfs/fs/xfs/xfs_itable.c	2011-03-29 21:55:31.660724287 +0200
@@ -259,10 +259,8 @@ xfs_bulkstat(
 		(XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog);
 	nimask = ~(nicluster - 1);
 	nbcluster = nicluster >> mp->m_sb.sb_inopblog;
-	irbuf = kmem_zalloc_greedy(&irbsize, PAGE_SIZE, PAGE_SIZE * 4);
-	if (!irbuf)
-		return ENOMEM;
-
+	irbuf = kmem_zalloc_greedy(&irbsize, PAGE_SIZE, PAGE_SIZE * 4,
+				   KM_SLEEP | KM_MAYFAIL | KM_LARGE);
 	nirbuf = irbsize / sizeof(*irbuf);
 
 	/*
@@ -527,7 +525,7 @@ xfs_bulkstat(
 	/*
 	 * Done, we're either out of filesystem or space to put the data.
 	 */
-	kmem_free_large(irbuf);
+	kmem_free(irbuf);
 	*ubcountp = ubelem;
 	/*
 	 * Found some inodes, return them now and return the error next time.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* RE: XFS memory allocation deadlock in 2.6.38
  2011-03-29 20:02                   ` 'Christoph Hellwig'
@ 2011-03-29 20:23                     ` Sean Noonan
  2011-03-29 22:42                     ` Dave Chinner
  1 sibling, 0 replies; 27+ messages in thread
From: Sean Noonan @ 2011-03-29 20:23 UTC (permalink / raw)
  To: 'Christoph Hellwig'
  Cc: 'Michel Lespinasse',
	'linux-kernel@vger.kernel.org',
	Martin Bligh, Trammell Hudson, Christos Zoulas,
	'linux-xfs@oss.sgi.com',
	Stephen Degler, 'linux-mm@kvack.org'

> mind either, so to narrow this down does reverting the patch on
> 2.6.38 also fix it?  The revert isn't quite trivial due to changes
> since then, so here's the patch I came up with:

This patch does fix the problem.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: XFS memory allocation deadlock in 2.6.38
  2011-03-29 20:02                   ` 'Christoph Hellwig'
  2011-03-29 20:23                     ` Sean Noonan
@ 2011-03-29 22:42                     ` Dave Chinner
  2011-03-29 22:45                       ` Sean Noonan
  2011-03-30  9:23                       ` 'Christoph Hellwig'
  1 sibling, 2 replies; 27+ messages in thread
From: Dave Chinner @ 2011-03-29 22:42 UTC (permalink / raw)
  To: 'Christoph Hellwig'
  Cc: Sean Noonan, Trammell Hudson, Christos Zoulas, Martin Bligh,
	'linux-kernel@vger.kernel.org',
	Stephen Degler, 'linux-mm@kvack.org',
	'linux-xfs@oss.sgi.com', 'Michel Lespinasse'

On Tue, Mar 29, 2011 at 04:02:56PM -0400, 'Christoph Hellwig' wrote:
> On Tue, Mar 29, 2011 at 03:46:21PM -0400, Sean Noonan wrote:
> > > Can you check if the brute force patch below helps?
> > 
> > No such luck.
> 
> Actually thinking about it - we never do the vmalloc under any fs lock,
> so this can't be the reason.  But nothing else in the patch springs to
> mind either, so to narrow this down does reverting the patch on
> 2.6.38 also fix it?  The revert isn't quite trivial due to changes
> since then, so here's the patch I came up with:
> 
> 
> Index: xfs/fs/xfs/linux-2.6/kmem.c
> ===================================================================
> --- xfs.orig/fs/xfs/linux-2.6/kmem.c	2011-03-29 21:55:12.871726512 +0200
> +++ xfs/fs/xfs/linux-2.6/kmem.c	2011-03-29 21:55:31.648723706 +0200
> @@ -16,6 +16,7 @@
>   * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
>   */
>  #include <linux/mm.h>
> +#include <linux/vmalloc.h>
>  #include <linux/highmem.h>
>  #include <linux/slab.h>
>  #include <linux/swap.h>
> @@ -25,25 +26,8 @@
>  #include "kmem.h"
>  #include "xfs_message.h"
>  
> -/*
> - * Greedy allocation.  May fail and may return vmalloced memory.
> - *
> - * Must be freed using kmem_free_large.
> - */
> -void *
> -kmem_zalloc_greedy(size_t *size, size_t minsize, size_t maxsize)
> -{
> -	void		*ptr;
> -	size_t		kmsize = maxsize;
> -
> -	while (!(ptr = kmem_zalloc_large(kmsize))) {
> -		if ((kmsize >>= 1) <= minsize)
> -			kmsize = minsize;
> -	}
> -	if (ptr)
> -		*size = kmsize;
> -	return ptr;
> -}
> +#define MAX_VMALLOCS	6
> +#define MAX_SLAB_SIZE	0x20000

Why those values for the magic numbers?

....

> Index: xfs/fs/xfs/quota/xfs_qm.c
> ===================================================================
> --- xfs.orig/fs/xfs/quota/xfs_qm.c	2011-03-29 21:55:12.859726589 +0200
> +++ xfs/fs/xfs/quota/xfs_qm.c	2011-03-29 21:55:41.387278609 +0200
> @@ -110,11 +110,12 @@ xfs_Gqm_init(void)
>  	 */
>  	udqhash = kmem_zalloc_greedy(&hsize,
>  				     XFS_QM_HASHSIZE_LOW * sizeof(xfs_dqhash_t),
> -				     XFS_QM_HASHSIZE_HIGH * sizeof(xfs_dqhash_t));
> +				     XFS_QM_HASHSIZE_HIGH * sizeof(xfs_dqhash_t),
> +				     KM_SLEEP | KM_MAYFAIL | KM_LARGE);
>  	if (!udqhash)
>  		goto out;
>  
> -	gdqhash = kmem_zalloc_large(hsize);
> +	gdqhash = kmem_zalloc(hsize, KM_SLEEP | KM_LARGE);

Needs a KM_MAYFAIL as well?

>  	if (!gdqhash)
>  		goto out_free_udqhash;
>  
....
> Index: xfs/fs/xfs/xfs_itable.c
> ===================================================================
> --- xfs.orig/fs/xfs/xfs_itable.c	2011-03-29 21:55:12.851725366 +0200
> +++ xfs/fs/xfs/xfs_itable.c	2011-03-29 21:55:31.660724287 +0200
> @@ -259,10 +259,8 @@ xfs_bulkstat(
>  		(XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog);
>  	nimask = ~(nicluster - 1);
>  	nbcluster = nicluster >> mp->m_sb.sb_inopblog;
> -	irbuf = kmem_zalloc_greedy(&irbsize, PAGE_SIZE, PAGE_SIZE * 4);
> -	if (!irbuf)
> -		return ENOMEM;
> -
> +	irbuf = kmem_zalloc_greedy(&irbsize, PAGE_SIZE, PAGE_SIZE * 4,
> +				   KM_SLEEP | KM_MAYFAIL | KM_LARGE);
>  	nirbuf = irbsize / sizeof(*irbuf);

Need to keep the if (!irbuf) check as KM_MAYFAIL is passed.

Cheers,

Dave
-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 27+ messages in thread

* RE: XFS memory allocation deadlock in 2.6.38
  2011-03-29 22:42                     ` Dave Chinner
@ 2011-03-29 22:45                       ` Sean Noonan
  2011-03-30  9:23                       ` 'Christoph Hellwig'
  1 sibling, 0 replies; 27+ messages in thread
From: Sean Noonan @ 2011-03-29 22:45 UTC (permalink / raw)
  To: 'Dave Chinner', 'Christoph Hellwig'
  Cc: Trammell Hudson, Christos Zoulas, Martin Bligh,
	'linux-kernel@vger.kernel.org',
	Stephen Degler, 'linux-mm@kvack.org',
	'linux-xfs@oss.sgi.com', 'Michel Lespinasse'

> Need to keep the if (!irbuf) check as KM_MAYFAIL is passed.

It wasn't there before the bug presented itself, so leaving it in wouldn't be a true test of whether the bug has been tracked to the correct place.  I'll test again with the if (!irbuf) check.

Sean

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: XFS memory allocation deadlock in 2.6.38
  2011-03-29 19:54                 ` Sean Noonan
@ 2011-03-30  0:09                   ` Dave Chinner
  2011-03-30  1:32                     ` Sean Noonan
  2011-03-30  9:30                     ` 'Christoph Hellwig'
  0 siblings, 2 replies; 27+ messages in thread
From: Dave Chinner @ 2011-03-30  0:09 UTC (permalink / raw)
  To: Sean Noonan
  Cc: 'Christoph Hellwig', 'Michel Lespinasse',
	'linux-kernel@vger.kernel.org',
	Martin Bligh, Trammell Hudson, Christos Zoulas,
	'linux-xfs@oss.sgi.com',
	Stephen Degler, 'linux-mm@kvack.org'

On Tue, Mar 29, 2011 at 03:54:12PM -0400, Sean Noonan wrote:
> > Can you check if the brute force patch below helps?  
> 
> Not sure if this helps at all, but here is the stack from all three processes involved.  This is without MAP_POPULATE and with the patch you just sent.
> 
> # ps aux | grep 'D[+]*[[:space:]]'
> USER       PID %CPU %MEM    VSZ   RSS TTY      STAT START   TIME COMMAND
> root      2314  0.2  0.0      0     0 ?        D    19:44   0:00 [flush-8:0]
> root      2402  0.0  0.0      0     0 ?        D    19:44   0:00 [xfssyncd/sda9]
> root      3861  2.6  9.9 16785280 4912848 pts/0 D+  19:45   0:07 ./vmtest /xfs/hugefile.dat 17179869184
> 
> # for p in 2314 2402 3861; do echo $p; cat /proc/$p/stack; done
> 2314
> [<ffffffff810d634a>] congestion_wait+0x7a/0x130
> [<ffffffff8129721c>] kmem_alloc+0x6c/0xf0
> [<ffffffff8127c07e>] xfs_inode_item_format+0x36e/0x3b0
> [<ffffffff8128401f>] xfs_log_commit_cil+0x4f/0x3b0
> [<ffffffff8128ff31>] _xfs_trans_commit+0x1f1/0x2b0
> [<ffffffff8127c716>] xfs_iomap_write_allocate+0x1a6/0x340
> [<ffffffff81298883>] xfs_map_blocks+0x193/0x2c0
> [<ffffffff812992fa>] xfs_vm_writepage+0x1ca/0x520
> [<ffffffff810c4bd2>] __writepage+0x12/0x40
> [<ffffffff810c53dd>] write_cache_pages+0x1dd/0x4f0
> [<ffffffff810c573c>] generic_writepages+0x4c/0x70
> [<ffffffff812986b8>] xfs_vm_writepages+0x58/0x70
> [<ffffffff810c577c>] do_writepages+0x1c/0x40
> [<ffffffff811247d1>] writeback_single_inode+0xf1/0x240
> [<ffffffff81124edd>] writeback_sb_inodes+0xdd/0x1b0
> [<ffffffff81125966>] writeback_inodes_wb+0x76/0x160
> [<ffffffff81125d93>] wb_writeback+0x343/0x550
> [<ffffffff81126126>] wb_do_writeback+0x186/0x2e0
> [<ffffffff81126342>] bdi_writeback_thread+0xc2/0x310
> [<ffffffff81067846>] kthread+0x96/0xa0
> [<ffffffff8165a414>] kernel_thread_helper+0x4/0x10
> [<ffffffffffffffff>] 0xffffffffffffffff

So, it's trying to allocate a buffer for the inode extent list,
which should only be a couple of hundred bytes, and at most ~2kB if
you are using large inodes. That still shouldn't be hitting memory
allocation problems with 44GB of free RAM....

Hmmmm. I wonder - the process is doing a random walk of 16GB, so
it's probably created tens of thousands of delayed allocation
extents before any real allocation was done. xfs_inode_item_format()
uses the in-core data fork size for the extent buffer allocation
which in this case would be much larger than what can possibly fit
inside the inode data fork.

Let's see: worst case is 8GB of sparse blocks, which is 2^21
delalloc blocks, which gives a worst case allocation size of 2^21 *
sizeof(struct xfs_bmbt_rec), which is roughly 64MB and would
overflow the return value. Even at 1k delalloc extents, we'll be
asking for an order-15 allocation when all we really need is an
order-0 allocation.
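The back-of-envelope numbers above can be checked with a short sketch. The 4 KiB block size and the 16-byte size of struct xfs_bmbt_rec (two 64-bit words) are assumptions for illustration; the point is the order of magnitude, not the exact figure:

```python
BLOCK_SIZE = 4096          # assumed filesystem block size, bytes
BMBT_REC_SIZE = 16         # struct xfs_bmbt_rec: two 64-bit words

def worst_case_buffer(sparse_bytes: int) -> int:
    """Size of the extent buffer kmem_alloc() is asked for when every
    sparse block has become its own delayed-allocation extent."""
    return (sparse_bytes // BLOCK_SIZE) * BMBT_REC_SIZE

# 8 GiB of sparse blocks -> 2**21 delalloc extents -> a 32 MiB request;
# a fully sparse 16 GiB mapping would double that to 64 MiB. Either way
# it is tens of MB for what should be an order-0 (single page) allocation.
print(worst_case_buffer(8 * 2**30) // 2**20,
      worst_case_buffer(16 * 2**30) // 2**20)
```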

Ok, so that looks like the root cause of the problem. Can you try the
patch below to see if it fixes the problem (without any other
patches applied or reverted).

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

xfs: fix extent format buffer allocation size

From: Dave Chinner <dchinner@redhat.com>

When formatting an inode item, we have to allocate a separate buffer
to hold extents when there are delayed allocation extents on the
inode and it is in extent format. The allocation size is derived
from the in-core data fork representation, which accounts for
delayed allocation extents, while the on-disk representation does
not contain any delalloc extents.

As a result of this mismatch, the allocated buffer can be far larger
than needed to hold the real extent list which, due to the fact the
inode is in extent format, is limited to the size of the literal
area of the inode. However, we can have thousands of delalloc
extents, resulting in an allocation size orders of magnitude larger
than is needed to hold all the real extents.

Fix this by limiting the size of the buffer being allocated to the
size of the literal area of the inodes in the filesystem (i.e. the
maximum size an inode fork can grow to).

Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
 fs/xfs/xfs_inode_item.c |   69 ++++++++++++++++++++++++++++------------------
 1 files changed, 42 insertions(+), 27 deletions(-)

diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 46cc401..12cdc39 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -198,6 +198,43 @@ xfs_inode_item_size(
 }
 
 /*
+ * xfs_inode_item_format_extents - convert in-core extents to on-disk form
+ *
+ * For either the data or attr fork in extent format, we need to endian convert
+ * the in-core extents as we place them into the on-disk inode. In this case, we
+ * need to do this conversion before we write the extents into the log. Because
+ * we don't have the disk inode to write into here, we allocate a buffer and
+ * format the extents into it via xfs_iextents_copy(). We free the buffer in
+ * the unlock routine after the copy for the log has been made.
+ *
+ * For the data fork, there can be delayed allocation extents
+ * in the inode as well, so the in-core data fork can be much larger than the
+ * on-disk data representation of real inodes. Hence we need to limit the size
+ * of the allocation to what will fit in the inode fork, otherwise we could be
+ * asking for excessively large allocation sizes.
+ */
+STATIC void
+xfs_inode_item_format_extents(
+	struct xfs_inode	*ip,
+	struct xfs_log_iovec	*vecp,
+	int			whichfork,
+	int			type)
+{
+	xfs_bmbt_rec_t		*ext_buffer;
+
+	ext_buffer = kmem_alloc(XFS_IFORK_SIZE(ip, whichfork),
+							KM_SLEEP | KM_NOFS);
+	if (whichfork == XFS_DATA_FORK)
+		ip->i_itemp->ili_extents_buf = ext_buffer;
+	else
+		ip->i_itemp->ili_aextents_buf = ext_buffer;
+
+	vecp->i_addr = ext_buffer;
+	vecp->i_len = xfs_iextents_copy(ip, ext_buffer, whichfork);
+	vecp->i_type = type;
+}
+
+/*
  * This is called to fill in the vector of log iovecs for the
  * given inode log item.  It fills the first item with an inode
  * log format structure, the second with the on-disk inode structure,
@@ -213,7 +250,6 @@ xfs_inode_item_format(
 	struct xfs_inode	*ip = iip->ili_inode;
 	uint			nvecs;
 	size_t			data_bytes;
-	xfs_bmbt_rec_t		*ext_buffer;
 	xfs_mount_t		*mp;
 
 	vecp->i_addr = &iip->ili_format;
@@ -320,22 +356,8 @@ xfs_inode_item_format(
 			} else
 #endif
 			{
-				/*
-				 * There are delayed allocation extents
-				 * in the inode, or we need to convert
-				 * the extents to on disk format.
-				 * Use xfs_iextents_copy()
-				 * to copy only the real extents into
-				 * a separate buffer.  We'll free the
-				 * buffer in the unlock routine.
-				 */
-				ext_buffer = kmem_alloc(ip->i_df.if_bytes,
-					KM_SLEEP);
-				iip->ili_extents_buf = ext_buffer;
-				vecp->i_addr = ext_buffer;
-				vecp->i_len = xfs_iextents_copy(ip, ext_buffer,
-						XFS_DATA_FORK);
-				vecp->i_type = XLOG_REG_TYPE_IEXT;
+				xfs_inode_item_format_extents(ip, vecp,
+					XFS_DATA_FORK, XLOG_REG_TYPE_IEXT);
 			}
 			ASSERT(vecp->i_len <= ip->i_df.if_bytes);
 			iip->ili_format.ilf_dsize = vecp->i_len;
@@ -445,19 +467,12 @@ xfs_inode_item_format(
 			 */
 			vecp->i_addr = ip->i_afp->if_u1.if_extents;
 			vecp->i_len = ip->i_afp->if_bytes;
+			vecp->i_type = XLOG_REG_TYPE_IATTR_EXT;
 #else
 			ASSERT(iip->ili_aextents_buf == NULL);
-			/*
-			 * Need to endian flip before logging
-			 */
-			ext_buffer = kmem_alloc(ip->i_afp->if_bytes,
-				KM_SLEEP);
-			iip->ili_aextents_buf = ext_buffer;
-			vecp->i_addr = ext_buffer;
-			vecp->i_len = xfs_iextents_copy(ip, ext_buffer,
-					XFS_ATTR_FORK);
+			xfs_inode_item_format_extents(ip, vecp,
+					XFS_ATTR_FORK, XLOG_REG_TYPE_IATTR_EXT);
 #endif
-			vecp->i_type = XLOG_REG_TYPE_IATTR_EXT;
 			iip->ili_format.ilf_asize = vecp->i_len;
 			vecp++;
 			nvecs++;

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* RE: XFS memory allocation deadlock in 2.6.38
  2011-03-30  0:09                   ` Dave Chinner
@ 2011-03-30  1:32                     ` Sean Noonan
  2011-03-30  1:44                       ` Dave Chinner
  2011-03-30  9:30                     ` 'Christoph Hellwig'
  1 sibling, 1 reply; 27+ messages in thread
From: Sean Noonan @ 2011-03-30  1:32 UTC (permalink / raw)
  To: 'Dave Chinner'
  Cc: 'Christoph Hellwig', 'Michel Lespinasse',
	'linux-kernel@vger.kernel.org',
	Martin Bligh, Trammell Hudson, Christos Zoulas,
	'linux-xfs@oss.sgi.com',
	Stephen Degler, 'linux-mm@kvack.org'

> Ok, so that looks like root cause of the problem. can you try the
> patch below to see if it fixes the problem (without any other
> patches applied or reverted).

It looks like this does fix the deadlock problem.  However, it appears to come at the price of significantly higher mmap startup costs.  

# ./vmtest /xfs/hugefile.dat $(( 16 * 1024 * 1024 * 1024 ))
/xfs/d-1/hugefile.dat: mapped 17179869184 bytes in 324387362198 ticks

Sean


* Re: XFS memory allocation deadlock in 2.6.38
  2011-03-30  1:32                     ` Sean Noonan
@ 2011-03-30  1:44                       ` Dave Chinner
  2011-03-30  1:52                         ` Sean Noonan
  0 siblings, 1 reply; 27+ messages in thread
From: Dave Chinner @ 2011-03-30  1:44 UTC (permalink / raw)
  To: Sean Noonan
  Cc: 'Christoph Hellwig', 'Michel Lespinasse',
	'linux-kernel@vger.kernel.org',
	Martin Bligh, Trammell Hudson, Christos Zoulas,
	'linux-xfs@oss.sgi.com',
	Stephen Degler, 'linux-mm@kvack.org'

On Tue, Mar 29, 2011 at 09:32:06PM -0400, Sean Noonan wrote:
> > Ok, so that looks like root cause of the problem. can you try the
> > patch below to see if it fixes the problem (without any other
> > patches applied or reverted).
> 
> It looks like this does fix the deadlock problem.  However, it
> appears to come at the price of significantly higher mmap startup
> costs.

It shouldn't make any difference to startup costs: the current
code uses read faults to populate the region, and those don't cause
any allocation to occur, hence this code is not executed during
the populate phase.

Is this repeatable or is it just a one-off result?

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com


* RE: XFS memory allocation deadlock in 2.6.38
  2011-03-30  1:44                       ` Dave Chinner
@ 2011-03-30  1:52                         ` Sean Noonan
  0 siblings, 0 replies; 27+ messages in thread
From: Sean Noonan @ 2011-03-30  1:52 UTC (permalink / raw)
  To: 'Dave Chinner'
  Cc: 'Christoph Hellwig', 'Michel Lespinasse',
	'linux-kernel@vger.kernel.org',
	Martin Bligh, Trammell Hudson, Christos Zoulas,
	'linux-xfs@oss.sgi.com',
	Stephen Degler, 'linux-mm@kvack.org'

> Is this repeatable or is it just a one-off result?

It was repeated three times before I sent the email, but I can't reproduce it again now.  Call it a fluke.

Sean


* Re: XFS memory allocation deadlock in 2.6.38
  2011-03-29 22:42                     ` Dave Chinner
  2011-03-29 22:45                       ` Sean Noonan
@ 2011-03-30  9:23                       ` 'Christoph Hellwig'
  1 sibling, 0 replies; 27+ messages in thread
From: 'Christoph Hellwig' @ 2011-03-30  9:23 UTC (permalink / raw)
  To: Dave Chinner
  Cc: 'Christoph Hellwig',
	Sean Noonan, Trammell Hudson, Christos Zoulas, Martin Bligh,
	'linux-kernel@vger.kernel.org',
	Stephen Degler, 'linux-mm@kvack.org',
	'linux-xfs@oss.sgi.com', 'Michel Lespinasse'

On Wed, Mar 30, 2011 at 09:42:30AM +1100, Dave Chinner wrote:
> > +#define MAX_VMALLOCS	6
> > +#define MAX_SLAB_SIZE	0x20000
> 
> Why those values for the magic numbers?

Ask the person who added it originally; it's just a revert to the
code before my commit to clean up our vmalloc usage.



* Re: XFS memory allocation deadlock in 2.6.38
  2011-03-30  0:09                   ` Dave Chinner
  2011-03-30  1:32                     ` Sean Noonan
@ 2011-03-30  9:30                     ` 'Christoph Hellwig'
  1 sibling, 0 replies; 27+ messages in thread
From: 'Christoph Hellwig' @ 2011-03-30  9:30 UTC (permalink / raw)
  To: Dave Chinner
  Cc: Sean Noonan, 'Christoph Hellwig',
	'Michel Lespinasse',
	'linux-kernel@vger.kernel.org',
	Martin Bligh, Trammell Hudson, Christos Zoulas,
	'linux-xfs@oss.sgi.com',
	Stephen Degler, 'linux-mm@kvack.org'

On Wed, Mar 30, 2011 at 11:09:42AM +1100, Dave Chinner wrote:
> +	ext_buffer = kmem_alloc(XFS_IFORK_SIZE(ip, whichfork),
> +							KM_SLEEP | KM_NOFS);

The old code didn't use KM_NOFS, and I don't think it needed it either,
as we call the iop_format handlers inside the region covered by the
PF_FSTRANS flag.

Also I think the routine needs to be under #ifndef XFS_NATIVE_HOST, as
we do not use it for big endian builds.
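
[Editorial note: the reason KM_NOFS is redundant inside ->iop_format is that XFS's flag conversion already strips __GFP_FS when the task carries PF_FSTRANS. A sketch of that logic, approximating the 2.6.38-era fs/xfs/linux-2.6/kmem.h -- details from memory, treat as indicative only:]

```c
/*
 * Approximate shape of XFS's kmem flag conversion: PF_FSTRANS on the
 * current task masks __GFP_FS exactly as KM_NOFS does, so allocations
 * made inside a transaction are already NOFS.
 */
static inline gfp_t
kmem_flags_convert(unsigned int __nocast flags)
{
	gfp_t lflags;

	if (flags & KM_NOSLEEP) {
		lflags = GFP_ATOMIC | __GFP_NOWARN;
	} else {
		lflags = GFP_KERNEL | __GFP_NOWARN;
		if ((current->flags & PF_FSTRANS) || (flags & KM_NOFS))
			lflags &= ~__GFP_FS;
	}
	return lflags;
}
```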


end of thread, other threads:[~2011-03-30  9:30 UTC | newest]

Thread overview: 27+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-03-21 16:19 XFS memory allocation deadlock in 2.6.38 Sean Noonan
2011-03-23 19:39 ` Sean Noonan
2011-03-24 17:43   ` Christoph Hellwig
2011-03-24 23:45     ` Michel Lespinasse
2011-03-28 14:58       ` Sean Noonan
2011-03-28 21:06         ` Michel Lespinasse
2011-03-28 21:34           ` Sean Noonan
2011-03-29  0:25             ` Michel Lespinasse
2011-03-29  1:51             ` Dave Chinner
2011-03-29  2:49               ` Sean Noonan
2011-03-29 19:05             ` Sean Noonan
2011-03-29 19:24               ` 'Christoph Hellwig'
2011-03-29 19:39                 ` Johannes Weiner
2011-03-29 19:43                   ` 'Christoph Hellwig'
2011-03-29 19:46                 ` Sean Noonan
2011-03-29 20:02                   ` 'Christoph Hellwig'
2011-03-29 20:23                     ` Sean Noonan
2011-03-29 22:42                     ` Dave Chinner
2011-03-29 22:45                       ` Sean Noonan
2011-03-30  9:23                       ` 'Christoph Hellwig'
2011-03-29 19:54                 ` Sean Noonan
2011-03-30  0:09                   ` Dave Chinner
2011-03-30  1:32                     ` Sean Noonan
2011-03-30  1:44                       ` Dave Chinner
2011-03-30  1:52                         ` Sean Noonan
2011-03-30  9:30                     ` 'Christoph Hellwig'
2011-03-27 18:11 ` Maciej Rutecki
