From mboxrd@z Thu Jan 1 00:00:00 1970 From: "Jan Beulich" Subject: Re: [PATCH 3 of 3] KEXEC: Allocate crash structures in low memory Date: Fri, 23 Dec 2011 09:06:12 +0000 Message-ID: <4EF452940200007800069B89@nat28.tlf.novell.com> References: <23c31d59ffb1590815bc.1324575385@andrewcoop.uk.xensource.com> Mime-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Return-path: In-Reply-To: <23c31d59ffb1590815bc.1324575385@andrewcoop.uk.xensource.com> Content-Disposition: inline List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Sender: xen-devel-bounces@lists.xensource.com Errors-To: xen-devel-bounces@lists.xensource.com To: Andrew Cooper Cc: xen-devel@lists.xensource.com List-Id: xen-devel@lists.xenproject.org >>> On 22.12.11 at 18:36, Andrew Cooper wrote: > On 64bit Xen with 32bit dom0 and crashkernel, xmalloc'ing items such > as the CPU crash notes will go into the xenheap, which tends to be in > upper memory. This causes problems on machines with more than 64GB > (or 4GB if no PAE support) of ram as the crashkernel physically cant > access the crash notes. What use is a crash dump taken with a 32-bit kernel on a machine with more than 64G (or more than 4G is the crash kernel doesn't support PAE)? > The solution is to force Xen to allocate certain structures in lower > memory. This is achieved by introducing two new command line > parameters; low_crashinfo and crashinfo_maxaddr. Because of the > potential impact on 32bit PV guests, and that this problem does not > exist for 64bit dom0 on 64bit Xen, this new functionality defaults to > the codebase's previous behavior, requiring the user to explicitly > add extra command line parameters to change the behavior. > > This patch consists of 3 logically distinct but closely related > changes. > 1) Add the two new command line parameters. Do you really need both? Just being able to set the max address would seem sufficient... > 2) Change crash note allocation to use lower memory when instructed. > 3) Change the conring buffer to use lower memory when instructed. Why does this one need moving, but none of the other Xen data structures? If anything, I would have expected you to limit the Xen heap altogether, so that automatically all allocations get done in a way so that at least Xen's data structures are available in the (truncated) dump. > There result is that the crash notes and console ring will be placed > in lower memory so useful information can be recovered in the case of > a crash. > > Signed-off-by: Andrew Cooper > > diff -r 3da37c68284f -r 23c31d59ffb1 xen/arch/x86/setup.c > --- a/xen/arch/x86/setup.c > +++ b/xen/arch/x86/setup.c > @@ -586,6 +586,10 @@ void __init __start_xen(unsigned long mb > } > cmdline_parse(cmdline); > > + /* Must be after command line argument parsing and before > + * allocing any xenheap structures wanted in lower memory. */ > + kexec_early_calculations(); > + But does it really need to be *this* early? > parse_video_info(); > > set_current((struct vcpu *)0xfffff000); /* debug sanity */ > diff -r 3da37c68284f -r 23c31d59ffb1 xen/common/kexec.c > --- a/xen/common/kexec.c > +++ b/xen/common/kexec.c > @@ -36,7 +36,9 @@ bool_t kexecing = FALSE; > typedef struct { Elf_Note * start; size_t size; } crash_note_range_t; > static crash_note_range_t * crash_notes; > > -/* Lock to prevent race conditions when allocating the crash note buffers. > */ > +/* Lock to prevent race conditions when allocating the crash note buffers. > + * It also serves to protect calls to alloc_from_crash_heap when allocating > + * crash note buffers in lower memory. */ > static DEFINE_SPINLOCK(crash_notes_lock); > > static Elf_Note *xen_crash_note; > @@ -70,6 +72,23 @@ static struct { > (_d_)->start = (_s_)->start; \ > } while (0) > > +/* Low crashinfo mode. Start as INVALID so serveral codepaths can set up > + * defaults without needing to know the state of the others. */ > +enum low_crashinfo low_crashinfo_mode = LOW_CRASHINFO_INVALID; > + > +/* This value is only considered if low_crash_mode is set to MIN or ALL, so > + * setting a default here is safe. Default to 4GB. This is because the > current > + * KEXEC_CMD_get_range compat hypercall trucates 64bit pointers to 32 bits. > The > + * typical usecase for crashinfo_maxaddr will be for 64bit Xen with 32bit > dom0 > + * and 32bit crash kernel. */ > +static paddr_t crashinfo_maxaddr = 4ULL << 30; __initdata > + > +/* = log base 2 of crashinfo_maxaddr after checking for sanity. */ > +paddr_t crashinfo_maxaddr_bits = 0; > + > +/* Pointers to keep track of the crash heap region. */ > +static void *crash_heap_current = NULL, *crash_heap_end = NULL; > + > /* > * Parse command lines in the format > * > @@ -148,6 +167,58 @@ static void __init parse_crashkernel(con > } > custom_param("crashkernel", parse_crashkernel); > > +/* Parse command lines in the format: > + * > + * low_crashinfo=[none,min,all] > + * > + * - none disables the low allocation of crash info. > + * - min will allocate enough low information for the crash kernel to be > able > + * to extract the hypervisor and dom0 message ring buffers. > + * - all will allocate additional structures such as domain and vcpu structs > + * low so the crash kernel can perform an extended analysis of state. > + */ > +static void __init parse_low_crashinfo(const char * str) > +{ > + > + if ( !strlen(str) ) > + /* default to min if user just specifies "low_crashinfo" */ > + low_crashinfo_mode = LOW_CRASHINFO_MIN; > + else if ( !strcmp(str, "none" ) ) > + low_crashinfo_mode = LOW_CRASHINFO_NONE; > + else if ( !strcmp(str, "min" ) ) > + low_crashinfo_mode = LOW_CRASHINFO_MIN; > + else if ( !strcmp(str, "all" ) ) > + low_crashinfo_mode = LOW_CRASHINFO_ALL; > + else > + { > + printk("Unknown low_crashinfo parameter '%s'. Defaulting to > min.\n", str); > + low_crashinfo_mode = LOW_CRASHINFO_MIN; > + } > +} > +custom_param("low_crashinfo", parse_low_crashinfo); > + > +/* Parse command lines in the format: > + * > + * crashinfo_maxaddr= > + * > + * will be rounded down to the nearest power of two. Defaults to 64G > + */ > +static void __init parse_crashinfo_maxaddr(const char * str) > +{ > + u64 addr; > + > + /* if low_crashinfo_mode is unset, default to min. */ > + if ( low_crashinfo_mode == LOW_CRASHINFO_INVALID ) > + low_crashinfo_mode = LOW_CRASHINFO_MIN; > + > + if ( (addr = parse_size_and_unit(str, NULL)) ) > + crashinfo_maxaddr = addr; > + else > + printk("Unable to parse crashinfo_maxaddr. Defaulting to %p\n", > + (void*)crashinfo_maxaddr); > +} > +custom_param("crashinfo_maxaddr", parse_crashinfo_maxaddr); > + > void __init set_kexec_crash_area_size(u64 system_ram) > { > unsigned int idx; > @@ -306,6 +377,20 @@ static size_t sizeof_cpu_notes(const uns > return bytes; > } > > +/* Allocate size_t bytes of space from the previously allocated > + * crash heap if the user has requested that crash notes be allocated > + * in lower memory. There is currently no case where the crash notes > + * should be free()'d. */ > +static void * alloc_from_crash_heap(const size_t bytes) > +{ > + void * ret; > + if ( crash_heap_current + bytes > crash_heap_end ) > + return NULL; > + ret = (void*)crash_heap_current; > + crash_heap_current += bytes; > + return ret; > +} > + > /* Allocate a crash note buffer for a newly onlined cpu. */ > static int kexec_init_cpu_notes(const unsigned long cpu) > { > @@ -320,7 +405,10 @@ static int kexec_init_cpu_notes(const un > return ret; > > nr_bytes = sizeof_cpu_notes(cpu); > - note = xmalloc_bytes(nr_bytes); > + > + /* If we dont care about the position of allocation, malloc. */ > + if ( low_crashinfo_mode == LOW_CRASHINFO_NONE ) > + note = xmalloc_bytes(nr_bytes); > > /* Protect the write into crash_notes[] with a spinlock, as this > function > * is on a hotplug path and a hypercall path. */ > @@ -338,6 +426,11 @@ static int kexec_init_cpu_notes(const un > } > else > { > + /* If we care about memory possition, alloc from the crash heap, > + * also protected by the crash_notes_lock. */ > + if ( low_crashinfo_mode > LOW_CRASHINFO_NONE ) > + note = alloc_from_crash_heap(nr_bytes); > + > crash_notes[cpu].start = note; > crash_notes[cpu].size = nr_bytes; > spin_unlock(&crash_notes_lock); > @@ -397,6 +490,24 @@ static struct notifier_block cpu_nfb = { > .notifier_call = cpu_callback > }; > > +void __init kexec_early_calculations(void) > +{ > + /* If low_crashinfo_mode is still INVALID, neither "low_crashinfo" nor > + * "crashinfo_maxaddr" have been specified on the command line, so > + * explicitly set to NONE. */ > + if ( low_crashinfo_mode == LOW_CRASHINFO_INVALID ) > + low_crashinfo_mode = LOW_CRASHINFO_NONE; > + > + crashinfo_maxaddr_bits = 0; > + if ( low_crashinfo_mode > LOW_CRASHINFO_NONE ) > + { > + paddr_t tmp = crashinfo_maxaddr; > + > + while ( tmp >>= 1 ) > + crashinfo_maxaddr_bits++; fls() or some of its cousins > + } > +} > + > static int __init kexec_init(void) > { > void *cpu = (void *)(unsigned long)smp_processor_id(); > @@ -407,6 +518,29 @@ static int __init kexec_init(void) > > register_keyhandler('C', &crashdump_trigger_keyhandler); > > + if ( low_crashinfo_mode > LOW_CRASHINFO_NONE ) > + { > + size_t crash_heap_size; > + > + /* This calculation is safe even if the machine is booted in > + * uniprocessor mode. */ > + crash_heap_size = sizeof_cpu_notes(0) + > + sizeof_cpu_notes(1) * (nr_cpu_ids - 1); > + crash_heap_size = PAGE_ALIGN(crash_heap_size); > + > + crash_heap_current = alloc_xenheap_pages( > + get_order_from_bytes(crash_heap_size), > + MEMF_bits(crashinfo_maxaddr_bits) ); > + > + if ( crash_heap_current ) > + crash_heap_end = crash_heap_current + crash_heap_size; > + else > + return -ENOMEM; Constructs like this generally look bogus to me, as if ( !crash_heap_current ) return -ENOMEM; ... is shorter and (to me at least) more clear. > + } > + > + /* crash_notes may be allocated anywhere Xen can reach in memory. > + Only the individual CPU crash notes themselves must be allocated > + in lower memory if requested. */ > crash_notes = xmalloc_array(crash_note_range_t, nr_cpu_ids); > if ( ! crash_notes ) > return -ENOMEM; > diff -r 3da37c68284f -r 23c31d59ffb1 xen/drivers/char/console.c > --- a/xen/drivers/char/console.c > +++ b/xen/drivers/char/console.c > @@ -584,6 +584,7 @@ void __init console_init_postirq(void) > { > char *ring; > unsigned int i, order; > + u64 memflags; > > serial_init_postirq(); > > @@ -591,7 +592,8 @@ void __init console_init_postirq(void) > opt_conring_size = num_present_cpus() << (9 + xenlog_lower_thresh); > > order = get_order_from_bytes(max(opt_conring_size, conring_size)); > - while ( (ring = alloc_xenheap_pages(order, 0)) == NULL ) > + memflags = low_crashinfo_mode > LOW_CRASHINFO_NONE ? > MEMF_bits(crashinfo_maxaddr_bits) : 0; > + while ( (ring = alloc_xenheap_pages(order, memflags)) == NULL ) > { > BUG_ON(order == 0); > order--; > diff -r 3da37c68284f -r 23c31d59ffb1 xen/include/xen/kexec.h > --- a/xen/include/xen/kexec.h > +++ b/xen/include/xen/kexec.h > @@ -25,6 +25,19 @@ void set_kexec_crash_area_size(u64 syste > #define KEXEC_IMAGE_CRASH_BASE 2 > #define KEXEC_IMAGE_NR 4 > > +enum low_crashinfo { > + LOW_CRASHINFO_INVALID = 0, > + LOW_CRASHINFO_NONE = 1, > + LOW_CRASHINFO_MIN = 2, > + LOW_CRASHINFO_ALL = 3 > +}; > + > +/* Low crashinfo mode. Start as INVALID so serveral codepaths can set up > + * defaults without needing to know the state of the others. */ > +extern enum low_crashinfo low_crashinfo_mode; > +extern paddr_t crashinfo_maxaddr_bits; > +void kexec_early_calculations(void); > + > int machine_kexec_load(int type, int slot, xen_kexec_image_t *image); > void machine_kexec_unload(int type, int slot, xen_kexec_image_t *image); > void machine_kexec_reserved(xen_kexec_reserve_t *reservation); > > _______________________________________________ > Xen-devel mailing list > Xen-devel@lists.xensource.com > http://lists.xensource.com/xen-devel