* [Qemu-devel] [patch 0/2] force -mem-path RAM allocation
@ 2013-10-08 0:41 Marcelo Tosatti
2013-10-08 0:41 ` [Qemu-devel] [patch 1/2] qemu: mempath: prefault pages manually Marcelo Tosatti
` (2 more replies)
0 siblings, 3 replies; 22+ messages in thread
From: Marcelo Tosatti @ 2013-10-08 0:41 UTC (permalink / raw)
To: qemu-devel
See individual patches for details.
^ permalink raw reply [flat|nested] 22+ messages in thread
* [Qemu-devel] [patch 1/2] qemu: mempath: prefault pages manually
2013-10-08 0:41 [Qemu-devel] [patch 0/2] force -mem-path RAM allocation Marcelo Tosatti
@ 2013-10-08 0:41 ` Marcelo Tosatti
2013-10-08 8:03 ` Paolo Bonzini
` (2 more replies)
2013-10-08 0:41 ` [Qemu-devel] [patch 2/2] qemu: add -mem-path-force option to force RAM allocation via -mem-path Marcelo Tosatti
2013-10-08 7:32 ` [Qemu-devel] [patch 0/2] force -mem-path RAM allocation Markus Armbruster
2 siblings, 3 replies; 22+ messages in thread
From: Marcelo Tosatti @ 2013-10-08 0:41 UTC (permalink / raw)
To: qemu-devel; +Cc: Marcelo Tosatti
[-- Attachment #1: map-populate-failure --]
[-- Type: text/plain, Size: 5230 bytes --]
MAP_POPULATE mmap flag does not cause mmap to fail if allocation
of the entire area is not performed. HugeTLBfs performs reservation
of pages on a global basis: any further restriction to the reserved memory
such as cpusets placement or numa node policy is performed at
fault time only.
Manually fault in pages at allocation time. This allows memory restrictions
to be applied before guest initialization.
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Index: qemu/exec.c
===================================================================
--- qemu.orig/exec.c
+++ qemu/exec.c
@@ -918,6 +918,13 @@ static long gethugepagesize(const char *
return fs.f_bsize;
}
+sigjmp_buf sigjump;
+
+static void sigbus_handler(int signal)
+{
+ siglongjmp(sigjump, 1);
+}
+
static void *file_ram_alloc(RAMBlock *block,
ram_addr_t memory,
const char *path)
@@ -927,9 +934,6 @@ static void *file_ram_alloc(RAMBlock *bl
char *c;
void *area;
int fd;
-#ifdef MAP_POPULATE
- int flags;
-#endif
unsigned long hpagesize;
hpagesize = gethugepagesize(path);
@@ -977,21 +981,57 @@ static void *file_ram_alloc(RAMBlock *bl
if (ftruncate(fd, memory))
perror("ftruncate");
-#ifdef MAP_POPULATE
- /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
- * MAP_PRIVATE is requested. For mem_prealloc we mmap as MAP_SHARED
- * to sidestep this quirk.
- */
- flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
- area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
-#else
area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
-#endif
if (area == MAP_FAILED) {
perror("file_ram_alloc: can't mmap RAM pages");
close(fd);
return (NULL);
}
+
+ if (mem_prealloc) {
+ int ret, i;
+ struct sigaction act, oldact;
+ sigset_t set, oldset;
+
+ memset(&act, 0, sizeof(act));
+ act.sa_handler = &sigbus_handler;
+ act.sa_flags = 0;
+
+ ret = sigaction(SIGBUS, &act, &oldact);
+ if (ret) {
+ perror("file_ram_alloc: fail to install signal handler");
+ exit(1);
+ }
+
+ /* unblock SIGBUS */
+ pthread_sigmask(SIG_BLOCK, NULL, &oldset);
+ sigemptyset(&set);
+ sigaddset(&set, SIGBUS);
+ pthread_sigmask(SIG_UNBLOCK, &set, NULL);
+
+ if (sigsetjmp(sigjump, 1)) {
+ fprintf(stderr, "file_ram_alloc: failed to preallocate pages\n");
+ exit(1);
+ }
+
+ /* MAP_POPULATE silently ignores failures */
+ for (i = 0; i < (memory/hpagesize)-1; i++) {
+ memset(area + (hpagesize*i), 0, 1);
+ }
+
+ ret = sigaction(SIGBUS, &oldact, NULL);
+ if (ret) {
+ perror("file_ram_alloc: fail to reinstall signal handler");
+ exit(1);
+ }
+
+ if (sigismember(&oldset, SIGBUS)) {
+ sigemptyset(&set);
+ sigaddset(&set, SIGBUS);
+ pthread_sigmask(SIG_BLOCK, &set, NULL);
+ }
+ }
+
block->fd = fd;
return area;
}
Index: qemu/vl.c
===================================================================
--- qemu.orig/vl.c
+++ qemu/vl.c
@@ -188,9 +188,7 @@ static int display_remote;
const char* keyboard_layout = NULL;
ram_addr_t ram_size;
const char *mem_path = NULL;
-#ifdef MAP_POPULATE
int mem_prealloc = 0; /* force preallocation of physical target memory */
-#endif
int nb_nics;
NICInfo nd_table[MAX_NICS];
int autostart;
@@ -3205,11 +3203,9 @@ int main(int argc, char **argv, char **e
case QEMU_OPTION_mempath:
mem_path = optarg;
break;
-#ifdef MAP_POPULATE
case QEMU_OPTION_mem_prealloc:
mem_prealloc = 1;
break;
-#endif
case QEMU_OPTION_d:
log_mask = optarg;
break;
Index: qemu/qemu-options.def
===================================================================
--- qemu.orig/qemu-options.def
+++ qemu/qemu-options.def
@@ -66,11 +66,9 @@ stringify(DEFAULT_RAM_SIZE) "]\n", QEMU_
DEF("mem-path", HAS_ARG, QEMU_OPTION_mempath,
"-mem-path FILE provide backing storage for guest RAM\n", QEMU_ARCH_ALL)
-#ifdef MAP_POPULATE
DEF("mem-prealloc", 0, QEMU_OPTION_mem_prealloc,
"-mem-prealloc preallocate guest memory (use with -mem-path)\n",
QEMU_ARCH_ALL)
-#endif
DEF("k", HAS_ARG, QEMU_OPTION_k,
"-k language use keyboard layout (for example 'fr' for French)\n",
Index: git/qemu/qemu-options.hx
===================================================================
--- qemu.orig/qemu-options.hx
+++ qemu/qemu-options.hx
@@ -228,7 +228,6 @@ STEXI
Allocate guest RAM from a temporarily created file in @var{path}.
ETEXI
-#ifdef MAP_POPULATE
DEF("mem-prealloc", 0, QEMU_OPTION_mem_prealloc,
"-mem-prealloc preallocate guest memory (use with -mem-path)\n",
QEMU_ARCH_ALL)
@@ -237,7 +236,6 @@ STEXI
@findex -mem-prealloc
Preallocate memory when using -mem-path.
ETEXI
-#endif
DEF("k", HAS_ARG, QEMU_OPTION_k,
"-k language use keyboard layout (for example 'fr' for French)\n",
^ permalink raw reply [flat|nested] 22+ messages in thread
* [Qemu-devel] [patch 2/2] qemu: add -mem-path-force option to force RAM allocation via -mem-path
2013-10-08 0:41 [Qemu-devel] [patch 0/2] force -mem-path RAM allocation Marcelo Tosatti
2013-10-08 0:41 ` [Qemu-devel] [patch 1/2] qemu: mempath: prefault pages manually Marcelo Tosatti
@ 2013-10-08 0:41 ` Marcelo Tosatti
2013-10-08 7:32 ` [Qemu-devel] [patch 0/2] force -mem-path RAM allocation Markus Armbruster
2 siblings, 0 replies; 22+ messages in thread
From: Marcelo Tosatti @ 2013-10-08 0:41 UTC (permalink / raw)
To: qemu-devel; +Cc: Marcelo Tosatti
[-- Attachment #1: add-option-to-force-mem-path --]
[-- Type: text/plain, Size: 3108 bytes --]
Default behaviour is to fall back to standard RAM allocation if -mem-path
allocation fails.
Add an option to force -mem-path RAM allocation (failing otherwise).
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Index: qemu/exec.c
===================================================================
--- qemu.orig/exec.c
+++ qemu/exec.c
@@ -985,6 +985,9 @@ static void *file_ram_alloc(RAMBlock *bl
if (area == MAP_FAILED) {
perror("file_ram_alloc: can't mmap RAM pages");
close(fd);
+ if (mem_path_force) {
+ exit(1);
+ }
return (NULL);
}
Index: qemu/vl.c
===================================================================
--- qemu.orig/vl.c
+++ qemu/vl.c
@@ -189,6 +189,7 @@ const char* keyboard_layout = NULL;
ram_addr_t ram_size;
const char *mem_path = NULL;
int mem_prealloc = 0; /* force preallocation of physical target memory */
+int mem_path_force = 0; /* quit in case -mem-path allocation fails */
int nb_nics;
NICInfo nd_table[MAX_NICS];
int autostart;
@@ -3203,6 +3204,9 @@ int main(int argc, char **argv, char **e
case QEMU_OPTION_mempath:
mem_path = optarg;
break;
+ case QEMU_OPTION_mempath_force:
+ mem_path_force = 1;
+ break;
case QEMU_OPTION_mem_prealloc:
mem_prealloc = 1;
break;
Index: qemu/include/exec/cpu-all.h
===================================================================
--- qemu.orig/include/exec/cpu-all.h
+++ qemu/include/exec/cpu-all.h
@@ -468,6 +468,7 @@ typedef struct RAMList {
extern RAMList ram_list;
extern const char *mem_path;
+extern int mem_path_force;
extern int mem_prealloc;
/* Flags stored in the low bits of the TLB virtual address. These are
Index: qemu/qemu-options.def
===================================================================
--- qemu.orig/qemu-options.def
+++ qemu/qemu-options.def
@@ -66,6 +66,9 @@ stringify(DEFAULT_RAM_SIZE) "]\n", QEMU_
DEF("mem-path", HAS_ARG, QEMU_OPTION_mempath,
"-mem-path FILE provide backing storage for guest RAM\n", QEMU_ARCH_ALL)
+DEF("mem-path-force", 0, QEMU_OPTION_mempath_force,
+"-mem-path-force fail if unable to allocate RAM as specified by -mem-path\n", QEMU_ARCH_ALL)
+
DEF("mem-prealloc", 0, QEMU_OPTION_mem_prealloc,
"-mem-prealloc preallocate guest memory (use with -mem-path)\n",
QEMU_ARCH_ALL)
Index: qemu/qemu-options.hx
===================================================================
--- qemu.orig/qemu-options.hx
+++ qemu/qemu-options.hx
@@ -228,6 +228,14 @@ STEXI
Allocate guest RAM from a temporarily created file in @var{path}.
ETEXI
+DEF("mem-path-force", 0, QEMU_OPTION_mempath_force,
+ "-mem-path-force fail if unable to allocate RAM as specified by -mem-path\n", QEMU_ARCH_ALL)
+STEXI
+@item -mem-path-force
+@findex -mem-path-force
+Fail if unable to allocate RAM as specified by -mem-path.
+ETEXI
+
DEF("mem-prealloc", 0, QEMU_OPTION_mem_prealloc,
"-mem-prealloc preallocate guest memory (use with -mem-path)\n",
QEMU_ARCH_ALL)
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [Qemu-devel] [patch 0/2] force -mem-path RAM allocation
2013-10-08 0:41 [Qemu-devel] [patch 0/2] force -mem-path RAM allocation Marcelo Tosatti
2013-10-08 0:41 ` [Qemu-devel] [patch 1/2] qemu: mempath: prefault pages manually Marcelo Tosatti
2013-10-08 0:41 ` [Qemu-devel] [patch 2/2] qemu: add -mem-path-force option to force RAM allocation via -mem-path Marcelo Tosatti
@ 2013-10-08 7:32 ` Markus Armbruster
2013-10-08 8:02 ` Paolo Bonzini
2 siblings, 1 reply; 22+ messages in thread
From: Markus Armbruster @ 2013-10-08 7:32 UTC (permalink / raw)
To: Marcelo Tosatti; +Cc: qemu-devel
We have
-mem-path FILE provide backing storage for guest RAM
-mem-prealloc preallocate guest memory (use with -mem-path)
PATCH 2/2 adds
-mem-path-force fail if unable to allocate RAM as specified by -mem-path
Looks like it's time to consolidate the options related to guest memory
into a single, QemuOpts-style -memory NAME=VALUE,... What do you guys
think?
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [Qemu-devel] [patch 0/2] force -mem-path RAM allocation
2013-10-08 7:32 ` [Qemu-devel] [patch 0/2] force -mem-path RAM allocation Markus Armbruster
@ 2013-10-08 8:02 ` Paolo Bonzini
2013-10-08 22:03 ` Marcelo Tosatti
0 siblings, 1 reply; 22+ messages in thread
From: Paolo Bonzini @ 2013-10-08 8:02 UTC (permalink / raw)
To: Markus Armbruster; +Cc: Marcelo Tosatti, qemu-devel
Il 08/10/2013 09:32, Markus Armbruster ha scritto:
> We have
>
> -mem-path FILE provide backing storage for guest RAM
> -mem-prealloc preallocate guest memory (use with -mem-path)
>
> PATCH 2/2 adds
>
> -mem-path-force fail if unable to allocate RAM as specified by -mem-path
>
> Looks like it's time to consolidate the options related to guest memory
> into a single, QemuOpts-style -memory NAME=VALUE,... What do you guys
> think?
Yes, we can use "-numa memory" (or "-numa mem") that Wanlong Gao is
adding. We can add path=, preallocate= and force= options there.
Paolo
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [Qemu-devel] [patch 1/2] qemu: mempath: prefault pages manually
2013-10-08 0:41 ` [Qemu-devel] [patch 1/2] qemu: mempath: prefault pages manually Marcelo Tosatti
@ 2013-10-08 8:03 ` Paolo Bonzini
2013-10-08 21:51 ` Marcelo Tosatti
2013-10-28 13:48 ` Paolo Bonzini
2013-10-28 16:15 ` [Qemu-devel] [patch 1/2] qemu: mempath: prefault pages manually Peter Maydell
2 siblings, 1 reply; 22+ messages in thread
From: Paolo Bonzini @ 2013-10-08 8:03 UTC (permalink / raw)
To: Marcelo Tosatti; +Cc: qemu-devel
Il 08/10/2013 02:41, Marcelo Tosatti ha scritto:
> + /* unblock SIGBUS */
> + pthread_sigmask(SIG_BLOCK, NULL, &oldset);
> + sigemptyset(&set);
> + sigaddset(&set, SIGBUS);
> + pthread_sigmask(SIG_UNBLOCK, &set, NULL);
Please instead modify qemu-thread-posix.c to unblock all per-thread
signals (SIGBUS, SIGSEGV, SIGILL, SIGFPE and SIGSYS). There is no need
to keep those blocked.
Paolo
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [Qemu-devel] [patch 1/2] qemu: mempath: prefault pages manually
2013-10-08 8:03 ` Paolo Bonzini
@ 2013-10-08 21:51 ` Marcelo Tosatti
2013-10-09 8:05 ` Paolo Bonzini
0 siblings, 1 reply; 22+ messages in thread
From: Marcelo Tosatti @ 2013-10-08 21:51 UTC (permalink / raw)
To: Paolo Bonzini; +Cc: qemu-devel
On Tue, Oct 08, 2013 at 10:03:48AM +0200, Paolo Bonzini wrote:
> Il 08/10/2013 02:41, Marcelo Tosatti ha scritto:
> > + /* unblock SIGBUS */
> > + pthread_sigmask(SIG_BLOCK, NULL, &oldset);
> > + sigemptyset(&set);
> > + sigaddset(&set, SIGBUS);
> > + pthread_sigmask(SIG_UNBLOCK, &set, NULL);
>
> Please instead modify qemu-thread-posix.c to unblock all per-thread
> signals (SIGBUS, SIGSEGV, SIGILL, SIGFPE and SIGSYS). There is no need
> to keep those blocked.
>
> Paolo
main-loop.c handles SIGBUS via signalfd to emulate MCEs (associated
commits). Therefore it must be blocked.
Note that what this patch does is maintain the signal handling state
(it saves the previous state, modifies the state, then restores the previous
state) so that it is unchanged.
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [Qemu-devel] [patch 0/2] force -mem-path RAM allocation
2013-10-08 8:02 ` Paolo Bonzini
@ 2013-10-08 22:03 ` Marcelo Tosatti
2013-10-09 6:23 ` Markus Armbruster
2013-10-09 7:43 ` Paolo Bonzini
0 siblings, 2 replies; 22+ messages in thread
From: Marcelo Tosatti @ 2013-10-08 22:03 UTC (permalink / raw)
To: Paolo Bonzini; +Cc: Markus Armbruster, qemu-devel
On Tue, Oct 08, 2013 at 10:02:26AM +0200, Paolo Bonzini wrote:
> Il 08/10/2013 09:32, Markus Armbruster ha scritto:
> > We have
> >
> > -mem-path FILE provide backing storage for guest RAM
> > -mem-prealloc preallocate guest memory (use with -mem-path)
> >
> > PATCH 2/2 adds
> >
> > -mem-path-force fail if unable to allocate RAM as specified by -mem-path
> >
> > Looks like it's time to consolidate the options related to guest memory
> > into a single, QemuOpts-style -memory NAME=VALUE,... What do you guys
> > think?
>
> Yes, we can use "-numa memory" (or "-numa mem") that Wanlong Gao is
> adding. We can add path=, preallocate= and force= options there.
>
> Paolo
It would be important for the new option to be backportable
independently. Therefore mixing it with -numa is not an option.
Also, for backportability reasons, supporting a new style of command line
for -mem-path is problematic (management must be changed accordingly).
Can the new option format for memory be created incrementally on
top of -mem-path-force? (I agree it's a good thing; it avoids proliferation
of new options.)
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [Qemu-devel] [patch 0/2] force -mem-path RAM allocation
2013-10-08 22:03 ` Marcelo Tosatti
@ 2013-10-09 6:23 ` Markus Armbruster
2013-10-09 20:05 ` Marcelo Tosatti
2013-10-09 7:43 ` Paolo Bonzini
1 sibling, 1 reply; 22+ messages in thread
From: Markus Armbruster @ 2013-10-09 6:23 UTC (permalink / raw)
To: Marcelo Tosatti; +Cc: Paolo Bonzini, qemu-devel
Marcelo Tosatti <mtosatti@redhat.com> writes:
> On Tue, Oct 08, 2013 at 10:02:26AM +0200, Paolo Bonzini wrote:
>> Il 08/10/2013 09:32, Markus Armbruster ha scritto:
>> > We have
>> >
>> > -mem-path FILE provide backing storage for guest RAM
>> > -mem-prealloc preallocate guest memory (use with -mem-path)
>> >
>> > PATCH 2/2 adds
>> >
>> > -mem-path-force fail if unable to allocate RAM as specified by
>> > -mem-path
>> >
>> > Looks like it's time to consolidate the options related to guest memory
>> > into a single, QemuOpts-style -memory NAME=VALUE,... What do you guys
>> > think?
>>
>> Yes, we can use "-numa memory" (or "-numa mem") that Wanlong Gao is
>> adding. We can add path=, preallocate= and force= options there.
>>
>> Paolo
>
> It would be important for the new option to be backportable
> independently. Therefore mixing it with -numa is not an option.
>
> Also due to backportability supporting a new style of command line
> for -mem-path is problematic (management must be changed accordingly).
We've converted -FOO ARG options to QemuOpts-style -FOO
NAME=VALUE,... before. You can use QemuOptsList member implied_opt_name
to get bare ARG accepted. Works except for ARGs containing '=' or ','.
Management still has to detect whether -FOO is old or new. QMP command
query-command-line-options should do.
> Can the new option format for memory be created incrementally on
> top of -mem-path-force? (agree its a good thing, it avoids proliferation
> of new options).
If you do it on top, it won't avoid proliferation, or am I missing
something?
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [Qemu-devel] [patch 0/2] force -mem-path RAM allocation
2013-10-08 22:03 ` Marcelo Tosatti
2013-10-09 6:23 ` Markus Armbruster
@ 2013-10-09 7:43 ` Paolo Bonzini
1 sibling, 0 replies; 22+ messages in thread
From: Paolo Bonzini @ 2013-10-09 7:43 UTC (permalink / raw)
To: Marcelo Tosatti; +Cc: Markus Armbruster, qemu-devel
Il 09/10/2013 00:03, Marcelo Tosatti ha scritto:
> It would be important for the new option to be backportable
> independently. Therefore mixing it with -numa is not an option.
Why is it important to backport it independently? In any case you need
management support for the new option.
Paolo
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [Qemu-devel] [patch 1/2] qemu: mempath: prefault pages manually
2013-10-08 21:51 ` Marcelo Tosatti
@ 2013-10-09 8:05 ` Paolo Bonzini
2013-10-09 19:41 ` Marcelo Tosatti
0 siblings, 1 reply; 22+ messages in thread
From: Paolo Bonzini @ 2013-10-09 8:05 UTC (permalink / raw)
To: Marcelo Tosatti; +Cc: qemu-devel
Il 08/10/2013 23:51, Marcelo Tosatti ha scritto:
> On Tue, Oct 08, 2013 at 10:03:48AM +0200, Paolo Bonzini wrote:
>> Il 08/10/2013 02:41, Marcelo Tosatti ha scritto:
>>> + /* unblock SIGBUS */
>>> + pthread_sigmask(SIG_BLOCK, NULL, &oldset);
>>> + sigemptyset(&set);
>>> + sigaddset(&set, SIGBUS);
>>> + pthread_sigmask(SIG_UNBLOCK, &set, NULL);
>>
>> Please instead modify qemu-thread-posix.c to unblock all per-thread
>> signals (SIGBUS, SIGSEGV, SIGILL, SIGFPE and SIGSYS). There is no need
>> to keep those blocked.
>
> main-loop.c handles SIGBUS via signalfd to emulate MCEs (associated
> commits). Therefore it must be blocked.
How was that tested? For BUS_MCEERR_AO it can work, but BUS_MCEERR_AR
calls force_sig_info which does this:
ignored = action->sa.sa_handler == SIG_IGN;
blocked = sigismember(&t->blocked, sig);
if (blocked || ignored) {
action->sa.sa_handler = SIG_DFL;
if (blocked) {
sigdelset(&t->blocked, sig);
recalc_sigpending_and_wake(t);
}
}
if (action->sa.sa_handler == SIG_DFL)
t->signal->flags &= ~SIGNAL_UNKILLABLE;
and kills the process (because that's the default action of SIG_DFL).
> Note that what this patch does it to maintain the signal handling state
> (it saves the previous state, modifies state, restores previous state) so
> that its unchanged.
Yes, understood. I was missing the part about MCE (I knew it used
SIGBUS, but forgot about signalfd). So this patch is good, but the
above point about BUS_MCEERR_AR needs to be checked sooner or later.
Paolo
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [Qemu-devel] [patch 1/2] qemu: mempath: prefault pages manually
2013-10-09 8:05 ` Paolo Bonzini
@ 2013-10-09 19:41 ` Marcelo Tosatti
2013-10-09 21:26 ` Paolo Bonzini
0 siblings, 1 reply; 22+ messages in thread
From: Marcelo Tosatti @ 2013-10-09 19:41 UTC (permalink / raw)
To: Paolo Bonzini; +Cc: qemu-devel
On Wed, Oct 09, 2013 at 10:05:44AM +0200, Paolo Bonzini wrote:
> Il 08/10/2013 23:51, Marcelo Tosatti ha scritto:
> > On Tue, Oct 08, 2013 at 10:03:48AM +0200, Paolo Bonzini wrote:
> >> Il 08/10/2013 02:41, Marcelo Tosatti ha scritto:
> >>> + /* unblock SIGBUS */
> >>> + pthread_sigmask(SIG_BLOCK, NULL, &oldset);
> >>> + sigemptyset(&set);
> >>> + sigaddset(&set, SIGBUS);
> >>> + pthread_sigmask(SIG_UNBLOCK, &set, NULL);
> >>
> >> Please instead modify qemu-thread-posix.c to unblock all per-thread
> >> signals (SIGBUS, SIGSEGV, SIGILL, SIGFPE and SIGSYS). There is no need
> >> to keep those blocked.
> >
> > main-loop.c handles SIGBUS via signalfd to emulate MCEs (associated
> > commits). Therefore it must be blocked.
>
> How was that tested? For BUS_MCEERR_AO it can work, but BUS_MCEERR_AR
> calls force_sig_info which does this:
>
> ignored = action->sa.sa_handler == SIG_IGN;
> blocked = sigismember(&t->blocked, sig);
> if (blocked || ignored) {
> action->sa.sa_handler = SIG_DFL;
> if (blocked) {
> sigdelset(&t->blocked, sig);
> recalc_sigpending_and_wake(t);
> }
>
> if (action->sa.sa_handler == SIG_DFL)
> t->signal->flags &= ~SIGNAL_UNKILLABLE;
>
> and kills the process (because that's the default action of SIG_DFL).
For vcpu context, it's not blocked?
> > Note that what this patch does it to maintain the signal handling state
> > (it saves the previous state, modifies state, restores previous state) so
> > that its unchanged.
>
> Yes, understood. I was missing the part about MCE (I knew it used
> SIGBUS, but forgot about signalfd). So this patch is good, but the
> above point about BUS_MCEERR_AR needs to be checked sooner or later.
>
> Paolo
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [Qemu-devel] [patch 0/2] force -mem-path RAM allocation
2013-10-09 6:23 ` Markus Armbruster
@ 2013-10-09 20:05 ` Marcelo Tosatti
2013-10-09 21:27 ` Paolo Bonzini
0 siblings, 1 reply; 22+ messages in thread
From: Marcelo Tosatti @ 2013-10-09 20:05 UTC (permalink / raw)
To: Markus Armbruster; +Cc: Paolo Bonzini, qemu-devel
On Wed, Oct 09, 2013 at 08:23:26AM +0200, Markus Armbruster wrote:
> Marcelo Tosatti <mtosatti@redhat.com> writes:
>
> > On Tue, Oct 08, 2013 at 10:02:26AM +0200, Paolo Bonzini wrote:
> >> Il 08/10/2013 09:32, Markus Armbruster ha scritto:
> >> > We have
> >> >
> >> > -mem-path FILE provide backing storage for guest RAM
> >> > -mem-prealloc preallocate guest memory (use with -mem-path)
> >> >
> >> > PATCH 2/2 adds
> >> >
> >> > -mem-path-force fail if unable to allocate RAM as specified by
> >> > -mem-path
> >> >
> >> > Looks like it's time to consolidate the options related to guest memory
> >> > into a single, QemuOpts-style -memory NAME=VALUE,... What do you guys
> >> > think?
> >>
> >> Yes, we can use "-numa memory" (or "-numa mem") that Wanlong Gao is
> >> adding. We can add path=, preallocate= and force= options there.
> >>
> >> Paolo
> >
> > It would be important for the new option to be backportable
> > independently. Therefore mixing it with -numa is not an option.
> >
> > Also due to backportability supporting a new style of command line
> > for -mem-path is problematic (management must be changed accordingly).
>
> We've converted -FOO ARG options to QemuOpts-style -FOO
> NAME=VALUE,... before. You can use QemuOptsList member implied_opt_name
> to get bare ARG accepted. Works except for ARGs containing '=' or ','.
>
> Management still has to detect whether -FOO is old or new. QMP command
> query-command-line-options should do.
>
> > Can the new option format for memory be created incrementally on
> > top of -mem-path-force? (agree its a good thing, it avoids proliferation
> > of new options).
>
> If you do it on top, it won't avoid proliferation, or am I missing
> something?
Right. But in fact, the new option is not necessary.
So please consider only patch 2 for inclusion.
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [Qemu-devel] [patch 1/2] qemu: mempath: prefault pages manually
2013-10-09 19:41 ` Marcelo Tosatti
@ 2013-10-09 21:26 ` Paolo Bonzini
2013-10-10 2:11 ` Marcelo Tosatti
2013-10-10 6:30 ` Paolo Bonzini
0 siblings, 2 replies; 22+ messages in thread
From: Paolo Bonzini @ 2013-10-09 21:26 UTC (permalink / raw)
To: Marcelo Tosatti; +Cc: qemu-devel
Il 09/10/2013 21:41, Marcelo Tosatti ha scritto:
>> > How was that tested? For BUS_MCEERR_AO it can work, but BUS_MCEERR_AR
>> > calls force_sig_info which does this:
>> >
>> > ignored = action->sa.sa_handler == SIG_IGN;
>> > blocked = sigismember(&t->blocked, sig);
>> > if (blocked || ignored) {
>> > action->sa.sa_handler = SIG_DFL;
>> > if (blocked) {
>> > sigdelset(&t->blocked, sig);
>> > recalc_sigpending_and_wake(t);
>> > }
>> >
>> > if (action->sa.sa_handler == SIG_DFL)
>> > t->signal->flags &= ~SIGNAL_UNKILLABLE;
>> >
>> > and kills the process (because that's the default action of SIG_DFL).
> For vcpu context its not blocked?
It causes KVM to exit back to userspace, but as soon as KVM exits it
should be blocked. Thus a SIGBUS with BUS_MCEERR_AR will never be
returned by sigtimedwait.
Paolo
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [Qemu-devel] [patch 0/2] force -mem-path RAM allocation
2013-10-09 20:05 ` Marcelo Tosatti
@ 2013-10-09 21:27 ` Paolo Bonzini
2013-10-10 15:17 ` Marcelo Tosatti
0 siblings, 1 reply; 22+ messages in thread
From: Paolo Bonzini @ 2013-10-09 21:27 UTC (permalink / raw)
To: Marcelo Tosatti; +Cc: Markus Armbruster, qemu-devel
Il 09/10/2013 22:05, Marcelo Tosatti ha scritto:
>>> > > Can the new option format for memory be created incrementally on
>>> > > top of -mem-path-force? (agree its a good thing, it avoids proliferation
>>> > > of new options).
>> >
>> > If you do it on top, it won't avoid proliferation, or am I missing
>> > something?
> Right. But in fact, the new option is not necessary.
>
> So please consider only patch 2 for inclusion.
Do you mean only patch 1?
Paolo
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [Qemu-devel] [patch 1/2] qemu: mempath: prefault pages manually
2013-10-09 21:26 ` Paolo Bonzini
@ 2013-10-10 2:11 ` Marcelo Tosatti
2013-10-10 6:30 ` Paolo Bonzini
1 sibling, 0 replies; 22+ messages in thread
From: Marcelo Tosatti @ 2013-10-10 2:11 UTC (permalink / raw)
To: Paolo Bonzini; +Cc: qemu-devel
On Wed, Oct 09, 2013 at 11:26:58PM +0200, Paolo Bonzini wrote:
> Il 09/10/2013 21:41, Marcelo Tosatti ha scritto:
> >> > How was that tested? For BUS_MCEERR_AO it can work, but BUS_MCEERR_AR
> >> > calls force_sig_info which does this:
> >> >
> >> > ignored = action->sa.sa_handler == SIG_IGN;
> >> > blocked = sigismember(&t->blocked, sig);
> >> > if (blocked || ignored) {
> >> > action->sa.sa_handler = SIG_DFL;
> >> > if (blocked) {
> >> > sigdelset(&t->blocked, sig);
> >> > recalc_sigpending_and_wake(t);
> >> > }
> >> >
> >> > if (action->sa.sa_handler == SIG_DFL)
> >> > t->signal->flags &= ~SIGNAL_UNKILLABLE;
> >> >
> >> > and kills the process (because that's the default action of SIG_DFL).
> > For vcpu context its not blocked?
>
> It causes KVM to exit back to userspace, but as soon as KVM exits it
> should be blocked. Thus a SIGBUS with BUS_MCEERR_AR will never be
> returned by sigtimedwait.
It's blocked but readable via signalfd.
It's generated when a vcpu touches memory, see 77db5cbd29b7cb0e0fb4fd14.
Since it's rarely used, reviewing the code is not a bad idea.
For the test, see
https://lists.nongnu.org/archive/html/qemu-devel/2011-01/msg01588.html.
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [Qemu-devel] [patch 1/2] qemu: mempath: prefault pages manually
2013-10-09 21:26 ` Paolo Bonzini
2013-10-10 2:11 ` Marcelo Tosatti
@ 2013-10-10 6:30 ` Paolo Bonzini
1 sibling, 0 replies; 22+ messages in thread
From: Paolo Bonzini @ 2013-10-10 6:30 UTC (permalink / raw)
To: Marcelo Tosatti; +Cc: qemu-devel
Il 09/10/2013 23:26, Paolo Bonzini ha scritto:
> Il 09/10/2013 21:41, Marcelo Tosatti ha scritto:
>>>> How was that tested? For BUS_MCEERR_AO it can work, but BUS_MCEERR_AR
>>>> calls force_sig_info which does this:
>>>>
>>>> ignored = action->sa.sa_handler == SIG_IGN;
>>>> blocked = sigismember(&t->blocked, sig);
>>>> if (blocked || ignored) {
>>>> action->sa.sa_handler = SIG_DFL;
>>>> if (blocked) {
>>>> sigdelset(&t->blocked, sig);
>>>> recalc_sigpending_and_wake(t);
>>>> }
>>>>
>>>> if (action->sa.sa_handler == SIG_DFL)
>>>> t->signal->flags &= ~SIGNAL_UNKILLABLE;
>>>>
>>>> and kills the process (because that's the default action of SIG_DFL).
>> For vcpu context its not blocked?
>
> It causes KVM to exit back to userspace, but as soon as KVM exits it
> should be blocked.
... but it's been queued and this bypasses the checks in force_sig_info.
So in guest mode it is accepted, in QEMU mode it causes a SIGBUS.
Paolo
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [Qemu-devel] [patch 0/2] force -mem-path RAM allocation
2013-10-09 21:27 ` Paolo Bonzini
@ 2013-10-10 15:17 ` Marcelo Tosatti
0 siblings, 0 replies; 22+ messages in thread
From: Marcelo Tosatti @ 2013-10-10 15:17 UTC (permalink / raw)
To: Paolo Bonzini; +Cc: Markus Armbruster, qemu-devel
On Wed, Oct 09, 2013 at 11:27:37PM +0200, Paolo Bonzini wrote:
> Il 09/10/2013 22:05, Marcelo Tosatti ha scritto:
> >>> > > Can the new option format for memory be created incrementally on
> >>> > > top of -mem-path-force? (agree its a good thing, it avoids proliferation
> >>> > > of new options).
> >> >
> >> > If you do it on top, it won't avoid proliferation, or am I missing
> >> > something?
> > Right. But in fact, the new option is not necessary.
> >
> > So please consider only patch 2 for inclusion.
>
> Do you mean only patch 1?
>
> Paolo
Yes.
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [Qemu-devel] [patch 1/2] qemu: mempath: prefault pages manually
2013-10-08 0:41 ` [Qemu-devel] [patch 1/2] qemu: mempath: prefault pages manually Marcelo Tosatti
2013-10-08 8:03 ` Paolo Bonzini
@ 2013-10-28 13:48 ` Paolo Bonzini
2013-10-28 19:21 ` [Qemu-devel] qemu: mempath: prefault pages manually (v3) Marcelo Tosatti
2013-10-28 16:15 ` [Qemu-devel] [patch 1/2] qemu: mempath: prefault pages manually Peter Maydell
2 siblings, 1 reply; 22+ messages in thread
From: Paolo Bonzini @ 2013-10-28 13:48 UTC (permalink / raw)
To: Marcelo Tosatti; +Cc: qemu-devel
Il 08/10/2013 02:41, Marcelo Tosatti ha scritto:
> MAP_POPULATE mmap flag does not cause mmap to fail if allocation
> of the entire area is not performed. HugeTLBfs performs reservation
> of pages on a global basis: any further restriction to the reserved memory
> such as cpusets placement or numa node policy is performed at
> fault time only.
>
> Manually fault in pages at allocation time. This allows memory restrictions
> to be applied before guest initialization.
>
> Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
>
> Index: qemu/exec.c
> ===================================================================
> --- qemu.orig/exec.c
> +++ qemu/exec.c
Please produce individual patches with git format-patch. This lets "git
am" do a 3-way merge, and git format-patch would also ignore automatically
generated files such as qemu-options.def.
Also:
> @@ -918,6 +918,13 @@ static long gethugepagesize(const char *
> return fs.f_bsize;
> }
>
> +sigjmp_buf sigjump;
Please make this static.
> +
> +static void sigbus_handler(int signal)
> +{
> + siglongjmp(sigjump, 1);
> +}
> +
> static void *file_ram_alloc(RAMBlock *block,
> ram_addr_t memory,
> const char *path)
> @@ -927,9 +934,6 @@ static void *file_ram_alloc(RAMBlock *bl
> char *c;
> void *area;
> int fd;
> -#ifdef MAP_POPULATE
> - int flags;
> -#endif
> unsigned long hpagesize;
>
> hpagesize = gethugepagesize(path);
> @@ -977,21 +981,57 @@ static void *file_ram_alloc(RAMBlock *bl
> if (ftruncate(fd, memory))
> perror("ftruncate");
>
> -#ifdef MAP_POPULATE
> - /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
> - * MAP_PRIVATE is requested. For mem_prealloc we mmap as MAP_SHARED
> - * to sidestep this quirk.
> - */
> - flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
> - area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
> -#else
> area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
> -#endif
> if (area == MAP_FAILED) {
> perror("file_ram_alloc: can't mmap RAM pages");
> close(fd);
> return (NULL);
> }
> +
> + if (mem_prealloc) {
> + int ret, i;
> + struct sigaction act, oldact;
> + sigset_t set, oldset;
> +
> + memset(&act, 0, sizeof(act));
> + act.sa_handler = &sigbus_handler;
> + act.sa_flags = 0;
> +
> + ret = sigaction(SIGBUS, &act, &oldact);
> + if (ret) {
> + perror("file_ram_alloc: fail to install signal handler");
> + exit(1);
> + }
> +
> + /* unblock SIGBUS */
> + pthread_sigmask(SIG_BLOCK, NULL, &oldset);
This is not needed, just pass &oldset in the SIG_UNBLOCK call below.
> + sigemptyset(&set);
> + sigaddset(&set, SIGBUS);
> + pthread_sigmask(SIG_UNBLOCK, &set, NULL);
> +
> + if (sigsetjmp(sigjump, 1)) {
> + fprintf(stderr, "file_ram_alloc: failed to preallocate pages\n");
> + exit(1);
> + }
> +
> + /* MAP_POPULATE silently ignores failures */
> + for (i = 0; i < (memory/hpagesize)-1; i++) {
> + memset(area + (hpagesize*i), 0, 1);
> + }
> +
> + ret = sigaction(SIGBUS, &oldact, NULL);
> + if (ret) {
> + perror("file_ram_alloc: fail to reinstall signal handler");
> + exit(1);
> + }
> +
> + if (sigismember(&oldset, SIGBUS)) {
> + sigemptyset(&set);
> + sigaddset(&set, SIGBUS);
> + pthread_sigmask(SIG_BLOCK, &set, NULL);
> + }
Just use SIG_SETMASK with oldset, unconditionally.
Ok with these changes.
Paolo
> + }
> +
> block->fd = fd;
> return area;
> }
> Index: qemu/vl.c
> ===================================================================
> --- qemu.orig/vl.c
> +++ qemu/vl.c
> @@ -188,9 +188,7 @@ static int display_remote;
> const char* keyboard_layout = NULL;
> ram_addr_t ram_size;
> const char *mem_path = NULL;
> -#ifdef MAP_POPULATE
> int mem_prealloc = 0; /* force preallocation of physical target memory */
> -#endif
> int nb_nics;
> NICInfo nd_table[MAX_NICS];
> int autostart;
> @@ -3205,11 +3203,9 @@ int main(int argc, char **argv, char **e
> case QEMU_OPTION_mempath:
> mem_path = optarg;
> break;
> -#ifdef MAP_POPULATE
> case QEMU_OPTION_mem_prealloc:
> mem_prealloc = 1;
> break;
> -#endif
> case QEMU_OPTION_d:
> log_mask = optarg;
> break;
> Index: qemu/qemu-options.def
> ===================================================================
> --- qemu.orig/qemu-options.def
> +++ qemu/qemu-options.def
> @@ -66,11 +66,9 @@ stringify(DEFAULT_RAM_SIZE) "]\n", QEMU_
> DEF("mem-path", HAS_ARG, QEMU_OPTION_mempath,
> "-mem-path FILE provide backing storage for guest RAM\n", QEMU_ARCH_ALL)
>
> -#ifdef MAP_POPULATE
> DEF("mem-prealloc", 0, QEMU_OPTION_mem_prealloc,
> "-mem-prealloc preallocate guest memory (use with -mem-path)\n",
> QEMU_ARCH_ALL)
> -#endif
>
> DEF("k", HAS_ARG, QEMU_OPTION_k,
> "-k language use keyboard layout (for example 'fr' for French)\n",
> Index: git/qemu/qemu-options.hx
> ===================================================================
> --- qemu.orig/qemu-options.hx
> +++ qemu/qemu-options.hx
> @@ -228,7 +228,6 @@ STEXI
> Allocate guest RAM from a temporarily created file in @var{path}.
> ETEXI
>
> -#ifdef MAP_POPULATE
> DEF("mem-prealloc", 0, QEMU_OPTION_mem_prealloc,
> "-mem-prealloc preallocate guest memory (use with -mem-path)\n",
> QEMU_ARCH_ALL)
> @@ -237,7 +236,6 @@ STEXI
> @findex -mem-prealloc
> Preallocate memory when using -mem-path.
> ETEXI
> -#endif
>
> DEF("k", HAS_ARG, QEMU_OPTION_k,
> "-k language use keyboard layout (for example 'fr' for French)\n",
>
>
>
>
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [Qemu-devel] [patch 1/2] qemu: mempath: prefault pages manually
2013-10-08 0:41 ` [Qemu-devel] [patch 1/2] qemu: mempath: prefault pages manually Marcelo Tosatti
2013-10-08 8:03 ` Paolo Bonzini
2013-10-28 13:48 ` Paolo Bonzini
@ 2013-10-28 16:15 ` Peter Maydell
2 siblings, 0 replies; 22+ messages in thread
From: Peter Maydell @ 2013-10-28 16:15 UTC (permalink / raw)
To: Marcelo Tosatti; +Cc: QEMU Developers
On 8 October 2013 01:41, Marcelo Tosatti <mtosatti@redhat.com> wrote:
> + ret = sigaction(SIGBUS, &oldact, NULL);
> + if (ret) {
> + perror("file_ram_alloc: fail to reinstall signal handler");
"failed".
thanks
-- PMM
^ permalink raw reply [flat|nested] 22+ messages in thread
* [Qemu-devel] qemu: mempath: prefault pages manually (v3)
2013-10-28 13:48 ` Paolo Bonzini
@ 2013-10-28 19:21 ` Marcelo Tosatti
2013-10-28 20:51 ` [Qemu-devel] qemu: mempath: prefault pages manually (v4) Marcelo Tosatti
0 siblings, 1 reply; 22+ messages in thread
From: Marcelo Tosatti @ 2013-10-28 19:21 UTC (permalink / raw)
To: Paolo Bonzini; +Cc: qemu-devel
MAP_POPULATE mmap flag does not cause mmap to fail if allocation of the
entire area is not performed. HugeTLBfs performs reservation of pages
on a global basis: any further restriction to the reserved memory such
as cpusets placement or numa node policy is performed at fault time
only.
Manually fault in pages at allocation time. This allows memory
restrictions to be applied before guest initialization.
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
diff --git a/exec.c b/exec.c
index 2e31ffc..e6bdcec 100644
--- a/exec.c
+++ b/exec.c
@@ -902,6 +902,13 @@ static long gethugepagesize(const char *path)
return fs.f_bsize;
}
+static sigjmp_buf sigjump;
+
+static void sigbus_handler(int signal)
+{
+ siglongjmp(sigjump, 1);
+}
+
static void *file_ram_alloc(RAMBlock *block,
ram_addr_t memory,
const char *path)
@@ -911,9 +918,6 @@ static void *file_ram_alloc(RAMBlock *block,
char *c;
void *area;
int fd;
-#ifdef MAP_POPULATE
- int flags;
-#endif
unsigned long hpagesize;
hpagesize = gethugepagesize(path);
@@ -961,21 +965,52 @@ static void *file_ram_alloc(RAMBlock *block,
if (ftruncate(fd, memory))
perror("ftruncate");
-#ifdef MAP_POPULATE
- /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
- * MAP_PRIVATE is requested. For mem_prealloc we mmap as MAP_SHARED
- * to sidestep this quirk.
- */
- flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
- area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
-#else
area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
-#endif
if (area == MAP_FAILED) {
perror("file_ram_alloc: can't mmap RAM pages");
close(fd);
return (NULL);
}
+
+ if (mem_prealloc) {
+ int ret, i;
+ struct sigaction act, oldact;
+ sigset_t set, oldset;
+
+ memset(&act, 0, sizeof(act));
+ act.sa_handler = &sigbus_handler;
+ act.sa_flags = 0;
+
+ ret = sigaction(SIGBUS, &act, &oldact);
+ if (ret) {
+ perror("file_ram_alloc: fail to install signal handler");
+ exit(1);
+ }
+
+ /* unblock SIGBUS */
+ sigemptyset(&set);
+ sigaddset(&set, SIGBUS);
+ pthread_sigmask(SIG_UNBLOCK, &set, &oldset);
+
+ if (sigsetjmp(sigjump, 1)) {
+ fprintf(stderr, "file_ram_alloc: failed to preallocate pages\n");
+ exit(1);
+ }
+
+ /* MAP_POPULATE silently ignores failures */
+ for (i = 0; i < (memory/hpagesize)-1; i++) {
+ memset(area + (hpagesize*i), 0, 1);
+ }
+
+ ret = sigaction(SIGBUS, &oldact, NULL);
+ if (ret) {
+ perror("file_ram_alloc: fail to reinstall signal handler");
+ exit(1);
+ }
+
+ pthread_sigmask(SIG_SETMASK, &oldset, NULL);
+ }
+
block->fd = fd;
return area;
}
diff --git a/qemu-options.hx b/qemu-options.hx
index 5dc8b75..3674b3c 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -228,7 +228,6 @@ STEXI
Allocate guest RAM from a temporarily created file in @var{path}.
ETEXI
-#ifdef MAP_POPULATE
DEF("mem-prealloc", 0, QEMU_OPTION_mem_prealloc,
"-mem-prealloc preallocate guest memory (use with -mem-path)\n",
QEMU_ARCH_ALL)
@@ -237,7 +236,6 @@ STEXI
@findex -mem-prealloc
Preallocate memory when using -mem-path.
ETEXI
-#endif
DEF("k", HAS_ARG, QEMU_OPTION_k,
"-k language use keyboard layout (for example 'fr' for French)\n",
diff --git a/vl.c b/vl.c
index b42ac67..1e28c5c 100644
--- a/vl.c
+++ b/vl.c
@@ -188,9 +188,7 @@ static int display_remote;
const char* keyboard_layout = NULL;
ram_addr_t ram_size;
const char *mem_path = NULL;
-#ifdef MAP_POPULATE
int mem_prealloc = 0; /* force preallocation of physical target memory */
-#endif
int nb_nics;
NICInfo nd_table[MAX_NICS];
int autostart;
@@ -3205,11 +3203,9 @@ int main(int argc, char **argv, char **envp)
case QEMU_OPTION_mempath:
mem_path = optarg;
break;
-#ifdef MAP_POPULATE
case QEMU_OPTION_mem_prealloc:
mem_prealloc = 1;
break;
-#endif
case QEMU_OPTION_d:
log_mask = optarg;
break;
^ permalink raw reply related [flat|nested] 22+ messages in thread
* [Qemu-devel] qemu: mempath: prefault pages manually (v4)
2013-10-28 19:21 ` [Qemu-devel] qemu: mempath: prefault pages manually (v3) Marcelo Tosatti
@ 2013-10-28 20:51 ` Marcelo Tosatti
0 siblings, 0 replies; 22+ messages in thread
From: Marcelo Tosatti @ 2013-10-28 20:51 UTC (permalink / raw)
To: Paolo Bonzini; +Cc: qemu-devel
v4: s/fail/failed/ (Peter Maydell)
---
MAP_POPULATE mmap flag does not cause mmap to fail if allocation of the
entire area is not performed. HugeTLBfs performs reservation of pages
on a global basis: any further restriction to the reserved memory such
as cpusets placement or numa node policy is performed at fault time
only.
Manually fault in pages at allocation time. This allows memory
restrictions to be applied before guest initialization.
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
diff --git a/exec.c b/exec.c
index 2e31ffc..e6bdcec 100644
--- a/exec.c
+++ b/exec.c
@@ -902,6 +902,13 @@ static long gethugepagesize(const char *path)
return fs.f_bsize;
}
+static sigjmp_buf sigjump;
+
+static void sigbus_handler(int signal)
+{
+ siglongjmp(sigjump, 1);
+}
+
static void *file_ram_alloc(RAMBlock *block,
ram_addr_t memory,
const char *path)
@@ -911,9 +918,6 @@ static void *file_ram_alloc(RAMBlock *block,
char *c;
void *area;
int fd;
-#ifdef MAP_POPULATE
- int flags;
-#endif
unsigned long hpagesize;
hpagesize = gethugepagesize(path);
@@ -961,21 +965,52 @@ static void *file_ram_alloc(RAMBlock *block,
if (ftruncate(fd, memory))
perror("ftruncate");
-#ifdef MAP_POPULATE
- /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
- * MAP_PRIVATE is requested. For mem_prealloc we mmap as MAP_SHARED
- * to sidestep this quirk.
- */
- flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
- area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
-#else
area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
-#endif
if (area == MAP_FAILED) {
perror("file_ram_alloc: can't mmap RAM pages");
close(fd);
return (NULL);
}
+
+ if (mem_prealloc) {
+ int ret, i;
+ struct sigaction act, oldact;
+ sigset_t set, oldset;
+
+ memset(&act, 0, sizeof(act));
+ act.sa_handler = &sigbus_handler;
+ act.sa_flags = 0;
+
+ ret = sigaction(SIGBUS, &act, &oldact);
+ if (ret) {
+ perror("file_ram_alloc: failed to install signal handler");
+ exit(1);
+ }
+
+ /* unblock SIGBUS */
+ sigemptyset(&set);
+ sigaddset(&set, SIGBUS);
+ pthread_sigmask(SIG_UNBLOCK, &set, &oldset);
+
+ if (sigsetjmp(sigjump, 1)) {
+ fprintf(stderr, "file_ram_alloc: failed to preallocate pages\n");
+ exit(1);
+ }
+
+ /* MAP_POPULATE silently ignores failures */
+ for (i = 0; i < (memory/hpagesize)-1; i++) {
+ memset(area + (hpagesize*i), 0, 1);
+ }
+
+ ret = sigaction(SIGBUS, &oldact, NULL);
+ if (ret) {
+ perror("file_ram_alloc: failed to reinstall signal handler");
+ exit(1);
+ }
+
+ pthread_sigmask(SIG_SETMASK, &oldset, NULL);
+ }
+
block->fd = fd;
return area;
}
diff --git a/qemu-options.hx b/qemu-options.hx
index 5dc8b75..3674b3c 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -228,7 +228,6 @@ STEXI
Allocate guest RAM from a temporarily created file in @var{path}.
ETEXI
-#ifdef MAP_POPULATE
DEF("mem-prealloc", 0, QEMU_OPTION_mem_prealloc,
"-mem-prealloc preallocate guest memory (use with -mem-path)\n",
QEMU_ARCH_ALL)
@@ -237,7 +236,6 @@ STEXI
@findex -mem-prealloc
Preallocate memory when using -mem-path.
ETEXI
-#endif
DEF("k", HAS_ARG, QEMU_OPTION_k,
"-k language use keyboard layout (for example 'fr' for French)\n",
diff --git a/vl.c b/vl.c
index b42ac67..1e28c5c 100644
--- a/vl.c
+++ b/vl.c
@@ -188,9 +188,7 @@ static int display_remote;
const char* keyboard_layout = NULL;
ram_addr_t ram_size;
const char *mem_path = NULL;
-#ifdef MAP_POPULATE
int mem_prealloc = 0; /* force preallocation of physical target memory */
-#endif
int nb_nics;
NICInfo nd_table[MAX_NICS];
int autostart;
@@ -3205,11 +3203,9 @@ int main(int argc, char **argv, char **envp)
case QEMU_OPTION_mempath:
mem_path = optarg;
break;
-#ifdef MAP_POPULATE
case QEMU_OPTION_mem_prealloc:
mem_prealloc = 1;
break;
-#endif
case QEMU_OPTION_d:
log_mask = optarg;
break;
^ permalink raw reply related [flat|nested] 22+ messages in thread
end of thread, other threads:[~2013-10-28 20:58 UTC | newest]
Thread overview: 22+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-10-08 0:41 [Qemu-devel] [patch 0/2] force -mem-path RAM allocation Marcelo Tosatti
2013-10-08 0:41 ` [Qemu-devel] [patch 1/2] qemu: mempath: prefault pages manually Marcelo Tosatti
2013-10-08 8:03 ` Paolo Bonzini
2013-10-08 21:51 ` Marcelo Tosatti
2013-10-09 8:05 ` Paolo Bonzini
2013-10-09 19:41 ` Marcelo Tosatti
2013-10-09 21:26 ` Paolo Bonzini
2013-10-10 2:11 ` Marcelo Tosatti
2013-10-10 6:30 ` Paolo Bonzini
2013-10-28 13:48 ` Paolo Bonzini
2013-10-28 19:21 ` [Qemu-devel] qemu: mempath: prefault pages manually (v3) Marcelo Tosatti
2013-10-28 20:51 ` [Qemu-devel] qemu: mempath: prefault pages manually (v4) Marcelo Tosatti
2013-10-28 16:15 ` [Qemu-devel] [patch 1/2] qemu: mempath: prefault pages manually Peter Maydell
2013-10-08 0:41 ` [Qemu-devel] [patch 2/2] qemu: add -mem-path-force option to force RAM allocation via -mem-path Marcelo Tosatti
2013-10-08 7:32 ` [Qemu-devel] [patch 0/2] force -mem-path RAM allocation Markus Armbruster
2013-10-08 8:02 ` Paolo Bonzini
2013-10-08 22:03 ` Marcelo Tosatti
2013-10-09 6:23 ` Markus Armbruster
2013-10-09 20:05 ` Marcelo Tosatti
2013-10-09 21:27 ` Paolo Bonzini
2013-10-10 15:17 ` Marcelo Tosatti
2013-10-09 7:43 ` Paolo Bonzini
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.