* [Qemu-devel] [PATCH v4 0/5] Better allocation of code_gen_buffer
@ 2012-10-16  7:30 Richard Henderson
  2012-10-16  7:30 ` [Qemu-devel] [PATCH 1/5] exec: Split up and tidy code_gen_buffer Richard Henderson
                   ` (5 more replies)
  0 siblings, 6 replies; 9+ messages in thread
From: Richard Henderson @ 2012-10-16  7:30 UTC (permalink / raw)
  To: qemu-devel; +Cc: blauwirbel

Changes v3->v4:
  * Patch 5 actually included,
  * Patch 1 updates printf format for changed size_t.


r~


Richard Henderson (5):
  exec: Split up and tidy code_gen_buffer
  exec: Don't make DEFAULT_CODE_GEN_BUFFER_SIZE too large
  exec: Do not use absolute address hints for code_gen_buffer with
    -fpie
  exec: Allocate code_gen_prologue from code_gen_buffer
  exec: Make MIN_CODE_GEN_BUFFER_SIZE private to exec.c

 exec-all.h |   2 -
 exec.c     | 238 +++++++++++++++++++++++++++++++++----------------------------
 tcg/tcg.h  |   2 +-
 3 files changed, 128 insertions(+), 114 deletions(-)

-- 
1.7.11.7


* [Qemu-devel] [PATCH 1/5] exec: Split up and tidy code_gen_buffer
  2012-10-16  7:30 [Qemu-devel] [PATCH v4 0/5] Better allocation of code_gen_buffer Richard Henderson
@ 2012-10-16  7:30 ` Richard Henderson
  2012-10-16  7:30 ` [Qemu-devel] [PATCH 2/5] exec: Don't make DEFAULT_CODE_GEN_BUFFER_SIZE too large Richard Henderson
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 9+ messages in thread
From: Richard Henderson @ 2012-10-16  7:30 UTC (permalink / raw)
  To: qemu-devel; +Cc: blauwirbel

The setup of code_gen_buffer now consists of:

  * a macro definition of MAX_CODE_GEN_BUFFER_SIZE with host-specific
    values;

  * a function, size_code_gen_buffer, that applies most of the reasoning
    for choosing a buffer size;

  * three variants of a function, alloc_code_gen_buffer, each containing
    the logic for allocating executable memory via one particular
    allocation mechanism.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 exec.c | 195 ++++++++++++++++++++++++++++++++++-------------------------------
 1 file changed, 103 insertions(+), 92 deletions(-)

diff --git a/exec.c b/exec.c
index 7899042..eecae2f 100644
--- a/exec.c
+++ b/exec.c
@@ -103,9 +103,9 @@ spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
 
 uint8_t code_gen_prologue[1024] code_gen_section;
 static uint8_t *code_gen_buffer;
-static unsigned long code_gen_buffer_size;
+static size_t code_gen_buffer_size;
 /* threshold to flush the translated code buffer */
-static unsigned long code_gen_buffer_max_size;
+static size_t code_gen_buffer_max_size;
 static uint8_t *code_gen_ptr;
 
 #if !defined(CONFIG_USER_ONLY)
@@ -497,110 +497,121 @@ bool memory_region_is_unassigned(MemoryRegion *mr)
 #define mmap_unlock() do { } while(0)
 #endif
 
-#define DEFAULT_CODE_GEN_BUFFER_SIZE (32 * 1024 * 1024)
-
 #if defined(CONFIG_USER_ONLY)
 /* Currently it is not recommended to allocate big chunks of data in
-   user mode. It will change when a dedicated libc will be used */
+   user mode. It will change when a dedicated libc will be used.  */
+/* ??? 64-bit hosts ought to have no problem mmaping data outside the
+   region in which the guest needs to run.  Revisit this.  */
 #define USE_STATIC_CODE_GEN_BUFFER
 #endif
 
-#ifdef USE_STATIC_CODE_GEN_BUFFER
-static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
-               __attribute__((aligned (CODE_GEN_ALIGN)));
+/* ??? Should configure for this, not list operating systems here.  */
+#if (defined(__linux__) \
+    || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
+    || defined(__DragonFly__) || defined(__OpenBSD__) \
+    || defined(__NetBSD__))
+# define USE_MMAP
 #endif
 
-static void code_gen_alloc(unsigned long tb_size)
+/* Maximum size of the code gen buffer we'd like to use.  Unless otherwise
+   indicated, this is constrained by the range of direct branches on the
+   host cpu, as used by the TCG implementation of goto_tb.  */
+#if defined(__x86_64__)
+# define MAX_CODE_GEN_BUFFER_SIZE  (2ul * 1024 * 1024 * 1024)
+#elif defined(__sparc__)
+# define MAX_CODE_GEN_BUFFER_SIZE  (2ul * 1024 * 1024 * 1024)
+#elif defined(__arm__)
+# define MAX_CODE_GEN_BUFFER_SIZE  (16u * 1024 * 1024)
+#elif defined(__s390x__)
+  /* We have a +- 4GB range on the branches; leave some slop.  */
+# define MAX_CODE_GEN_BUFFER_SIZE  (3ul * 1024 * 1024 * 1024)
+#else
+# define MAX_CODE_GEN_BUFFER_SIZE  ((size_t)-1)
+#endif
+
+#define DEFAULT_CODE_GEN_BUFFER_SIZE (32u * 1024 * 1024)
+
+static inline size_t size_code_gen_buffer(size_t tb_size)
 {
+    /* Size the buffer.  */
+    if (tb_size == 0) {
 #ifdef USE_STATIC_CODE_GEN_BUFFER
-    code_gen_buffer = static_code_gen_buffer;
-    code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
-    map_exec(code_gen_buffer, code_gen_buffer_size);
-#else
-    code_gen_buffer_size = tb_size;
-    if (code_gen_buffer_size == 0) {
-#if defined(CONFIG_USER_ONLY)
-        code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
+        tb_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
 #else
-        /* XXX: needs adjustments */
-        code_gen_buffer_size = (unsigned long)(ram_size / 4);
+        /* ??? Needs adjustments.  */
+        /* ??? If we relax the requirement that CONFIG_USER_ONLY use the
+           static buffer, we could size this on RESERVED_VA, on the text
+           segment size of the executable, or continue to use the default.  */
+        tb_size = (unsigned long)(ram_size / 4);
 #endif
     }
-    if (code_gen_buffer_size < MIN_CODE_GEN_BUFFER_SIZE)
-        code_gen_buffer_size = MIN_CODE_GEN_BUFFER_SIZE;
-    /* The code gen buffer location may have constraints depending on
-       the host cpu and OS */
-#if defined(__linux__) 
-    {
-        int flags;
-        void *start = NULL;
-
-        flags = MAP_PRIVATE | MAP_ANONYMOUS;
-#if defined(__x86_64__)
-        flags |= MAP_32BIT;
-        /* Cannot map more than that */
-        if (code_gen_buffer_size > (800 * 1024 * 1024))
-            code_gen_buffer_size = (800 * 1024 * 1024);
-#elif defined(__sparc__) && HOST_LONG_BITS == 64
-        // Map the buffer below 2G, so we can use direct calls and branches
-        start = (void *) 0x40000000UL;
-        if (code_gen_buffer_size > (512 * 1024 * 1024))
-            code_gen_buffer_size = (512 * 1024 * 1024);
-#elif defined(__arm__)
-        /* Keep the buffer no bigger than 16MB to branch between blocks */
-        if (code_gen_buffer_size > 16 * 1024 * 1024)
-            code_gen_buffer_size = 16 * 1024 * 1024;
-#elif defined(__s390x__)
-        /* Map the buffer so that we can use direct calls and branches.  */
-        /* We have a +- 4GB range on the branches; leave some slop.  */
-        if (code_gen_buffer_size > (3ul * 1024 * 1024 * 1024)) {
-            code_gen_buffer_size = 3ul * 1024 * 1024 * 1024;
-        }
-        start = (void *)0x90000000UL;
-#endif
-        code_gen_buffer = mmap(start, code_gen_buffer_size,
-                               PROT_WRITE | PROT_READ | PROT_EXEC,
-                               flags, -1, 0);
-        if (code_gen_buffer == MAP_FAILED) {
-            fprintf(stderr, "Could not allocate dynamic translator buffer\n");
-            exit(1);
-        }
+    if (tb_size < MIN_CODE_GEN_BUFFER_SIZE) {
+        tb_size = MIN_CODE_GEN_BUFFER_SIZE;
     }
-#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
-    || defined(__DragonFly__) || defined(__OpenBSD__) \
-    || defined(__NetBSD__)
-    {
-        int flags;
-        void *addr = NULL;
-        flags = MAP_PRIVATE | MAP_ANONYMOUS;
-#if defined(__x86_64__)
-        /* FreeBSD doesn't have MAP_32BIT, use MAP_FIXED and assume
-         * 0x40000000 is free */
-        flags |= MAP_FIXED;
-        addr = (void *)0x40000000;
-        /* Cannot map more than that */
-        if (code_gen_buffer_size > (800 * 1024 * 1024))
-            code_gen_buffer_size = (800 * 1024 * 1024);
-#elif defined(__sparc__) && HOST_LONG_BITS == 64
-        // Map the buffer below 2G, so we can use direct calls and branches
-        addr = (void *) 0x40000000UL;
-        if (code_gen_buffer_size > (512 * 1024 * 1024)) {
-            code_gen_buffer_size = (512 * 1024 * 1024);
-        }
-#endif
-        code_gen_buffer = mmap(addr, code_gen_buffer_size,
-                               PROT_WRITE | PROT_READ | PROT_EXEC, 
-                               flags, -1, 0);
-        if (code_gen_buffer == MAP_FAILED) {
-            fprintf(stderr, "Could not allocate dynamic translator buffer\n");
-            exit(1);
-        }
+    if (tb_size > MAX_CODE_GEN_BUFFER_SIZE) {
+        tb_size = MAX_CODE_GEN_BUFFER_SIZE;
     }
+    code_gen_buffer_size = tb_size;
+    return tb_size;
+}
+
+#ifdef USE_STATIC_CODE_GEN_BUFFER
+static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
+    __attribute__((aligned(CODE_GEN_ALIGN)));
+
+static inline void *alloc_code_gen_buffer(void)
+{
+    map_exec(static_code_gen_buffer, code_gen_buffer_size);
+    return static_code_gen_buffer;
+}
+#elif defined(USE_MMAP)
+static inline void *alloc_code_gen_buffer(void)
+{
+    int flags = MAP_PRIVATE | MAP_ANONYMOUS;
+    uintptr_t start = 0;
+    void *buf;
+
+    /* Constrain the position of the buffer based on the host cpu.
+       Note that these addresses are chosen in concert with the
+       addresses assigned in the relevant linker script file.  */
+# if defined(__x86_64__) && defined(MAP_32BIT)
+    /* Force the memory down into low memory with the executable.
+       Leave the choice of exact location with the kernel.  */
+    flags |= MAP_32BIT;
+    /* Cannot expect to map more than 800MB in low memory.  */
+    if (code_gen_buffer_size > 800u * 1024 * 1024) {
+        code_gen_buffer_size = 800u * 1024 * 1024;
+    }
+# elif defined(__sparc__)
+    start = 0x40000000ul;
+# elif defined(__s390x__)
+    start = 0x90000000ul;
+# endif
+
+    buf = mmap((void *)start, code_gen_buffer_size,
+               PROT_WRITE | PROT_READ | PROT_EXEC, flags, -1, 0);
+    return buf == MAP_FAILED ? NULL : buf;
+}
 #else
-    code_gen_buffer = g_malloc(code_gen_buffer_size);
-    map_exec(code_gen_buffer, code_gen_buffer_size);
-#endif
-#endif /* !USE_STATIC_CODE_GEN_BUFFER */
+static inline void *alloc_code_gen_buffer(void)
+{
+    void *buf = g_malloc(code_gen_buffer_size);
+    if (buf) {
+        map_exec(buf, code_gen_buffer_size);
+    }
+    return buf;
+}
+#endif /* USE_STATIC_CODE_GEN_BUFFER, USE_MMAP */
+
+static inline void code_gen_alloc(size_t tb_size)
+{
+    code_gen_buffer_size = size_code_gen_buffer(tb_size);
+    code_gen_buffer = alloc_code_gen_buffer();
+    if (code_gen_buffer == NULL) {
+        fprintf(stderr, "Could not allocate dynamic translator buffer\n");
+        exit(1);
+    }
+
     map_exec(code_gen_prologue, sizeof(code_gen_prologue));
     code_gen_buffer_max_size = code_gen_buffer_size -
         (TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
@@ -4188,7 +4199,7 @@ void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
     }
     /* XXX: avoid using doubles ? */
     cpu_fprintf(f, "Translation buffer state:\n");
-    cpu_fprintf(f, "gen code size       %td/%ld\n",
+    cpu_fprintf(f, "gen code size       %td/%zd\n",
                 code_gen_ptr - code_gen_buffer, code_gen_buffer_max_size);
     cpu_fprintf(f, "TB count            %d/%d\n", 
                 nb_tbs, code_gen_max_blocks);
-- 
1.7.11.7


* [Qemu-devel] [PATCH 2/5] exec: Don't make DEFAULT_CODE_GEN_BUFFER_SIZE too large
  2012-10-16  7:30 [Qemu-devel] [PATCH v4 0/5] Better allocation of code_gen_buffer Richard Henderson
  2012-10-16  7:30 ` [Qemu-devel] [PATCH 1/5] exec: Split up and tidy code_gen_buffer Richard Henderson
@ 2012-10-16  7:30 ` Richard Henderson
  2012-10-16  7:30 ` [Qemu-devel] [PATCH 3/5] exec: Do not use absolute address hints for code_gen_buffer with -fpie Richard Henderson
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 9+ messages in thread
From: Richard Henderson @ 2012-10-16  7:30 UTC (permalink / raw)
  To: qemu-devel; +Cc: blauwirbel

For ARM we cap the buffer size at 16MB; do not allocate a 32MB default in that case.
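
As a rough standalone illustration (not part of the patch; the MAX values
are the ones defined in the previous patch), this is how the clamped
default resolves on two hosts:

    /* Sketch only: evaluate DEFAULT_CODE_GEN_BUFFER_SIZE after clamping it
       against the per-host MAX_CODE_GEN_BUFFER_SIZE from patch 1.  */
    #include <stdio.h>

    #define MAX_ARM       (16u * 1024 * 1024)
    #define MAX_X86_64    (2ul * 1024 * 1024 * 1024)
    #define DEFAULT_1     (32u * 1024 * 1024)
    #define CLAMPED(max)  (DEFAULT_1 < (max) ? DEFAULT_1 : (max))

    int main(void)
    {
        printf("arm default:    %lu MB\n", (unsigned long)CLAMPED(MAX_ARM) >> 20);
        printf("x86_64 default: %lu MB\n", (unsigned long)CLAMPED(MAX_X86_64) >> 20);
        return 0;   /* prints 16 MB for arm, 32 MB for x86_64 */
    }

In particular, the static buffer used for CONFIG_USER_ONLY is declared with
DEFAULT_CODE_GEN_BUFFER_SIZE, so on ARM it now stays within the 16MB
direct-branch cap instead of reserving 32MB of which only half could be used.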

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 exec.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/exec.c b/exec.c
index eecae2f..6c0b2d7 100644
--- a/exec.c
+++ b/exec.c
@@ -529,7 +529,11 @@ bool memory_region_is_unassigned(MemoryRegion *mr)
 # define MAX_CODE_GEN_BUFFER_SIZE  ((size_t)-1)
 #endif
 
-#define DEFAULT_CODE_GEN_BUFFER_SIZE (32u * 1024 * 1024)
+#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (32u * 1024 * 1024)
+
+#define DEFAULT_CODE_GEN_BUFFER_SIZE \
+  (DEFAULT_CODE_GEN_BUFFER_SIZE_1 < MAX_CODE_GEN_BUFFER_SIZE \
+   ? DEFAULT_CODE_GEN_BUFFER_SIZE_1 : MAX_CODE_GEN_BUFFER_SIZE)
 
 static inline size_t size_code_gen_buffer(size_t tb_size)
 {
-- 
1.7.11.7


* [Qemu-devel] [PATCH 3/5] exec: Do not use absolute address hints for code_gen_buffer with -fpie
  2012-10-16  7:30 [Qemu-devel] [PATCH v4 0/5] Better allocation of code_gen_buffer Richard Henderson
  2012-10-16  7:30 ` [Qemu-devel] [PATCH 1/5] exec: Split up and tidy code_gen_buffer Richard Henderson
  2012-10-16  7:30 ` [Qemu-devel] [PATCH 2/5] exec: Don't make DEFAULT_CODE_GEN_BUFFER_SIZE too large Richard Henderson
@ 2012-10-16  7:30 ` Richard Henderson
  2012-11-18 20:48   ` Stefan Weil
  2012-10-16  7:30 ` [Qemu-devel] [PATCH 4/5] exec: Allocate code_gen_prologue from code_gen_buffer Richard Henderson
                   ` (2 subsequent siblings)
  5 siblings, 1 reply; 9+ messages in thread
From: Richard Henderson @ 2012-10-16  7:30 UTC (permalink / raw)
  To: qemu-devel; +Cc: blauwirbel

The hard-coded addresses inside alloc_code_gen_buffer only make sense
if we're building an executable that will actually run at the address
we've put into the linker scripts.

When we're building with -fpie, the executable will run at some
random location chosen by the kernel.  We get better placement for
the code_gen_buffer if we allow the kernel to place the memory,
as it will tend to place it near the executable, based on the
PROT_EXEC bit.

Since code_gen_prologue is always inside the executable, this effect
is easily seen at the end of most TBs, with the exit_tb opcode, and
with any calls to helper functions.
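
The placement effect is easy to probe outside of QEMU.  Below is a minimal
standalone sketch (not from this patch; it assumes a Linux-like host that
allows anonymous PROT_EXEC mappings): it maps an anonymous executable
region with no address hint and reports how far the mapping lands from a
function inside the main executable.

    /* Sketch: with no hint the kernel picks the address; for a -fpie build
       the executable itself lives in the mmap area, so the new mapping
       usually ends up nearby.  A fixed hint far from the load address
       would defeat that.  */
    #include <stdio.h>
    #include <stdint.h>
    #include <sys/mman.h>

    static void anchor_in_exe(void) { }   /* stands in for code_gen_prologue */

    int main(void)
    {
        size_t size = 32u * 1024 * 1024;
        void *buf = mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC,
                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (buf == MAP_FAILED) {
            perror("mmap");
            return 1;
        }
        long delta_mb = (long)(((intptr_t)buf - (intptr_t)&anchor_in_exe)
                               / (1024 * 1024));
        printf("buffer %p, executable %p, distance %ld MB\n",
               buf, (void *)(uintptr_t)&anchor_in_exe, delta_mb);
        munmap(buf, size);
        return 0;
    }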

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 exec.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/exec.c b/exec.c
index 6c0b2d7..5e33a3d 100644
--- a/exec.c
+++ b/exec.c
@@ -578,7 +578,12 @@ static inline void *alloc_code_gen_buffer(void)
     /* Constrain the position of the buffer based on the host cpu.
        Note that these addresses are chosen in concert with the
        addresses assigned in the relevant linker script file.  */
-# if defined(__x86_64__) && defined(MAP_32BIT)
+# if defined(__PIE__) || defined(__PIC__)
+    /* Don't bother setting a preferred location if we're building
+       a position-independent executable.  We're more likely to get
+       an address near the main executable if we let the kernel
+       choose the address.  */
+# elif defined(__x86_64__) && defined(MAP_32BIT)
     /* Force the memory down into low memory with the executable.
        Leave the choice of exact location with the kernel.  */
     flags |= MAP_32BIT;
-- 
1.7.11.7


* [Qemu-devel] [PATCH 4/5] exec: Allocate code_gen_prologue from code_gen_buffer
  2012-10-16  7:30 [Qemu-devel] [PATCH v4 0/5] Better allocation of code_gen_buffer Richard Henderson
                   ` (2 preceding siblings ...)
  2012-10-16  7:30 ` [Qemu-devel] [PATCH 3/5] exec: Do not use absolute address hints for code_gen_buffer with -fpie Richard Henderson
@ 2012-10-16  7:30 ` Richard Henderson
  2012-10-16  7:30 ` [Qemu-devel] [PATCH 5/5] exec: Make MIN_CODE_GEN_BUFFER_SIZE private to exec.c Richard Henderson
  2012-10-20  8:47 ` [Qemu-devel] [PATCH v4 0/5] Better allocation of code_gen_buffer Blue Swirl
  5 siblings, 0 replies; 9+ messages in thread
From: Richard Henderson @ 2012-10-16  7:30 UTC (permalink / raw)
  To: qemu-devel; +Cc: blauwirbel

We had a hack for ARM and Sparc, allocating code_gen_prologue to a
special section, which honestly does no good in certain cases.
We've already got limits on code_gen_buffer_size to ensure that all
TBs can use direct branches between themselves; reuse this limit to
ensure the prologue is also reachable.

As a bonus, we get to avoid marking a page of the main executable's
data segment as executable.
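
As a minimal sketch of the invariant this relies on (the size is the ARM
cap from earlier in the series; the base address is made up):

    /* If the whole buffer fits within the host's direct-branch range, then
       a prologue carved out of the last 1024 bytes is reachable from every
       TB in the buffer; the worst case is a TB at the very start.  */
    #include <assert.h>
    #include <stddef.h>
    #include <stdint.h>

    int main(void)
    {
        const size_t branch_range = 16u * 1024 * 1024;   /* ARM cap, patch 1 */
        const size_t buf_size     = 16u * 1024 * 1024;   /* already clamped  */
        const size_t prologue_sz  = 1024;

        const uintptr_t buf      = 0x40000000u;          /* hypothetical base */
        const uintptr_t prologue = buf + buf_size - prologue_sz;

        assert(prologue - buf <= branch_range);          /* worst-case branch */
        assert(prologue + prologue_sz == buf + buf_size);
        return 0;
    }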

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 exec.c    | 30 +++++++++++-------------------
 tcg/tcg.h |  2 +-
 2 files changed, 12 insertions(+), 20 deletions(-)

diff --git a/exec.c b/exec.c
index 5e33a3d..8958b28 100644
--- a/exec.c
+++ b/exec.c
@@ -86,22 +86,7 @@ static int nb_tbs;
 /* any access to the tbs or the page table must use this lock */
 spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
 
-#if defined(__arm__) || defined(__sparc__)
-/* The prologue must be reachable with a direct jump. ARM and Sparc64
- have limited branch ranges (possibly also PPC) so place it in a
- section close to code segment. */
-#define code_gen_section                                \
-    __attribute__((__section__(".gen_code")))           \
-    __attribute__((aligned (32)))
-#elif defined(_WIN32) && !defined(_WIN64)
-#define code_gen_section                                \
-    __attribute__((aligned (16)))
-#else
-#define code_gen_section                                \
-    __attribute__((aligned (32)))
-#endif
-
-uint8_t code_gen_prologue[1024] code_gen_section;
+uint8_t *code_gen_prologue;
 static uint8_t *code_gen_buffer;
 static size_t code_gen_buffer_size;
 /* threshold to flush the translated code buffer */
@@ -221,7 +206,7 @@ static int tb_flush_count;
 static int tb_phys_invalidate_count;
 
 #ifdef _WIN32
-static void map_exec(void *addr, long size)
+static inline void map_exec(void *addr, long size)
 {
     DWORD old_protect;
     VirtualProtect(addr, size,
@@ -229,7 +214,7 @@ static void map_exec(void *addr, long size)
     
 }
 #else
-static void map_exec(void *addr, long size)
+static inline void map_exec(void *addr, long size)
 {
     unsigned long start, end, page_size;
     
@@ -621,7 +606,14 @@ static inline void code_gen_alloc(size_t tb_size)
         exit(1);
     }
 
-    map_exec(code_gen_prologue, sizeof(code_gen_prologue));
+    /* Steal room for the prologue at the end of the buffer.  This ensures
+       (via the MAX_CODE_GEN_BUFFER_SIZE limits above) that direct branches
+       from TB's to the prologue are going to be in range.  It also means
+       that we don't need to mark (additional) portions of the data segment
+       as executable.  */
+    code_gen_prologue = code_gen_buffer + code_gen_buffer_size - 1024;
+    code_gen_buffer_size -= 1024;
+
     code_gen_buffer_max_size = code_gen_buffer_size -
         (TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
     code_gen_max_blocks = code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
diff --git a/tcg/tcg.h b/tcg/tcg.h
index 7bafe0e..45e94f5 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -616,7 +616,7 @@ TCGv_i64 tcg_const_i64(int64_t val);
 TCGv_i32 tcg_const_local_i32(int32_t val);
 TCGv_i64 tcg_const_local_i64(int64_t val);
 
-extern uint8_t code_gen_prologue[];
+extern uint8_t *code_gen_prologue;
 
 /* TCG targets may use a different definition of tcg_qemu_tb_exec. */
 #if !defined(tcg_qemu_tb_exec)
-- 
1.7.11.7


* [Qemu-devel] [PATCH 5/5] exec: Make MIN_CODE_GEN_BUFFER_SIZE private to exec.c
  2012-10-16  7:30 [Qemu-devel] [PATCH v4 0/5] Better allocation of code_gen_buffer Richard Henderson
                   ` (3 preceding siblings ...)
  2012-10-16  7:30 ` [Qemu-devel] [PATCH 4/5] exec: Allocate code_gen_prologue from code_gen_buffer Richard Henderson
@ 2012-10-16  7:30 ` Richard Henderson
  2012-10-20  8:47 ` [Qemu-devel] [PATCH v4 0/5] Better allocation of code_gen_buffer Blue Swirl
  5 siblings, 0 replies; 9+ messages in thread
From: Richard Henderson @ 2012-10-16  7:30 UTC (permalink / raw)
  To: qemu-devel; +Cc: blauwirbel

It is used nowhere else, and the corresponding MAX_CODE_GEN_BUFFER_SIZE
also lives there.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 exec-all.h | 2 --
 exec.c     | 4 ++++
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/exec-all.h b/exec-all.h
index 6516da0..7f29820 100644
--- a/exec-all.h
+++ b/exec-all.h
@@ -121,8 +121,6 @@ static inline void tlb_flush(CPUArchState *env, int flush_global)
 #define CODE_GEN_PHYS_HASH_BITS     15
 #define CODE_GEN_PHYS_HASH_SIZE     (1 << CODE_GEN_PHYS_HASH_BITS)
 
-#define MIN_CODE_GEN_BUFFER_SIZE     (1024 * 1024)
-
 /* estimated block size for TB allocation */
 /* XXX: use a per code average code fragment size and modulate it
    according to the host CPU */
diff --git a/exec.c b/exec.c
index 8958b28..4a86b0f 100644
--- a/exec.c
+++ b/exec.c
@@ -498,6 +498,10 @@ bool memory_region_is_unassigned(MemoryRegion *mr)
 # define USE_MMAP
 #endif
 
+/* Minimum size of the code gen buffer.  This number is randomly chosen,
+   but not so small that we can't have a fair number of TB's live.  */
+#define MIN_CODE_GEN_BUFFER_SIZE     (1024u * 1024)
+
 /* Maximum size of the code gen buffer we'd like to use.  Unless otherwise
    indicated, this is constrained by the range of direct branches on the
    host cpu, as used by the TCG implementation of goto_tb.  */
-- 
1.7.11.7


* Re: [Qemu-devel] [PATCH v4 0/5] Better allocation of code_gen_buffer
  2012-10-16  7:30 [Qemu-devel] [PATCH v4 0/5] Better allocation of code_gen_buffer Richard Henderson
                   ` (4 preceding siblings ...)
  2012-10-16  7:30 ` [Qemu-devel] [PATCH 5/5] exec: Make MIN_CODE_GEN_BUFFER_SIZE private to exec.c Richard Henderson
@ 2012-10-20  8:47 ` Blue Swirl
  5 siblings, 0 replies; 9+ messages in thread
From: Blue Swirl @ 2012-10-20  8:47 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-devel

Thanks, applied all.

On Tue, Oct 16, 2012 at 7:30 AM, Richard Henderson <rth@twiddle.net> wrote:
> Changes v3->v4:
>   * Patch 5 actually included,
>   * Patch 1 updates printf format for changed size_t.
>
>
> r~
>
>
> Richard Henderson (5):
>   exec: Split up and tidy code_gen_buffer
>   exec: Don't make DEFAULT_CODE_GEN_BUFFER_SIZE too large
>   exec: Do not use absolute address hints for code_gen_buffer with
>     -fpie
>   exec: Allocate code_gen_prologue from code_gen_buffer
>   exec: Make MIN_CODE_GEN_BUFFER_SIZE private to exec.c
>
>  exec-all.h |   2 -
>  exec.c     | 238 +++++++++++++++++++++++++++++++++----------------------------
>  tcg/tcg.h  |   2 +-
>  3 files changed, 128 insertions(+), 114 deletions(-)
>
> --
> 1.7.11.7
>


* Re: [Qemu-devel] [PATCH 3/5] exec: Do not use absolute address hints for code_gen_buffer with -fpie
  2012-10-16  7:30 ` [Qemu-devel] [PATCH 3/5] exec: Do not use absolute address hints for code_gen_buffer with -fpie Richard Henderson
@ 2012-11-18 20:48   ` Stefan Weil
  2012-11-19 16:27     ` Richard Henderson
  0 siblings, 1 reply; 9+ messages in thread
From: Stefan Weil @ 2012-11-18 20:48 UTC (permalink / raw)
  To: Richard Henderson; +Cc: blauwirbel, qemu-devel

On 16.10.2012 09:30, Richard Henderson wrote:
> The hard-coded addresses inside alloc_code_gen_buffer only make sense
> if we're building an executable that will actually run at the address
> we've put into the linker scripts.
>
> When we're building with -fpie, the executable will run at some
> random location chosen by the kernel.  We get better placement for
> the code_gen_buffer if we allow the kernel to place the memory,
> as it will tend to place it near the executable, based on the
> PROT_EXEC bit.
>
> Since code_gen_prologue is always inside the executable, this effect
> is easily seen at the end of most TBs, with the exit_tb opcode, and
> with any calls to helper functions.
>
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
>   exec.c | 7 ++++++-
>   1 file changed, 6 insertions(+), 1 deletion(-)
>
> diff --git a/exec.c b/exec.c
> index 6c0b2d7..5e33a3d 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -578,7 +578,12 @@ static inline void *alloc_code_gen_buffer(void)
>       /* Constrain the position of the buffer based on the host cpu.
>          Note that these addresses are chosen in concert with the
>          addresses assigned in the relevant linker script file.  */
> -# if defined(__x86_64__) && defined(MAP_32BIT)
> +# if defined(__PIE__) || defined(__PIC__)
> +    /* Don't bother setting a preferred location if we're building
> +       a position-independent executable.  We're more likely to get
> +       an address near the main executable if we let the kernel
> +       choose the address.  */
> +# elif defined(__x86_64__) && defined(MAP_32BIT)
>       /* Force the memory down into low memory with the executable.
>          Leave the choice of exact location with the kernel.  */
>       flags |= MAP_32BIT;


This patch breaks the TCG interpreter. Here is a test run on Debian 
x86_64 (output shortened):

$ ./configure --enable-debug --enable-tcg-interpreter \
    --target-list=i386-softmmu --disable-docs
$ make
$ gdb --args i386-softmmu/qemu-system-i386 -L pc-bios
(gdb) r
Starting program: i386-softmmu/qemu-system-i386 -L pc-bios
[Thread debugging using libthread_db enabled]
[New Thread 0x7fffe8f73700 (LWP 1446)]
[New Thread 0x7fffe0470700 (LWP 1447)]

Program received signal SIGSEGV, Segmentation fault.
[Switching to Thread 0x7fffe8f73700 (LWP 1446)]
0x00005555558e7ded in tcg_qemu_tb_exec (cpustate=0x55555656f7e0, 
tb_ptr=0xeab74acb <Address 0xeab74acb out of bounds>) at tci.c:445
445             TCGOpcode opc = tb_ptr[0];
(gdb) q

QEMU crashes early while executing a jmp opcode.

This patch restores functionality:

diff --git a/exec.c b/exec.c
index 8435de0..44e4504 100644
--- a/exec.c
+++ b/exec.c
@@ -564,7 +564,7 @@ static inline void *alloc_code_gen_buffer(void)
      /* Constrain the position of the buffer based on the host cpu.
         Note that these addresses are chosen in concert with the
         addresses assigned in the relevant linker script file.  */
-# if defined(__PIE__) || defined(__PIC__)
+# if !defined(CONFIG_TCG_INTERPRETER) && (defined(__PIE__) || defined(__PIC__))
      /* Don't bother setting a preferred location if we're building
         a position-independent executable.  We're more likely to get
         an address near the main executable if we let the kernel

Regards

Stefan W.


* Re: [Qemu-devel] [PATCH 3/5] exec: Do not use absolute address hints for code_gen_buffer with -fpie
  2012-11-18 20:48   ` Stefan Weil
@ 2012-11-19 16:27     ` Richard Henderson
  0 siblings, 0 replies; 9+ messages in thread
From: Richard Henderson @ 2012-11-19 16:27 UTC (permalink / raw)
  To: Stefan Weil; +Cc: blauwirbel, qemu-devel

On 2012-11-18 12:48, Stefan Weil wrote:
> This patch breaks the TCG interpreter. Here is a test run on Debian x86_64 (output shortened):

Nack.  This is hiding some bug elsewhere in the tcg interpreter.

I disbelieve that the interpreter *requires* a pointer in the
low 32-bits of the x86_64 address space.


r~

