References: <1421428797-23697-1-git-send-email-fred.konrad@greensocs.com>
 <1421428797-23697-3-git-send-email-fred.konrad@greensocs.com>
From: Alex Bennée
In-Reply-To: <1421428797-23697-3-git-send-email-fred.konrad@greensocs.com>
Date: Tue, 27 Jan 2015 14:45:52 +0000
Message-ID: <87lhko704f.fsf@linaro.org>
Subject: Re: [Qemu-devel] [RFC 02/10] use a different translation block list for each cpu.
To: fred.konrad@greensocs.com
Cc: mttcg@listserver.greensocs.com, peter.maydell@linaro.org,
 jan.kiszka@siemens.com, mark.burton@greensocs.com, qemu-devel@nongnu.org,
 agraf@suse.de, pbonzini@redhat.com

fred.konrad@greensocs.com writes:

> From: KONRAD Frederic <fred.konrad@greensocs.com>
>
> We need a different TranslationBlock list for each core in case of multithread
> TCG.
>
> Signed-off-by: KONRAD Frederic <fred.konrad@greensocs.com>
> ---
>  translate-all.c | 40 ++++++++++++++++++++++------------------
>  1 file changed, 22 insertions(+), 18 deletions(-)
>
> diff --git a/translate-all.c b/translate-all.c
> index 8fa4378..0e11c70 100644
> --- a/translate-all.c
> +++ b/translate-all.c
> @@ -72,10 +72,11 @@
>  #endif
>
>  #define SMC_BITMAP_USE_THRESHOLD 10
> +#define MAX_CPUS 256

Where does this number come from?

>  typedef struct PageDesc {
>      /* list of TBs intersecting this ram page */
> -    TranslationBlock *first_tb;
> +    TranslationBlock *first_tb[MAX_CPUS];

Especially given the size of the PageDesc structure this adds a lot of
bulk, mostly unused: with MAX_CPUS at 256 that is 256 pointers, i.e. 2 KiB
per PageDesc on a 64-bit host, instead of one pointer. Is the TB list
accessed through the PageDesc frequently enough that an extra level of
indirection has to be avoided?

>      /* in order to optimize self modifying code, we count the number
>         of lookups we do to a given page to use a bitmap */
>      unsigned int code_write_count;
> @@ -750,7 +751,7 @@ static inline void invalidate_page_bitmap(PageDesc *p)
>  /* Set to NULL all the 'first_tb' fields in all PageDescs. */
>  static void page_flush_tb_1(int level, void **lp)
>  {
> -    int i;
> +    int i, j;
>
>      if (*lp == NULL) {
>          return;
> @@ -759,7 +760,9 @@ static void page_flush_tb_1(int level, void **lp)
>          PageDesc *pd = *lp;
>
>          for (i = 0; i < V_L2_SIZE; ++i) {
> -            pd[i].first_tb = NULL;
> +            for (j = 0; j < MAX_CPUS; j++) {
> +                pd[i].first_tb[j] = NULL;
> +            }
>              invalidate_page_bitmap(pd + i);
>          }
>      } else {
> @@ -937,12 +940,12 @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
>      /* remove the TB from the page list */
>      if (tb->page_addr[0] != page_addr) {
>          p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
> -        tb_page_remove(&p->first_tb, tb);
> +        tb_page_remove(&p->first_tb[current_cpu->cpu_index], tb);
>          invalidate_page_bitmap(p);
>      }
>      if (tb->page_addr[1] != -1 && tb->page_addr[1] != page_addr) {
>          p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
> -        tb_page_remove(&p->first_tb, tb);
> +        tb_page_remove(&p->first_tb[current_cpu->cpu_index], tb);
>          invalidate_page_bitmap(p);
>      }
>
> @@ -1012,7 +1015,7 @@ static void build_page_bitmap(PageDesc *p)
>
>      p->code_bitmap = g_malloc0(TARGET_PAGE_SIZE / 8);
>
> -    tb = p->first_tb;
> +    tb = p->first_tb[current_cpu->cpu_index];
>      while (tb != NULL) {
>          n = (uintptr_t)tb & 3;
>          tb = (TranslationBlock *)((uintptr_t)tb & ~3);
> @@ -1138,7 +1141,7 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
>      /* we remove all the TBs in the range [start, end[ */
>      /* XXX: see if in some cases it could be faster to invalidate all
>         the code */
> -    tb = p->first_tb;
> +    tb = p->first_tb[cpu->cpu_index];
>      while (tb != NULL) {
>          n = (uintptr_t)tb & 3;
>          tb = (TranslationBlock *)((uintptr_t)tb & ~3);
> @@ -1196,7 +1199,7 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
>      }
>  #if !defined(CONFIG_USER_ONLY)
>      /* if no code remaining, no need to continue to use slow writes */
> -    if (!p->first_tb) {
> +    if (!p->first_tb[cpu->cpu_index]) {
>          invalidate_page_bitmap(p);
>          if (is_cpu_write_access) {
>              tlb_unprotect_code_phys(cpu, start, cpu->mem_io_vaddr);
> @@ -1224,10 +1227,10 @@ void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
>  #if 0
>      if (1) {
>          qemu_log("modifying code at 0x%x size=%d EIP=%x PC=%08x\n",
> -                  cpu_single_env->mem_io_vaddr, len,
> -                  cpu_single_env->eip,
> -                  cpu_single_env->eip +
> -                  (intptr_t)cpu_single_env->segs[R_CS].base);
> +                  current_cpu->mem_io_vaddr, len,
> +                  current_cpu->eip,
> +                  current_cpu->eip +
> +                  (intptr_t)current_cpu->segs[R_CS].base);
>      }
>  #endif
>      p = page_find(start >> TARGET_PAGE_BITS);
> @@ -1269,7 +1272,7 @@ static void tb_invalidate_phys_page(tb_page_addr_t addr,
>      if (!p) {
>          return;
>      }
> -    tb = p->first_tb;
> +    tb = p->first_tb[current_cpu->cpu_index];
>  #ifdef TARGET_HAS_PRECISE_SMC
>      if (tb && pc != 0) {
>          current_tb = tb_find_pc(pc);
> @@ -1299,7 +1302,7 @@ static void tb_invalidate_phys_page(tb_page_addr_t addr,
>          tb_phys_invalidate(tb, addr);
>          tb = tb->page_next[n];
>      }
> -    p->first_tb = NULL;
> +    p->first_tb[current_cpu->cpu_index] = NULL;
>  #ifdef TARGET_HAS_PRECISE_SMC
>      if (current_tb_modified) {
>          /* we generate a block containing just the instruction
> @@ -1327,11 +1330,12 @@ static inline void tb_alloc_page(TranslationBlock *tb,
>
>      tb->page_addr[n] = page_addr;
>      p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1);
> -    tb->page_next[n] = p->first_tb;
> +    tb->page_next[n] = p->first_tb[current_cpu->cpu_index];
>  #ifndef CONFIG_USER_ONLY
> -    page_already_protected = p->first_tb != NULL;
> +    page_already_protected = p->first_tb[current_cpu->cpu_index] != NULL;
>  #endif
> -    p->first_tb = (TranslationBlock *)((uintptr_t)tb | n);
> +    p->first_tb[current_cpu->cpu_index]
> +        = (TranslationBlock *)((uintptr_t)tb | n);
>      invalidate_page_bitmap(p);
>
>  #if defined(TARGET_HAS_SMC) || 1
> @@ -1821,7 +1825,7 @@ void page_set_flags(target_ulong start, target_ulong end, int flags)
>             the code inside. */
>          if (!(p->flags & PAGE_WRITE) &&
>              (flags & PAGE_WRITE) &&
> -            p->first_tb) {
> +            p->first_tb[current_cpu->cpu_index]) {
>              tb_invalidate_phys_page(addr, 0, NULL, false);
>          }
>          p->flags = flags;

As the TranslationBlock itself has a linked list for page related blocks:

    struct TranslationBlock *page_next[2];

could we not just come up with a structure that chains them together here?

--
Alex Bennée
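
A minimal, standalone sketch of the kind of chained structure suggested
above, for illustration only -- this is not code from the patch or from
QEMU; the TB and PageDesc types are cut down to the relevant fields and
the helper names page_add_tb/page_walk_cpu are made up:

    #include <stdio.h>

    /* Cut-down stand-ins for the real structures, for illustration only. */
    typedef struct TB {
        unsigned int cpu_index;   /* hypothetical tag: CPU that generated this TB */
        struct TB *page_next;     /* per-page chaining, like page_next[] in QEMU */
    } TB;

    typedef struct PageDesc {
        TB *first_tb;             /* one list head shared by all CPUs */
    } PageDesc;

    /* Link a TB onto the page's single list. */
    static void page_add_tb(PageDesc *p, TB *tb)
    {
        tb->page_next = p->first_tb;
        p->first_tb = tb;
    }

    /* Walk the shared list, visiting only TBs that belong to one CPU. */
    static void page_walk_cpu(PageDesc *p, unsigned int cpu_index)
    {
        for (TB *tb = p->first_tb; tb != NULL; tb = tb->page_next) {
            if (tb->cpu_index == cpu_index) {
                printf("TB %p belongs to cpu %u\n", (void *)tb, cpu_index);
            }
        }
    }

    int main(void)
    {
        PageDesc page = { NULL };
        TB tbs[4] = {
            { 0, NULL }, { 1, NULL }, { 0, NULL }, { 1, NULL },
        };

        for (int i = 0; i < 4; i++) {
            page_add_tb(&page, &tbs[i]);
        }
        page_walk_cpu(&page, 1);   /* prints only the two TBs tagged for cpu 1 */
        return 0;
    }

Compared with first_tb[MAX_CPUS] (256 pointers, i.e. 2 KiB per PageDesc on
a 64-bit host), this keeps PageDesc at a single pointer, at the cost of a
tag per TB and a filtered walk; whether that trade-off pays off in practice
would need measuring.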