From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([2001:4830:134:3::10]:50878) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1YG7sV-0007sA-HC for qemu-devel@nongnu.org; Tue, 27 Jan 2015 10:16:33 -0500 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1YG7sS-0002L9-OG for qemu-devel@nongnu.org; Tue, 27 Jan 2015 10:16:31 -0500 Received: from greensocs.com ([193.104.36.180]:13830) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1YG7sS-0002KW-CN for qemu-devel@nongnu.org; Tue, 27 Jan 2015 10:16:28 -0500 Message-ID: <54C7ABC8.5020802@greensocs.com> Date: Tue, 27 Jan 2015 16:16:24 +0100 From: Frederic Konrad MIME-Version: 1.0 References: <1421428797-23697-1-git-send-email-fred.konrad@greensocs.com> <1421428797-23697-3-git-send-email-fred.konrad@greensocs.com> <87lhko704f.fsf@linaro.org> In-Reply-To: <87lhko704f.fsf@linaro.org> Content-Type: text/plain; charset=UTF-8; format=flowed Content-Transfer-Encoding: quoted-printable Subject: Re: [Qemu-devel] [RFC 02/10] use a different translation block list for each cpu. List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: =?UTF-8?B?QWxleCBCZW5uw6ll?= Cc: mttcg@listserver.greensocs.com, peter.maydell@linaro.org, jan.kiszka@siemens.com, mark.burton@greensocs.com, qemu-devel@nongnu.org, agraf@suse.de, pbonzini@redhat.com On 27/01/2015 15:45, Alex Benn=C3=A9e wrote: > fred.konrad@greensocs.com writes: > >> From: KONRAD Frederic >> >> We need a different TranslationBlock list for each core in case of mul= tithread >> TCG. 
>> >> Signed-off-by: KONRAD Frederic >> --- >> translate-all.c | 40 ++++++++++++++++++++++------------------ >> 1 file changed, 22 insertions(+), 18 deletions(-) >> >> diff --git a/translate-all.c b/translate-all.c >> index 8fa4378..0e11c70 100644 >> --- a/translate-all.c >> +++ b/translate-all.c >> @@ -72,10 +72,11 @@ >> #endif >> =20 >> #define SMC_BITMAP_USE_THRESHOLD 10 >> +#define MAX_CPUS 256 > Where does this number come from? > >> typedef struct PageDesc { >> /* list of TBs intersecting this ram page */ >> - TranslationBlock *first_tb; >> + TranslationBlock *first_tb[MAX_CPUS]; > Especially given the size of the PageDesc structure this adds a lot of > of bulk, mostly unused. Is the access to the TB list via PageDesc that > frequent to avoid an additional indirection? > >> /* in order to optimize self modifying code, we count the number >> of lookups we do to a given page to use a bitmap */ >> unsigned int code_write_count; >> @@ -750,7 +751,7 @@ static inline void invalidate_page_bitmap(PageDesc= *p) >> /* Set to NULL all the 'first_tb' fields in all PageDescs. 
*/ >> static void page_flush_tb_1(int level, void **lp) >> { >> - int i; >> + int i, j; >> =20 >> if (*lp =3D=3D NULL) { >> return; >> @@ -759,7 +760,9 @@ static void page_flush_tb_1(int level, void **lp) >> PageDesc *pd =3D *lp; >> =20 >> for (i =3D 0; i < V_L2_SIZE; ++i) { >> - pd[i].first_tb =3D NULL; >> + for (j =3D 0; j < MAX_CPUS; j++) { >> + pd[i].first_tb[j] =3D NULL; >> + } >> invalidate_page_bitmap(pd + i); >> } >> } else { >> @@ -937,12 +940,12 @@ void tb_phys_invalidate(TranslationBlock *tb, tb= _page_addr_t page_addr) >> /* remove the TB from the page list */ >> if (tb->page_addr[0] !=3D page_addr) { >> p =3D page_find(tb->page_addr[0] >> TARGET_PAGE_BITS); >> - tb_page_remove(&p->first_tb, tb); >> + tb_page_remove(&p->first_tb[current_cpu->cpu_index], tb); >> invalidate_page_bitmap(p); >> } >> if (tb->page_addr[1] !=3D -1 && tb->page_addr[1] !=3D page_addr)= { >> p =3D page_find(tb->page_addr[1] >> TARGET_PAGE_BITS); >> - tb_page_remove(&p->first_tb, tb); >> + tb_page_remove(&p->first_tb[current_cpu->cpu_index], tb); >> invalidate_page_bitmap(p); >> } >> =20 >> @@ -1012,7 +1015,7 @@ static void build_page_bitmap(PageDesc *p) >> =20 >> p->code_bitmap =3D g_malloc0(TARGET_PAGE_SIZE / 8); >> =20 >> - tb =3D p->first_tb; >> + tb =3D p->first_tb[current_cpu->cpu_index]; >> while (tb !=3D NULL) { >> n =3D (uintptr_t)tb & 3; >> tb =3D (TranslationBlock *)((uintptr_t)tb & ~3); >> @@ -1138,7 +1141,7 @@ void tb_invalidate_phys_page_range(tb_page_addr_= t start, tb_page_addr_t end, >> /* we remove all the TBs in the range [start, end[ */ >> /* XXX: see if in some cases it could be faster to invalidate al= l >> the code */ >> - tb =3D p->first_tb; >> + tb =3D p->first_tb[cpu->cpu_index]; >> while (tb !=3D NULL) { >> n =3D (uintptr_t)tb & 3; >> tb =3D (TranslationBlock *)((uintptr_t)tb & ~3); >> @@ -1196,7 +1199,7 @@ void tb_invalidate_phys_page_range(tb_page_addr_= t start, tb_page_addr_t end, >> } >> #if !defined(CONFIG_USER_ONLY) >> /* if no code remaining, no 
need to continue to use slow writes = */ >> - if (!p->first_tb) { >> + if (!p->first_tb[cpu->cpu_index]) { >> invalidate_page_bitmap(p); >> if (is_cpu_write_access) { >> tlb_unprotect_code_phys(cpu, start, cpu->mem_io_vaddr); >> @@ -1224,10 +1227,10 @@ void tb_invalidate_phys_page_fast(tb_page_addr= _t start, int len) >> #if 0 >> if (1) { >> qemu_log("modifying code at 0x%x size=3D%d EIP=3D%x PC=3D%08= x\n", >> - cpu_single_env->mem_io_vaddr, len, >> - cpu_single_env->eip, >> - cpu_single_env->eip + >> - (intptr_t)cpu_single_env->segs[R_CS].base); >> + current_cpu->mem_io_vaddr, len, >> + current_cpu->eip, >> + current_cpu->eip + >> + (intptr_t)current_cpu->segs[R_CS].base); >> } >> #endif >> p =3D page_find(start >> TARGET_PAGE_BITS); >> @@ -1269,7 +1272,7 @@ static void tb_invalidate_phys_page(tb_page_addr= _t addr, >> if (!p) { >> return; >> } >> - tb =3D p->first_tb; >> + tb =3D p->first_tb[current_cpu->cpu_index]; >> #ifdef TARGET_HAS_PRECISE_SMC >> if (tb && pc !=3D 0) { >> current_tb =3D tb_find_pc(pc); >> @@ -1299,7 +1302,7 @@ static void tb_invalidate_phys_page(tb_page_addr= _t addr, >> tb_phys_invalidate(tb, addr); >> tb =3D tb->page_next[n]; >> } >> - p->first_tb =3D NULL; >> + p->first_tb[current_cpu->cpu_index] =3D NULL; >> #ifdef TARGET_HAS_PRECISE_SMC >> if (current_tb_modified) { >> /* we generate a block containing just the instruction >> @@ -1327,11 +1330,12 @@ static inline void tb_alloc_page(TranslationBl= ock *tb, >> =20 >> tb->page_addr[n] =3D page_addr; >> p =3D page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1); >> - tb->page_next[n] =3D p->first_tb; >> + tb->page_next[n] =3D p->first_tb[current_cpu->cpu_index]; >> #ifndef CONFIG_USER_ONLY >> - page_already_protected =3D p->first_tb !=3D NULL; >> + page_already_protected =3D p->first_tb[current_cpu->cpu_index] !=3D= NULL; >> #endif >> - p->first_tb =3D (TranslationBlock *)((uintptr_t)tb | n); >> + p->first_tb[current_cpu->cpu_index] >> + =3D (TranslationBlock *)((uintptr_t)tb | n); >> 
invalidate_page_bitmap(p); >> =20 >> #if defined(TARGET_HAS_SMC) || 1 >> @@ -1821,7 +1825,7 @@ void page_set_flags(target_ulong start, target_u= long end, int flags) >> the code inside. */ >> if (!(p->flags & PAGE_WRITE) && >> (flags & PAGE_WRITE) && >> - p->first_tb) { >> + p->first_tb[current_cpu->cpu_index]) { >> tb_invalidate_phys_page(addr, 0, NULL, false); >> } >> p->flags =3D flags; > As the TranslationBlock itself has a linked list for page related > blocks: > > struct TranslationBlock *page_next[2]; > > could we not just come up with a structure that chains them together > here? > Hi Alex, Thanks for looking at this. We don't know how many times this first_tb is accessed right now. You suggest chaining TBs instead of using an array for this? This makes sense, but I think it means we will have to protect this with a=20 mutex as well? Thanks, Fred