From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([2001:4830:134:3::10]:49460) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1YGLRQ-0001lx-QP for qemu-devel@nongnu.org; Wed, 28 Jan 2015 00:45:30 -0500 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1YGLRM-0004GZ-Kd for qemu-devel@nongnu.org; Wed, 28 Jan 2015 00:45:28 -0500 Received: from mx1.redhat.com ([209.132.183.28]:52370) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1YGLRM-0004GV-Da for qemu-devel@nongnu.org; Wed, 28 Jan 2015 00:45:24 -0500 Date: Wed, 28 Jan 2015 13:45:15 +0800 From: Fam Zheng Message-ID: <20150128054515.GA10117@ad.nay.redhat.com> References: <1421938053-10318-1-git-send-email-pbonzini@redhat.com> <1421938053-10318-11-git-send-email-pbonzini@redhat.com> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: <1421938053-10318-11-git-send-email-pbonzini@redhat.com> Subject: Re: [Qemu-devel] [PATCH 10/15] exec: RCUify AddressSpaceDispatch List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: Paolo Bonzini Cc: borntraeger@de.ibm.com, qemu-devel@nongnu.org, stefanha@redhat.com On Thu, 01/22 15:47, Paolo Bonzini wrote: > Note that even after this patch, most callers of address_space_* > functions must still be under the big QEMU lock, otherwise the memory > region returned by address_space_translate can disappear as soon as > address_space_translate returns. This will be fixed in the next part > of this series. > > Signed-off-by: Paolo Bonzini > --- > cpu-exec.c | 13 ++++++++++++- > cpus.c | 2 +- > cputlb.c | 8 ++++++-- > exec.c | 34 ++++++++++++++++++++++++++-------- > hw/i386/intel_iommu.c | 3 +++ > hw/pci-host/apb.c | 1 + > hw/ppc/spapr_iommu.c | 1 + > include/exec/exec-all.h | 1 + > 8 files changed, 51 insertions(+), 12 deletions(-) > > diff --git a/cpu-exec.c b/cpu-exec.c > index 12473ff..edc5eb9 100644 > --- a/cpu-exec.c > +++ b/cpu-exec.c > @@ -26,6 +26,7 @@ > #include "qemu/timer.h" > #include "exec/address-spaces.h" > #include "exec/memory-internal.h" > +#include "qemu/rcu.h" > > /* -icount align implementation. */ > > @@ -149,8 +150,15 @@ void cpu_resume_from_signal(CPUState *cpu, void *puc) > > void cpu_reload_memory_map(CPUState *cpu) > { > + AddressSpaceDispatch *d; > + > + if (qemu_in_vcpu_thread()) { > + rcu_read_unlock(); Could you explain why we need to split the critical section? Maybe a line of comment helps here. > + rcu_read_lock(); > + } > + > /* The CPU and TLB are protected by the iothread lock. */ > - AddressSpaceDispatch *d = cpu->as->dispatch; > + d = atomic_rcu_read(&cpu->as->dispatch); > cpu->memory_dispatch = d; > tlb_flush(cpu, 1); > } > @@ -365,6 +373,8 @@ int cpu_exec(CPUArchState *env) > * an instruction scheduling constraint on modern architectures. */ > smp_mb(); > > + rcu_read_lock(); > + > if (unlikely(exit_request)) { > cpu->exit_request = 1; > } > @@ -567,6 +577,7 @@ int cpu_exec(CPUArchState *env) > } /* for(;;) */ > > cc->cpu_exec_exit(cpu); > + rcu_read_unlock(); > > /* fail safe : never use current_cpu outside cpu_exec() */ > current_cpu = NULL; > diff --git a/cpus.c b/cpus.c > index 3a5323b..b02c793 100644 > --- a/cpus.c > +++ b/cpus.c > @@ -1121,7 +1121,7 @@ bool qemu_cpu_is_self(CPUState *cpu) > return qemu_thread_is_self(cpu->thread); > } > > -static bool qemu_in_vcpu_thread(void) > +bool qemu_in_vcpu_thread(void) > { > return current_cpu && qemu_cpu_is_self(current_cpu); > } > diff --git a/cputlb.c b/cputlb.c > index f92db5e..38f2151 100644 > --- a/cputlb.c > +++ b/cputlb.c > @@ -243,8 +243,12 @@ static void tlb_add_large_page(CPUArchState *env, target_ulong vaddr, > } > > /* Add a new TLB entry. At most one entry for a given virtual address > - is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the > - supplied size is only used by tlb_flush_page. */ > + * is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the > + * supplied size is only used by tlb_flush_page. > + * > + * Called from TCG-generated code, which is under an RCU read-side > + * critical section. > + */ > void tlb_set_page(CPUState *cpu, target_ulong vaddr, > hwaddr paddr, int prot, > int mmu_idx, target_ulong size) > diff --git a/exec.c b/exec.c > index 762ec76..262e8bc 100644 > --- a/exec.c > +++ b/exec.c > @@ -115,6 +115,8 @@ struct PhysPageEntry { > typedef PhysPageEntry Node[P_L2_SIZE]; > > typedef struct PhysPageMap { > + struct rcu_head rcu; > + > unsigned sections_nb; > unsigned sections_nb_alloc; > unsigned nodes_nb; > @@ -124,6 +126,8 @@ typedef struct PhysPageMap { > } PhysPageMap; > > struct AddressSpaceDispatch { > + struct rcu_head rcu; > + > /* This is a multi-level map on the physical address space. > * The bottom level has pointers to MemoryRegionSections. > */ > @@ -315,6 +319,7 @@ bool memory_region_is_unassigned(MemoryRegion *mr) > && mr != &io_mem_watch; > } > > +/* Called from RCU critical section */ > static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d, > hwaddr addr, > bool resolve_subpage) > @@ -330,6 +335,7 @@ static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d, > return section; > } > > +/* Called from RCU critical section */ > static MemoryRegionSection * > address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat, > hwaddr *plen, bool resolve_subpage) > @@ -370,8 +376,10 @@ MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr, > MemoryRegion *mr; > hwaddr len = *plen; > > + rcu_read_lock(); > for (;;) { > - section = address_space_translate_internal(as->dispatch, addr, &addr, plen, true); > + AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch); > + section = address_space_translate_internal(d, addr, &addr, plen, true); > mr = section->mr; > > if (!mr->iommu_ops) { > @@ -397,9 +405,11 @@ MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr, > > *plen = len; > *xlat = addr; > + rcu_read_unlock(); > return mr; > } > > +/* Called from RCU critical section */ > MemoryRegionSection * > address_space_translate_for_iotlb(CPUState *cpu, hwaddr addr, > hwaddr *xlat, hwaddr *plen) > @@ -852,6 +862,7 @@ static void cpu_physical_memory_set_dirty_tracking(bool enable) > in_migration = enable; > } > > +/* Called from RCU critical section */ > hwaddr memory_region_section_get_iotlb(CPUState *cpu, > MemoryRegionSection *section, > target_ulong vaddr, > @@ -1963,7 +1974,8 @@ static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as, > > MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index) > { > - MemoryRegionSection *sections = cpu->memory_dispatch->map.sections; > + AddressSpaceDispatch *d = atomic_rcu_read(&cpu->memory_dispatch); > + MemoryRegionSection *sections = d->map.sections; > > return sections[index & ~TARGET_PAGE_MASK].mr; > } > @@ -1999,6 +2011,12 @@ static void mem_begin(MemoryListener *listener) > as->next_dispatch = d; > } > > +static void address_space_dispatch_free(AddressSpaceDispatch *d) > +{ > + phys_sections_free(&d->map); If I understand it, this code doesn't hold iothread lock when releasing the memory region, but in one of the memroy region destructors, memory_region_destructor_ram_from_ptr: void qemu_ram_free_from_ptr(ram_addr_t addr) { RAMBlock *block; /* This assumes the iothread lock is taken here too. */ qemu_mutex_lock_ramlist(); QTAILQ_FOREACH(block, &ram_list.blocks, next) { ... Is the comment stale or I missed something? Fam > + g_free(d); > +} > + > static void mem_commit(MemoryListener *listener) > { > AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener); > @@ -2007,11 +2025,9 @@ static void mem_commit(MemoryListener *listener) > > phys_page_compact_all(next, next->map.nodes_nb); > > - as->dispatch = next; > - > + atomic_rcu_set(&as->dispatch, next); > if (cur) { > - phys_sections_free(&cur->map); > - g_free(cur); > + call_rcu(cur, address_space_dispatch_free, rcu); > } > } > > @@ -2066,8 +2082,10 @@ void address_space_destroy_dispatch(AddressSpace *as) > AddressSpaceDispatch *d = as->dispatch; > > memory_listener_unregister(&as->dispatch_listener); > - g_free(d); > - as->dispatch = NULL; > + atomic_rcu_set(&as->dispatch, NULL); > + if (d) { > + call_rcu(d, address_space_dispatch_free, rcu); > + } > } > > static void memory_map_init(void) > diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c > index 0a4282a..7da70ff 100644 > --- a/hw/i386/intel_iommu.c > +++ b/hw/i386/intel_iommu.c > @@ -745,6 +745,9 @@ static inline bool vtd_is_interrupt_addr(hwaddr addr) > > /* Map dev to context-entry then do a paging-structures walk to do a iommu > * translation. > + * > + * Called from RCU critical section. > + * > * @bus_num: The bus number > * @devfn: The devfn, which is the combined of device and function number > * @is_write: The access is a write operation > diff --git a/hw/pci-host/apb.c b/hw/pci-host/apb.c > index f573875..832b6c7 100644 > --- a/hw/pci-host/apb.c > +++ b/hw/pci-host/apb.c > @@ -205,6 +205,7 @@ static AddressSpace *pbm_pci_dma_iommu(PCIBus *bus, void *opaque, int devfn) > return &is->iommu_as; > } > > +/* Called from RCU critical section */ > static IOMMUTLBEntry pbm_translate_iommu(MemoryRegion *iommu, hwaddr addr, > bool is_write) > { > diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c > index da47474..ba003da 100644 > --- a/hw/ppc/spapr_iommu.c > +++ b/hw/ppc/spapr_iommu.c > @@ -59,6 +59,7 @@ static sPAPRTCETable *spapr_tce_find_by_liobn(uint32_t liobn) > return NULL; > } > > +/* Called from RCU critical section */ > static IOMMUTLBEntry spapr_tce_translate_iommu(MemoryRegion *iommu, hwaddr addr, > bool is_write) > { > diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h > index bb3fd37..8eb0db3 100644 > --- a/include/exec/exec-all.h > +++ b/include/exec/exec-all.h > @@ -96,6 +96,7 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end, > void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end, > int is_cpu_write_access); > #if !defined(CONFIG_USER_ONLY) > +bool qemu_in_vcpu_thread(void); > void cpu_reload_memory_map(CPUState *cpu); > void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as); > /* cputlb.c */ > -- > 1.8.3.1 > >