From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([2001:4830:134:3::10]:60154) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1YNQFm-0001ai-GE for qemu-devel@nongnu.org; Mon, 16 Feb 2015 13:18:43 -0500 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1YNQFk-0001Sp-IO for qemu-devel@nongnu.org; Mon, 16 Feb 2015 13:18:42 -0500 Received: from mail-wg0-x22e.google.com ([2a00:1450:400c:c00::22e]:37546) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1YNQFj-0001Sc-Tk for qemu-devel@nongnu.org; Mon, 16 Feb 2015 13:18:40 -0500 Received: by mail-wg0-f46.google.com with SMTP id a1so30918719wgh.5 for ; Mon, 16 Feb 2015 10:18:39 -0800 (PST) Received: from playground.station (net-93-66-73-217.cust.vodafonedsl.it. [93.66.73.217]) by mx.google.com with ESMTPSA id p6sm20746248wia.14.2015.02.16.10.18.37 for (version=TLSv1.2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128/128); Mon, 16 Feb 2015 10:18:38 -0800 (PST) Sender: Paolo Bonzini From: Paolo Bonzini Date: Mon, 16 Feb 2015 19:17:56 +0100 Message-Id: <1424110682-14334-16-git-send-email-pbonzini@redhat.com> In-Reply-To: <1424110682-14334-1-git-send-email-pbonzini@redhat.com> References: <1424110682-14334-1-git-send-email-pbonzini@redhat.com> Subject: [Qemu-devel] [PULL 15/21] exec: RCUify AddressSpaceDispatch List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: qemu-devel@nongnu.org Note that even after this patch, most callers of address_space_* functions must still be under the big QEMU lock, otherwise the memory region returned by address_space_translate can disappear as soon as address_space_translate returns. This will be fixed in the next part of this series. Reviewed-by: Fam Zheng Signed-off-by: Paolo Bonzini --- cpu-exec.c | 25 ++++++++++++++++++++++++- cpus.c | 2 +- cputlb.c | 8 ++++++-- exec.c | 34 ++++++++++++++++++++++++++-------- hw/i386/intel_iommu.c | 3 +++ hw/pci-host/apb.c | 1 + hw/ppc/spapr_iommu.c | 1 + include/exec/exec-all.h | 1 + 8 files changed, 63 insertions(+), 12 deletions(-) diff --git a/cpu-exec.c b/cpu-exec.c index 98f968d..adb939a 100644 --- a/cpu-exec.c +++ b/cpu-exec.c @@ -26,6 +26,7 @@ #include "qemu/timer.h" #include "exec/address-spaces.h" #include "exec/memory-internal.h" +#include "qemu/rcu.h" /* -icount align implementation. */ @@ -146,8 +147,27 @@ void cpu_resume_from_signal(CPUState *cpu, void *puc) void cpu_reload_memory_map(CPUState *cpu) { + AddressSpaceDispatch *d; + + if (qemu_in_vcpu_thread()) { + /* Do not let the guest prolong the critical section as much as it + * as it desires. + * + * Currently, this is prevented by the I/O thread's periodinc kicking + * of the VCPU thread (iothread_requesting_mutex, qemu_cpu_kick_thread) + * but this will go away once TCG's execution moves out of the global + * mutex. + * + * This pair matches cpu_exec's rcu_read_lock()/rcu_read_unlock(), which + * only protects cpu->as->dispatch. Since we reload it below, we can + * split the critical section. + */ + rcu_read_unlock(); + rcu_read_lock(); + } + /* The CPU and TLB are protected by the iothread lock. */ - AddressSpaceDispatch *d = cpu->as->dispatch; + d = atomic_rcu_read(&cpu->as->dispatch); cpu->memory_dispatch = d; tlb_flush(cpu, 1); } @@ -362,6 +382,8 @@ int cpu_exec(CPUArchState *env) * an instruction scheduling constraint on modern architectures. */ smp_mb(); + rcu_read_lock(); + if (unlikely(exit_request)) { cpu->exit_request = 1; } @@ -564,6 +586,7 @@ int cpu_exec(CPUArchState *env) } /* for(;;) */ cc->cpu_exec_exit(cpu); + rcu_read_unlock(); /* fail safe : never use current_cpu outside cpu_exec() */ current_cpu = NULL; diff --git a/cpus.c b/cpus.c index 0cdd1d7..b826fac 100644 --- a/cpus.c +++ b/cpus.c @@ -1104,7 +1104,7 @@ bool qemu_cpu_is_self(CPUState *cpu) return qemu_thread_is_self(cpu->thread); } -static bool qemu_in_vcpu_thread(void) +bool qemu_in_vcpu_thread(void) { return current_cpu && qemu_cpu_is_self(current_cpu); } diff --git a/cputlb.c b/cputlb.c index f92db5e..38f2151 100644 --- a/cputlb.c +++ b/cputlb.c @@ -243,8 +243,12 @@ static void tlb_add_large_page(CPUArchState *env, target_ulong vaddr, } /* Add a new TLB entry. At most one entry for a given virtual address - is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the - supplied size is only used by tlb_flush_page. */ + * is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the + * supplied size is only used by tlb_flush_page. + * + * Called from TCG-generated code, which is under an RCU read-side + * critical section. + */ void tlb_set_page(CPUState *cpu, target_ulong vaddr, hwaddr paddr, int prot, int mmu_idx, target_ulong size) diff --git a/exec.c b/exec.c index fe1e60a..76b3b6c 100644 --- a/exec.c +++ b/exec.c @@ -115,6 +115,8 @@ struct PhysPageEntry { typedef PhysPageEntry Node[P_L2_SIZE]; typedef struct PhysPageMap { + struct rcu_head rcu; + unsigned sections_nb; unsigned sections_nb_alloc; unsigned nodes_nb; @@ -124,6 +126,8 @@ typedef struct PhysPageMap { } PhysPageMap; struct AddressSpaceDispatch { + struct rcu_head rcu; + /* This is a multi-level map on the physical address space. * The bottom level has pointers to MemoryRegionSections. */ @@ -315,6 +319,7 @@ bool memory_region_is_unassigned(MemoryRegion *mr) && mr != &io_mem_watch; } +/* Called from RCU critical section */ static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d, hwaddr addr, bool resolve_subpage) @@ -330,6 +335,7 @@ static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d, return section; } +/* Called from RCU critical section */ static MemoryRegionSection * address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat, hwaddr *plen, bool resolve_subpage) @@ -370,8 +376,10 @@ MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr, MemoryRegion *mr; hwaddr len = *plen; + rcu_read_lock(); for (;;) { - section = address_space_translate_internal(as->dispatch, addr, &addr, plen, true); + AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch); + section = address_space_translate_internal(d, addr, &addr, plen, true); mr = section->mr; if (!mr->iommu_ops) { @@ -397,9 +405,11 @@ MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr, *plen = len; *xlat = addr; + rcu_read_unlock(); return mr; } +/* Called from RCU critical section */ MemoryRegionSection * address_space_translate_for_iotlb(CPUState *cpu, hwaddr addr, hwaddr *xlat, hwaddr *plen) @@ -852,6 +862,7 @@ static void cpu_physical_memory_set_dirty_tracking(bool enable) in_migration = enable; } +/* Called from RCU critical section */ hwaddr memory_region_section_get_iotlb(CPUState *cpu, MemoryRegionSection *section, target_ulong vaddr, @@ -1964,7 +1975,8 @@ static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as, MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index) { - MemoryRegionSection *sections = cpu->memory_dispatch->map.sections; + AddressSpaceDispatch *d = atomic_rcu_read(&cpu->memory_dispatch); + MemoryRegionSection *sections = d->map.sections; return sections[index & ~TARGET_PAGE_MASK].mr; } @@ -2000,6 +2012,12 @@ static void mem_begin(MemoryListener *listener) as->next_dispatch = d; } +static void address_space_dispatch_free(AddressSpaceDispatch *d) +{ + phys_sections_free(&d->map); + g_free(d); +} + static void mem_commit(MemoryListener *listener) { AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener); @@ -2008,11 +2026,9 @@ static void mem_commit(MemoryListener *listener) phys_page_compact_all(next, next->map.nodes_nb); - as->dispatch = next; - + atomic_rcu_set(&as->dispatch, next); if (cur) { - phys_sections_free(&cur->map); - g_free(cur); + call_rcu(cur, address_space_dispatch_free, rcu); } } @@ -2071,8 +2087,10 @@ void address_space_destroy_dispatch(AddressSpace *as) { AddressSpaceDispatch *d = as->dispatch; - g_free(d); - as->dispatch = NULL; + atomic_rcu_set(&as->dispatch, NULL); + if (d) { + call_rcu(d, address_space_dispatch_free, rcu); + } } static void memory_map_init(void) diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index 0a4282a..7da70ff 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -745,6 +745,9 @@ static inline bool vtd_is_interrupt_addr(hwaddr addr) /* Map dev to context-entry then do a paging-structures walk to do a iommu * translation. + * + * Called from RCU critical section. + * * @bus_num: The bus number * @devfn: The devfn, which is the combined of device and function number * @is_write: The access is a write operation diff --git a/hw/pci-host/apb.c b/hw/pci-host/apb.c index f573875..832b6c7 100644 --- a/hw/pci-host/apb.c +++ b/hw/pci-host/apb.c @@ -205,6 +205,7 @@ static AddressSpace *pbm_pci_dma_iommu(PCIBus *bus, void *opaque, int devfn) return &is->iommu_as; } +/* Called from RCU critical section */ static IOMMUTLBEntry pbm_translate_iommu(MemoryRegion *iommu, hwaddr addr, bool is_write) { diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c index da47474..ba003da 100644 --- a/hw/ppc/spapr_iommu.c +++ b/hw/ppc/spapr_iommu.c @@ -59,6 +59,7 @@ static sPAPRTCETable *spapr_tce_find_by_liobn(uint32_t liobn) return NULL; } +/* Called from RCU critical section */ static IOMMUTLBEntry spapr_tce_translate_iommu(MemoryRegion *iommu, hwaddr addr, bool is_write) { diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index bb3fd37..8eb0db3 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -96,6 +96,7 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end, void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end, int is_cpu_write_access); #if !defined(CONFIG_USER_ONLY) +bool qemu_in_vcpu_thread(void); void cpu_reload_memory_map(CPUState *cpu); void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as); /* cputlb.c */ -- 2.3.0