All of lore.kernel.org
 help / color / mirror / Atom feed
From: Blue Swirl <blauwirbel@gmail.com>
To: Liu Ping Fan <qemulist@gmail.com>
Cc: qemu-devel@nongnu.org, kvm@vger.kernel.org,
	"Anthony Liguori" <anthony@codemonkey.ws>,
	"Avi Kivity" <avi@redhat.com>,
	"Jan Kiszka" <jan.kiszka@siemens.com>,
	"Marcelo Tosatti" <mtosatti@redhat.com>,
	"Stefan Hajnoczi" <stefanha@gmail.com>,
	"Paolo Bonzini" <pbonzini@redhat.com>,
	"Andreas Färber" <afaerber@suse.de>
Subject: Re: [PATCH 09/15] memory: prepare flatview and radix-tree for rcu style access
Date: Wed, 8 Aug 2012 19:23:51 +0000	[thread overview]
Message-ID: <CAAu8pHv7KbH7maxzCGrxop9p=1s8frW0XefcO1RoPb_+6cJn5w@mail.gmail.com> (raw)
In-Reply-To: <1344407156-25562-10-git-send-email-qemulist@gmail.com>

On Wed, Aug 8, 2012 at 6:25 AM, Liu Ping Fan <qemulist@gmail.com> wrote:
> From: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
>
> Flatview and radix view are both under the protection of a pointer,
> and this makes sure that changes to them appear to be atomic.
>
> The mr accessed by a radix-tree leaf or flatview will be reclaimed
> after the previous PhysMap is no longer in use.
>
> Signed-off-by: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
> ---
>  exec.c      |  303 +++++++++++++++++++++++++++++++++++++++-------------------
>  hw/vhost.c  |    2 +-
>  hw/xen_pt.c |    2 +-
>  kvm-all.c   |    2 +-
>  memory.c    |   92 ++++++++++++++-----
>  memory.h    |    9 ++-
>  vl.c        |    1 +
>  xen-all.c   |    2 +-
>  8 files changed, 286 insertions(+), 127 deletions(-)
>
> diff --git a/exec.c b/exec.c
> index 01b91b0..97addb9 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -24,6 +24,7 @@
>  #include <sys/mman.h>
>  #endif
>
> +#include "qemu/atomic.h"
>  #include "qemu-common.h"
>  #include "cpu.h"
>  #include "tcg.h"
> @@ -35,6 +36,8 @@
>  #include "qemu-timer.h"
>  #include "memory.h"
>  #include "exec-memory.h"
> +#include "qemu-thread.h"
> +#include "qemu/reclaimer.h"
>  #if defined(CONFIG_USER_ONLY)
>  #include <qemu.h>
>  #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
> @@ -184,25 +187,17 @@ static void *l1_map[V_L1_SIZE];
>
>  #if !defined(CONFIG_USER_ONLY)
>
> -static MemoryRegionSection *phys_sections;
> -static unsigned phys_sections_nb, phys_sections_nb_alloc;
>  static uint16_t phys_section_unassigned;
>  static uint16_t phys_section_notdirty;
>  static uint16_t phys_section_rom;
>  static uint16_t phys_section_watch;
>
> -
> -/* Simple allocator for PhysPageEntry nodes */
> -static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
> -static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;
> -
>  #define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
>
> -/* This is a multi-level map on the physical address space.
> -   The bottom level has pointers to MemoryRegionSections.  */
> -static PhysPageEntry phys_map = { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
> -
> +static QemuMutex cur_map_lock;
> +static PhysMap *cur_map;
>  QemuMutex mem_map_lock;
> +static PhysMap *next_map;
>
>  static void io_mem_init(void);
>  static void memory_map_init(void);
> @@ -383,41 +378,38 @@ static inline PageDesc *page_find(tb_page_addr_t index)
>
>  #if !defined(CONFIG_USER_ONLY)
>
> -static void phys_map_node_reserve(unsigned nodes)
> +static void phys_map_node_reserve(PhysMap *map, unsigned nodes)
>  {
> -    if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) {
> +    if (map->phys_map_nodes_nb + nodes > map->phys_map_nodes_nb_alloc) {
>          typedef PhysPageEntry Node[L2_SIZE];
> -        phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
> -        phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc,
> -                                      phys_map_nodes_nb + nodes);
> -        phys_map_nodes = g_renew(Node, phys_map_nodes,
> -                                 phys_map_nodes_nb_alloc);
> +        map->phys_map_nodes_nb_alloc = MAX(map->phys_map_nodes_nb_alloc * 2,
> +                                                                        16);
> +        map->phys_map_nodes_nb_alloc = MAX(map->phys_map_nodes_nb_alloc,
> +                                      map->phys_map_nodes_nb + nodes);
> +        map->phys_map_nodes = g_renew(Node, map->phys_map_nodes,
> +                                 map->phys_map_nodes_nb_alloc);
>      }
>  }
>
> -static uint16_t phys_map_node_alloc(void)
> +static uint16_t phys_map_node_alloc(PhysMap *map)
>  {
>      unsigned i;
>      uint16_t ret;
>
> -    ret = phys_map_nodes_nb++;
> +    ret = map->phys_map_nodes_nb++;
>      assert(ret != PHYS_MAP_NODE_NIL);
> -    assert(ret != phys_map_nodes_nb_alloc);
> +    assert(ret != map->phys_map_nodes_nb_alloc);
>      for (i = 0; i < L2_SIZE; ++i) {
> -        phys_map_nodes[ret][i].is_leaf = 0;
> -        phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
> +        map->phys_map_nodes[ret][i].is_leaf = 0;
> +        map->phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
>      }
>      return ret;
>  }
>
> -static void phys_map_nodes_reset(void)
> -{
> -    phys_map_nodes_nb = 0;
> -}
> -
> -
> -static void phys_page_set_level(PhysPageEntry *lp, target_phys_addr_t *index,
> -                                target_phys_addr_t *nb, uint16_t leaf,
> +static void phys_page_set_level(PhysMap *map, PhysPageEntry *lp,
> +                                target_phys_addr_t *index,
> +                                target_phys_addr_t *nb,
> +                                uint16_t leaf,
>                                  int level)
>  {
>      PhysPageEntry *p;
> @@ -425,8 +417,8 @@ static void phys_page_set_level(PhysPageEntry *lp, target_phys_addr_t *index,
>      target_phys_addr_t step = (target_phys_addr_t)1 << (level * L2_BITS);
>
>      if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
> -        lp->ptr = phys_map_node_alloc();
> -        p = phys_map_nodes[lp->ptr];
> +        lp->ptr = phys_map_node_alloc(map);
> +        p = map->phys_map_nodes[lp->ptr];
>          if (level == 0) {
>              for (i = 0; i < L2_SIZE; i++) {
>                  p[i].is_leaf = 1;
> @@ -434,7 +426,7 @@ static void phys_page_set_level(PhysPageEntry *lp, target_phys_addr_t *index,
>              }
>          }
>      } else {
> -        p = phys_map_nodes[lp->ptr];
> +        p = map->phys_map_nodes[lp->ptr];
>      }
>      lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];
>
> @@ -445,24 +437,27 @@ static void phys_page_set_level(PhysPageEntry *lp, target_phys_addr_t *index,
>              *index += step;
>              *nb -= step;
>          } else {
> -            phys_page_set_level(lp, index, nb, leaf, level - 1);
> +            phys_page_set_level(map, lp, index, nb, leaf, level - 1);
>          }
>          ++lp;
>      }
>  }
>
> -static void phys_page_set(target_phys_addr_t index, target_phys_addr_t nb,
> -                          uint16_t leaf)
> +static void phys_page_set(PhysMap *map, target_phys_addr_t index,
> +                            target_phys_addr_t nb,
> +                            uint16_t leaf)
>  {
>      /* Wildly overreserve - it doesn't matter much. */
> -    phys_map_node_reserve(3 * P_L2_LEVELS);
> +    phys_map_node_reserve(map, 3 * P_L2_LEVELS);
>
> -    phys_page_set_level(&phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
> +    /* update in new tree*/
> +    phys_page_set_level(map, &map->root, &index, &nb, leaf, P_L2_LEVELS - 1);
>  }
>
> -MemoryRegionSection *phys_page_find(target_phys_addr_t index)
> +static MemoryRegionSection *phys_page_find_internal(PhysMap *map,
> +                           target_phys_addr_t index)
>  {
> -    PhysPageEntry lp = phys_map;
> +    PhysPageEntry lp = map->root;
>      PhysPageEntry *p;
>      int i;
>      uint16_t s_index = phys_section_unassigned;
> @@ -471,13 +466,79 @@ MemoryRegionSection *phys_page_find(target_phys_addr_t index)
>          if (lp.ptr == PHYS_MAP_NODE_NIL) {
>              goto not_found;
>          }
> -        p = phys_map_nodes[lp.ptr];
> +        p = map->phys_map_nodes[lp.ptr];
>          lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
>      }
>
>      s_index = lp.ptr;
>  not_found:
> -    return &phys_sections[s_index];
> +    return &map->phys_sections[s_index];
> +}
> +
> +MemoryRegionSection *phys_page_find(target_phys_addr_t index)
> +{
> +    return phys_page_find_internal(cur_map, index);
> +}
> +
> +void physmap_get(PhysMap *map)
> +{
> +    atomic_inc(&map->ref);
> +}
> +
> +/* Untill rcu read side finished, do this reclaim */

Until

> +static ChunkHead physmap_reclaimer_list = { .lh_first = NULL };

Please insert a blank line here.

> +void physmap_reclaimer_enqueue(void *opaque, ReleaseHandler *release)
> +{
> +    reclaimer_enqueue(&physmap_reclaimer_list, opaque, release);
> +}
> +
> +static void destroy_all_mappings(PhysMap *map);

Prototypes belong at the top of the file.

> +static void phys_map_release(PhysMap *map)
> +{
> +    /* emulate for rcu reclaimer for mr */
> +    reclaimer_worker(&physmap_reclaimer_list);
> +
> +    destroy_all_mappings(map);
> +    g_free(map->phys_map_nodes);
> +    g_free(map->phys_sections);
> +    g_free(map->views[0].ranges);
> +    g_free(map->views[1].ranges);
> +    g_free(map);
> +}
> +
> +void physmap_put(PhysMap *map)
> +{
> +    if (atomic_dec_and_test(&map->ref)) {
> +        phys_map_release(map);
> +    }
> +}
> +
> +void cur_map_update(PhysMap *next)
> +{
> +    qemu_mutex_lock(&cur_map_lock);
> +    physmap_put(cur_map);
> +    cur_map = next;
> +    smp_mb();
> +    qemu_mutex_unlock(&cur_map_lock);
> +}
> +
> +PhysMap *cur_map_get(void)
> +{
> +    PhysMap *ret;
> +
> +    qemu_mutex_lock(&cur_map_lock);
> +    ret = cur_map;
> +    physmap_get(ret);
> +    smp_mb();
> +    qemu_mutex_unlock(&cur_map_lock);
> +    return ret;
> +}
> +
> +PhysMap *alloc_next_map(void)
> +{
> +    PhysMap *next = g_malloc0(sizeof(PhysMap));
> +    atomic_set(&next->ref, 1);
> +    return next;
>  }
>
>  bool memory_region_is_unassigned(MemoryRegion *mr)
> @@ -632,6 +693,7 @@ void cpu_exec_init_all(void)
>      memory_map_init();
>      io_mem_init();
>      qemu_mutex_init(&mem_map_lock);
> +    qemu_mutex_init(&cur_map_lock);
>  #endif
>  }
>
> @@ -2161,17 +2223,18 @@ int page_unprotect(target_ulong address, uintptr_t pc, void *puc)
>
>  #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
>  typedef struct subpage_t {
> +    PhysMap *map;
>      MemoryRegion iomem;
>      target_phys_addr_t base;
>      uint16_t sub_section[TARGET_PAGE_SIZE];
>  } subpage_t;
>
> -static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
> -                             uint16_t section);
> -static subpage_t *subpage_init(target_phys_addr_t base);
> -static void destroy_page_desc(uint16_t section_index)
> +static int subpage_register(PhysMap *map, subpage_t *mmio, uint32_t start,
> +                            uint32_t end, uint16_t section);
> +static subpage_t *subpage_init(PhysMap *map, target_phys_addr_t base);
> +static void destroy_page_desc(PhysMap *map, uint16_t section_index)
>  {
> -    MemoryRegionSection *section = &phys_sections[section_index];
> +    MemoryRegionSection *section = &map->phys_sections[section_index];
>      MemoryRegion *mr = section->mr;
>
>      if (mr->subpage) {
> @@ -2181,7 +2244,7 @@ static void destroy_page_desc(uint16_t section_index)
>      }
>  }
>
> -static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
> +static void destroy_l2_mapping(PhysMap *map, PhysPageEntry *lp, unsigned level)
>  {
>      unsigned i;
>      PhysPageEntry *p;
> @@ -2190,38 +2253,34 @@ static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
>          return;
>      }
>
> -    p = phys_map_nodes[lp->ptr];
> +    p = map->phys_map_nodes[lp->ptr];
>      for (i = 0; i < L2_SIZE; ++i) {
>          if (!p[i].is_leaf) {
> -            destroy_l2_mapping(&p[i], level - 1);
> +            destroy_l2_mapping(map, &p[i], level - 1);
>          } else {
> -            destroy_page_desc(p[i].ptr);
> +            destroy_page_desc(map, p[i].ptr);
>          }
>      }
>      lp->is_leaf = 0;
>      lp->ptr = PHYS_MAP_NODE_NIL;
>  }
>
> -static void destroy_all_mappings(void)
> +static void destroy_all_mappings(PhysMap *map)
>  {
> -    destroy_l2_mapping(&phys_map, P_L2_LEVELS - 1);
> -    phys_map_nodes_reset();
> -}
> +    PhysPageEntry *root = &map->root;
>
> -static uint16_t phys_section_add(MemoryRegionSection *section)
> -{
> -    if (phys_sections_nb == phys_sections_nb_alloc) {
> -        phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16);
> -        phys_sections = g_renew(MemoryRegionSection, phys_sections,
> -                                phys_sections_nb_alloc);
> -    }
> -    phys_sections[phys_sections_nb] = *section;
> -    return phys_sections_nb++;
> +    destroy_l2_mapping(map, root, P_L2_LEVELS - 1);
>  }
>
> -static void phys_sections_clear(void)
> +static uint16_t phys_section_add(PhysMap *map, MemoryRegionSection *section)
>  {
> -    phys_sections_nb = 0;
> +    if (map->phys_sections_nb == map->phys_sections_nb_alloc) {
> +        map->phys_sections_nb_alloc = MAX(map->phys_sections_nb_alloc * 2, 16);
> +        map->phys_sections = g_renew(MemoryRegionSection, map->phys_sections,
> +                                map->phys_sections_nb_alloc);
> +    }
> +    map->phys_sections[map->phys_sections_nb] = *section;
> +    return map->phys_sections_nb++;
>  }
>
>  /* register physical memory.
> @@ -2232,12 +2291,13 @@ static void phys_sections_clear(void)
>     start_addr and region_offset are rounded down to a page boundary
>     before calculating this offset.  This should not be a problem unless
>     the low bits of start_addr and region_offset differ.  */
> -static void register_subpage(MemoryRegionSection *section)
> +static void register_subpage(PhysMap *map, MemoryRegionSection *section)
>  {
>      subpage_t *subpage;
>      target_phys_addr_t base = section->offset_within_address_space
>          & TARGET_PAGE_MASK;
> -    MemoryRegionSection *existing = phys_page_find(base >> TARGET_PAGE_BITS);
> +    MemoryRegionSection *existing = phys_page_find_internal(map,
> +                                            base >> TARGET_PAGE_BITS);
>      MemoryRegionSection subsection = {
>          .offset_within_address_space = base,
>          .size = TARGET_PAGE_SIZE,
> @@ -2247,30 +2307,30 @@ static void register_subpage(MemoryRegionSection *section)
>      assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
>
>      if (!(existing->mr->subpage)) {
> -        subpage = subpage_init(base);
> +        subpage = subpage_init(map, base);
>          subsection.mr = &subpage->iomem;
> -        phys_page_set(base >> TARGET_PAGE_BITS, 1,
> -                      phys_section_add(&subsection));
> +        phys_page_set(map, base >> TARGET_PAGE_BITS, 1,
> +                      phys_section_add(map, &subsection));
>      } else {
>          subpage = container_of(existing->mr, subpage_t, iomem);
>      }
>      start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
>      end = start + section->size;
> -    subpage_register(subpage, start, end, phys_section_add(section));
> +    subpage_register(map, subpage, start, end, phys_section_add(map, section));
>  }
>
>
> -static void register_multipage(MemoryRegionSection *section)
> +static void register_multipage(PhysMap *map, MemoryRegionSection *section)
>  {
>      target_phys_addr_t start_addr = section->offset_within_address_space;
>      ram_addr_t size = section->size;
>      target_phys_addr_t addr;
> -    uint16_t section_index = phys_section_add(section);
> +    uint16_t section_index = phys_section_add(map, section);
>
>      assert(size);
>
>      addr = start_addr;
> -    phys_page_set(addr >> TARGET_PAGE_BITS, size >> TARGET_PAGE_BITS,
> +    phys_page_set(map, addr >> TARGET_PAGE_BITS, size >> TARGET_PAGE_BITS,
>                    section_index);
>  }
>
> @@ -2278,13 +2338,14 @@ void cpu_register_physical_memory_log(MemoryRegionSection *section,
>                                        bool readonly)
>  {
>      MemoryRegionSection now = *section, remain = *section;
> +    PhysMap *map = next_map;
>
>      if ((now.offset_within_address_space & ~TARGET_PAGE_MASK)
>          || (now.size < TARGET_PAGE_SIZE)) {
>          now.size = MIN(TARGET_PAGE_ALIGN(now.offset_within_address_space)
>                         - now.offset_within_address_space,
>                         now.size);
> -        register_subpage(&now);
> +        register_subpage(map, &now);
>          remain.size -= now.size;
>          remain.offset_within_address_space += now.size;
>          remain.offset_within_region += now.size;
> @@ -2292,14 +2353,14 @@ void cpu_register_physical_memory_log(MemoryRegionSection *section,
>      now = remain;
>      now.size &= TARGET_PAGE_MASK;
>      if (now.size) {
> -        register_multipage(&now);
> +        register_multipage(map, &now);
>          remain.size -= now.size;
>          remain.offset_within_address_space += now.size;
>          remain.offset_within_region += now.size;
>      }
>      now = remain;
>      if (now.size) {
> -        register_subpage(&now);
> +        register_subpage(map, &now);
>      }
>  }
>
> @@ -3001,7 +3062,7 @@ static uint64_t subpage_read(void *opaque, target_phys_addr_t addr,
>             mmio, len, addr, idx);
>  #endif
>
> -    section = &phys_sections[mmio->sub_section[idx]];
> +    section = &mmio->map->phys_sections[mmio->sub_section[idx]];
>      addr += mmio->base;
>      addr -= section->offset_within_address_space;
>      addr += section->offset_within_region;
> @@ -3020,7 +3081,7 @@ static void subpage_write(void *opaque, target_phys_addr_t addr,
>             __func__, mmio, len, addr, idx, value);
>  #endif
>
> -    section = &phys_sections[mmio->sub_section[idx]];
> +    section = &mmio->map->phys_sections[mmio->sub_section[idx]];
>      addr += mmio->base;
>      addr -= section->offset_within_address_space;
>      addr += section->offset_within_region;
> @@ -3065,8 +3126,8 @@ static const MemoryRegionOps subpage_ram_ops = {
>      .endianness = DEVICE_NATIVE_ENDIAN,
>  };
>
> -static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
> -                             uint16_t section)
> +static int subpage_register(PhysMap *map, subpage_t *mmio, uint32_t start,
> +                              uint32_t end, uint16_t section)
>  {
>      int idx, eidx;
>
> @@ -3078,10 +3139,10 @@ static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
>      printf("%s: %p start %08x end %08x idx %08x eidx %08x mem %ld\n", __func__,
>             mmio, start, end, idx, eidx, memory);
>  #endif
> -    if (memory_region_is_ram(phys_sections[section].mr)) {
> -        MemoryRegionSection new_section = phys_sections[section];
> +    if (memory_region_is_ram(map->phys_sections[section].mr)) {
> +        MemoryRegionSection new_section = map->phys_sections[section];
>          new_section.mr = &io_mem_subpage_ram;
> -        section = phys_section_add(&new_section);
> +        section = phys_section_add(map, &new_section);
>      }
>      for (; idx <= eidx; idx++) {
>          mmio->sub_section[idx] = section;
> @@ -3090,12 +3151,13 @@ static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
>      return 0;
>  }
>
> -static subpage_t *subpage_init(target_phys_addr_t base)
> +static subpage_t *subpage_init(PhysMap *map, target_phys_addr_t base)
>  {
>      subpage_t *mmio;
>
>      mmio = g_malloc0(sizeof(subpage_t));
>
> +    mmio->map = map;
>      mmio->base = base;
>      memory_region_init_io(&mmio->iomem, &subpage_ops, mmio,
>                            "subpage", TARGET_PAGE_SIZE);
> @@ -3104,12 +3166,12 @@ static subpage_t *subpage_init(target_phys_addr_t base)
>      printf("%s: %p base " TARGET_FMT_plx " len %08x %d\n", __func__,
>             mmio, base, TARGET_PAGE_SIZE, subpage_memory);
>  #endif
> -    subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, phys_section_unassigned);
> +    subpage_register(map, mmio, 0, TARGET_PAGE_SIZE-1, phys_section_unassigned);
>
>      return mmio;
>  }
>
> -static uint16_t dummy_section(MemoryRegion *mr)
> +static uint16_t dummy_section(PhysMap *map, MemoryRegion *mr)
>  {
>      MemoryRegionSection section = {
>          .mr = mr,
> @@ -3118,7 +3180,7 @@ static uint16_t dummy_section(MemoryRegion *mr)
>          .size = UINT64_MAX,
>      };
>
> -    return phys_section_add(&section);
> +    return phys_section_add(map, &section);
>  }
>
>  MemoryRegion *iotlb_to_region(target_phys_addr_t index)
> @@ -3140,15 +3202,32 @@ static void io_mem_init(void)
>                            "watch", UINT64_MAX);
>  }
>
> -static void core_begin(MemoryListener *listener)
> +#if 0
> +static void physmap_init(void)
> +{
> +    FlatView v = { .ranges = NULL,
> +                             .nr = 0,
> +                             .nr_allocated = 0,
> +    };
> +
> +    init_map.views[0] = v;
> +    init_map.views[1] = v;
> +    cur_map =  &init_map;
> +}
> +#endif

Please delete.

> +
> +static void core_begin(MemoryListener *listener, PhysMap *new_map)
>  {
> -    destroy_all_mappings();
> -    phys_sections_clear();
> -    phys_map.ptr = PHYS_MAP_NODE_NIL;
> -    phys_section_unassigned = dummy_section(&io_mem_unassigned);
> -    phys_section_notdirty = dummy_section(&io_mem_notdirty);
> -    phys_section_rom = dummy_section(&io_mem_rom);
> -    phys_section_watch = dummy_section(&io_mem_watch);
> +
> +    new_map->root.ptr = PHYS_MAP_NODE_NIL;
> +    new_map->root.is_leaf = 0;
> +
> +    /* In all the map, these sections have the same index */
> +    phys_section_unassigned = dummy_section(new_map, &io_mem_unassigned);
> +    phys_section_notdirty = dummy_section(new_map, &io_mem_notdirty);
> +    phys_section_rom = dummy_section(new_map, &io_mem_rom);
> +    phys_section_watch = dummy_section(new_map, &io_mem_watch);
> +    next_map = new_map;
>  }
>
>  static void core_commit(MemoryListener *listener)
> @@ -3161,6 +3240,16 @@ static void core_commit(MemoryListener *listener)
>      for(env = first_cpu; env != NULL; env = env->next_cpu) {
>          tlb_flush(env, 1);
>      }
> +
> +/* move into high layer
> +    qemu_mutex_lock(&cur_map_lock);
> +    if (cur_map != NULL) {
> +        physmap_put(cur_map);
> +    }
> +    cur_map = next_map;
> +    smp_mb();
> +    qemu_mutex_unlock(&cur_map_lock);
> +*/

Commented-out code should also be deleted.

>  }
>
>  static void core_region_add(MemoryListener *listener,
> @@ -3217,7 +3306,7 @@ static void core_eventfd_del(MemoryListener *listener,
>  {
>  }
>
> -static void io_begin(MemoryListener *listener)
> +static void io_begin(MemoryListener *listener, PhysMap *next)
>  {
>  }
>
> @@ -3329,6 +3418,20 @@ static void memory_map_init(void)
>      memory_listener_register(&io_memory_listener, system_io);
>  }
>
> +void physmap_init(void)
> +{
> +    FlatView v = { .ranges = NULL, .nr = 0, .nr_allocated = 0,
> +                           };
> +    PhysMap *init_map = g_malloc0(sizeof(PhysMap));
> +
> +    atomic_set(&init_map->ref, 1);
> +    init_map->root.ptr = PHYS_MAP_NODE_NIL;
> +    init_map->root.is_leaf = 0;
> +    init_map->views[0] = v;
> +    init_map->views[1] = v;
> +    cur_map = init_map;
> +}
> +
>  MemoryRegion *get_system_memory(void)
>  {
>      return system_memory;
> @@ -3391,6 +3494,7 @@ void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
>      uint32_t val;
>      target_phys_addr_t page;
>      MemoryRegionSection *section;
> +    PhysMap *cur = cur_map_get();
>
>      while (len > 0) {
>          page = addr & TARGET_PAGE_MASK;
> @@ -3472,6 +3576,7 @@ void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
>          buf += l;
>          addr += l;
>      }
> +    physmap_put(cur);
>  }
>
>  /* used for ROM loading : can write in RAM and ROM */
> diff --git a/hw/vhost.c b/hw/vhost.c
> index 43664e7..df58345 100644
> --- a/hw/vhost.c
> +++ b/hw/vhost.c
> @@ -438,7 +438,7 @@ static bool vhost_section(MemoryRegionSection *section)
>          && memory_region_is_ram(section->mr);
>  }
>
> -static void vhost_begin(MemoryListener *listener)
> +static void vhost_begin(MemoryListener *listener, PhysMap *next)
>  {
>  }
>
> diff --git a/hw/xen_pt.c b/hw/xen_pt.c
> index 3b6d186..fba8586 100644
> --- a/hw/xen_pt.c
> +++ b/hw/xen_pt.c
> @@ -597,7 +597,7 @@ static void xen_pt_region_update(XenPCIPassthroughState *s,
>      }
>  }
>
> -static void xen_pt_begin(MemoryListener *l)
> +static void xen_pt_begin(MemoryListener *l, PhysMap *next)
>  {
>  }
>
> diff --git a/kvm-all.c b/kvm-all.c
> index f8e4328..bc42cab 100644
> --- a/kvm-all.c
> +++ b/kvm-all.c
> @@ -693,7 +693,7 @@ static void kvm_set_phys_mem(MemoryRegionSection *section, bool add)
>      }
>  }
>
> -static void kvm_begin(MemoryListener *listener)
> +static void kvm_begin(MemoryListener *listener, PhysMap *next)
>  {
>  }
>
> diff --git a/memory.c b/memory.c
> index c7f2cfd..54cdc7f 100644
> --- a/memory.c
> +++ b/memory.c
> @@ -20,6 +20,7 @@
>  #include "kvm.h"
>  #include <assert.h>
>  #include "hw/qdev.h"
> +#include "qemu-thread.h"
>
>  #define WANT_EXEC_OBSOLETE
>  #include "exec-obsolete.h"
> @@ -192,7 +193,7 @@ typedef struct AddressSpaceOps AddressSpaceOps;
>  /* A system address space - I/O, memory, etc. */
>  struct AddressSpace {
>      MemoryRegion *root;
> -    FlatView current_map;
> +    int view_id;
>      int ioeventfd_nb;
>      MemoryRegionIoeventfd *ioeventfds;
>  };
> @@ -232,11 +233,6 @@ static void flatview_insert(FlatView *view, unsigned pos, FlatRange *range)
>      ++view->nr;
>  }
>
> -static void flatview_destroy(FlatView *view)
> -{
> -    g_free(view->ranges);
> -}
> -
>  static bool can_merge(FlatRange *r1, FlatRange *r2)
>  {
>      return int128_eq(addrrange_end(r1->addr), r2->addr.start)
> @@ -594,8 +590,10 @@ static void address_space_update_ioeventfds(AddressSpace *as)
>      MemoryRegionIoeventfd *ioeventfds = NULL;
>      AddrRange tmp;
>      unsigned i;
> +    PhysMap *map = cur_map_get();
> +    FlatView *view = &map->views[as->view_id];
>
> -    FOR_EACH_FLAT_RANGE(fr, &as->current_map) {
> +    FOR_EACH_FLAT_RANGE(fr, view) {
>          for (i = 0; i < fr->mr->ioeventfd_nb; ++i) {
>              tmp = addrrange_shift(fr->mr->ioeventfds[i].addr,
>                                    int128_sub(fr->addr.start,
> @@ -616,6 +614,7 @@ static void address_space_update_ioeventfds(AddressSpace *as)
>      g_free(as->ioeventfds);
>      as->ioeventfds = ioeventfds;
>      as->ioeventfd_nb = ioeventfd_nb;
> +    physmap_put(map);
>  }
>
>  static void address_space_update_topology_pass(AddressSpace *as,
> @@ -681,21 +680,23 @@ static void address_space_update_topology_pass(AddressSpace *as,
>  }
>
>
> -static void address_space_update_topology(AddressSpace *as)
> +static void address_space_update_topology(AddressSpace *as, PhysMap *prev,
> +                                            PhysMap *next)
>  {
> -    FlatView old_view = as->current_map;
> +    FlatView old_view = prev->views[as->view_id];
>      FlatView new_view = generate_memory_topology(as->root);
>
>      address_space_update_topology_pass(as, old_view, new_view, false);
>      address_space_update_topology_pass(as, old_view, new_view, true);
> +    next->views[as->view_id] = new_view;
>
> -    as->current_map = new_view;
> -    flatview_destroy(&old_view);
>      address_space_update_ioeventfds(as);
>  }
>
>  static void memory_region_update_topology(MemoryRegion *mr)
>  {
> +    PhysMap *prev, *next;
> +
>      if (memory_region_transaction_depth) {
>          memory_region_update_pending |= !mr || mr->enabled;
>          return;
> @@ -705,16 +706,20 @@ static void memory_region_update_topology(MemoryRegion *mr)
>          return;
>      }
>
> -    MEMORY_LISTENER_CALL_GLOBAL(begin, Forward);
> +     prev = cur_map_get();
> +    /* allocate PhysMap next here */
> +    next = alloc_next_map();
> +    MEMORY_LISTENER_CALL_GLOBAL(begin, Forward, next);
>
>      if (address_space_memory.root) {
> -        address_space_update_topology(&address_space_memory);
> +        address_space_update_topology(&address_space_memory, prev, next);
>      }
>      if (address_space_io.root) {
> -        address_space_update_topology(&address_space_io);
> +        address_space_update_topology(&address_space_io, prev, next);
>      }
>
>      MEMORY_LISTENER_CALL_GLOBAL(commit, Forward);
> +    cur_map_update(next);
>
>      memory_region_update_pending = false;
>  }
> @@ -1071,7 +1076,7 @@ void memory_region_put(MemoryRegion *mr)
>
>      if (atomic_dec_and_test(&mr->ref)) {
>          /* to fix, using call_rcu( ,release) */
> -        mr->life_ops->put(mr);
> +        physmap_reclaimer_enqueue(mr, (ReleaseHandler *)mr->life_ops->put);
>      }
>  }
>
> @@ -1147,13 +1152,18 @@ void memory_region_set_dirty(MemoryRegion *mr, target_phys_addr_t addr,
>  void memory_region_sync_dirty_bitmap(MemoryRegion *mr)
>  {
>      FlatRange *fr;
> +    FlatView *fview;
> +    PhysMap *map;
>
> -    FOR_EACH_FLAT_RANGE(fr, &address_space_memory.current_map) {
> +    map = cur_map_get();
> +    fview = &map->views[address_space_memory.view_id];
> +    FOR_EACH_FLAT_RANGE(fr, fview) {
>          if (fr->mr == mr) {
>              MEMORY_LISTENER_UPDATE_REGION(fr, &address_space_memory,
>                                            Forward, log_sync);
>          }
>      }
> +    physmap_put(map);
>  }
>
>  void memory_region_set_readonly(MemoryRegion *mr, bool readonly)
> @@ -1201,8 +1211,12 @@ static void memory_region_update_coalesced_range(MemoryRegion *mr)
>      FlatRange *fr;
>      CoalescedMemoryRange *cmr;
>      AddrRange tmp;
> +    FlatView *fview;
> +    PhysMap *map;
>
> -    FOR_EACH_FLAT_RANGE(fr, &address_space_memory.current_map) {
> +    map = cur_map_get();
> +    fview = &map->views[address_space_memory.view_id];
> +    FOR_EACH_FLAT_RANGE(fr, fview) {
>          if (fr->mr == mr) {
>              qemu_unregister_coalesced_mmio(int128_get64(fr->addr.start),
>                                             int128_get64(fr->addr.size));
> @@ -1219,6 +1233,7 @@ static void memory_region_update_coalesced_range(MemoryRegion *mr)
>              }
>          }
>      }
> +    physmap_put(map);
>  }
>
>  void memory_region_set_coalescing(MemoryRegion *mr)
> @@ -1458,29 +1473,49 @@ static int cmp_flatrange_addr(const void *addr_, const void *fr_)
>      return 0;
>  }
>
> -static FlatRange *address_space_lookup(AddressSpace *as, AddrRange addr)
> +static FlatRange *address_space_lookup(FlatView *view, AddrRange addr)
>  {
> -    return bsearch(&addr, as->current_map.ranges, as->current_map.nr,
> +    return bsearch(&addr, view->ranges, view->nr,
>                     sizeof(FlatRange), cmp_flatrange_addr);
>  }
>
> +/* dec the ref, which inc by memory_region_find*/
> +void memory_region_section_put(MemoryRegionSection *mrs)
> +{
> +    if (mrs->mr != NULL) {
> +        memory_region_put(mrs->mr);
> +    }
> +}
> +
> +/* inc mr's ref. Caller need dec mr's ref */
>  MemoryRegionSection memory_region_find(MemoryRegion *address_space,
>                                         target_phys_addr_t addr, uint64_t size)
>  {
> +    PhysMap *map;
>      AddressSpace *as = memory_region_to_address_space(address_space);
>      AddrRange range = addrrange_make(int128_make64(addr),
>                                       int128_make64(size));
> -    FlatRange *fr = address_space_lookup(as, range);
> +    FlatView *fview;
> +
> +    map = cur_map_get();
> +
> +    fview = &map->views[as->view_id];
> +    FlatRange *fr = address_space_lookup(fview, range);
>      MemoryRegionSection ret = { .mr = NULL, .size = 0 };
>
>      if (!fr) {
> +        physmap_put(map);
>          return ret;
>      }
>
> -    while (fr > as->current_map.ranges
> +    while (fr > fview->ranges
>             && addrrange_intersects(fr[-1].addr, range)) {
>          --fr;
>      }
> +    /* To fix, the caller must in rcu, or we must inc fr->mr->ref here
> +     */
> +    memory_region_get(fr->mr);
> +    physmap_put(map);
>
>      ret.mr = fr->mr;
>      range = addrrange_intersection(range, fr->addr);
> @@ -1497,10 +1532,13 @@ void memory_global_sync_dirty_bitmap(MemoryRegion *address_space)
>  {
>      AddressSpace *as = memory_region_to_address_space(address_space);
>      FlatRange *fr;
> +    PhysMap *map = cur_map_get();
> +    FlatView *view = &map->views[as->view_id];
>
> -    FOR_EACH_FLAT_RANGE(fr, &as->current_map) {
> +    FOR_EACH_FLAT_RANGE(fr, view) {
>          MEMORY_LISTENER_UPDATE_REGION(fr, as, Forward, log_sync);
>      }
> +    physmap_put(map);
>  }
>
>  void memory_global_dirty_log_start(void)
> @@ -1519,6 +1557,8 @@ static void listener_add_address_space(MemoryListener *listener,
>                                         AddressSpace *as)
>  {
>      FlatRange *fr;
> +    PhysMap *map;
> +    FlatView *view;
>
>      if (listener->address_space_filter
>          && listener->address_space_filter != as->root) {
> @@ -1528,7 +1568,10 @@ static void listener_add_address_space(MemoryListener *listener,
>      if (global_dirty_log) {
>          listener->log_global_start(listener);
>      }
> -    FOR_EACH_FLAT_RANGE(fr, &as->current_map) {
> +
> +    map = cur_map_get();
> +    view = &map->views[as->view_id];
> +    FOR_EACH_FLAT_RANGE(fr, view) {
>          MemoryRegionSection section = {
>              .mr = fr->mr,
>              .address_space = as->root,
> @@ -1539,6 +1582,7 @@ static void listener_add_address_space(MemoryListener *listener,
>          };
>          listener->region_add(listener, &section);
>      }
> +    physmap_put(map);
>  }
>
>  void memory_listener_register(MemoryListener *listener, MemoryRegion *filter)
> @@ -1570,12 +1614,14 @@ void memory_listener_unregister(MemoryListener *listener)
>  void set_system_memory_map(MemoryRegion *mr)
>  {
>      address_space_memory.root = mr;
> +    address_space_memory.view_id = 0;
>      memory_region_update_topology(NULL);
>  }
>
>  void set_system_io_map(MemoryRegion *mr)
>  {
>      address_space_io.root = mr;
> +    address_space_io.view_id = 1;
>      memory_region_update_topology(NULL);
>  }
>
> diff --git a/memory.h b/memory.h
> index 357edd8..18442d4 100644
> --- a/memory.h
> +++ b/memory.h
> @@ -256,7 +256,7 @@ typedef struct MemoryListener MemoryListener;
>   * Use with memory_listener_register() and memory_listener_unregister().
>   */
>  struct MemoryListener {
> -    void (*begin)(MemoryListener *listener);
> +    void (*begin)(MemoryListener *listener, PhysMap *next);
>      void (*commit)(MemoryListener *listener);
>      void (*region_add)(MemoryListener *listener, MemoryRegionSection *section);
>      void (*region_del)(MemoryListener *listener, MemoryRegionSection *section);
> @@ -829,6 +829,13 @@ void mtree_info(fprintf_function mon_printf, void *f);
>
>  void memory_region_get(MemoryRegion *mr);
>  void memory_region_put(MemoryRegion *mr);
> +void physmap_reclaimer_enqueue(void *opaque, ReleaseHandler *release);
> +void physmap_get(PhysMap *map);
> +void physmap_put(PhysMap *map);
> +PhysMap *cur_map_get(void);
> +PhysMap *alloc_next_map(void);
> +void cur_map_update(PhysMap *next);
> +void physmap_init(void);
>  #endif
>
>  #endif
> diff --git a/vl.c b/vl.c
> index 1329c30..12af523 100644
> --- a/vl.c
> +++ b/vl.c
> @@ -3346,6 +3346,7 @@ int main(int argc, char **argv, char **envp)
>      if (ram_size == 0) {
>          ram_size = DEFAULT_RAM_SIZE * 1024 * 1024;
>      }
> +    physmap_init();
>
>      configure_accelerator();
>
> diff --git a/xen-all.c b/xen-all.c
> index 59f2323..41d82fd 100644
> --- a/xen-all.c
> +++ b/xen-all.c
> @@ -452,7 +452,7 @@ static void xen_set_memory(struct MemoryListener *listener,
>      }
>  }
>
> -static void xen_begin(MemoryListener *listener)
> +static void xen_begin(MemoryListener *listener, PhysMap *next)
>  {
>  }
>
> --
> 1.7.4.4
>

WARNING: multiple messages have this Message-ID (diff)
From: Blue Swirl <blauwirbel@gmail.com>
To: Liu Ping Fan <qemulist@gmail.com>
Cc: kvm@vger.kernel.org, "Jan Kiszka" <jan.kiszka@siemens.com>,
	"Marcelo Tosatti" <mtosatti@redhat.com>,
	qemu-devel@nongnu.org, "Avi Kivity" <avi@redhat.com>,
	"Anthony Liguori" <anthony@codemonkey.ws>,
	"Stefan Hajnoczi" <stefanha@gmail.com>,
	"Paolo Bonzini" <pbonzini@redhat.com>,
	"Andreas Färber" <afaerber@suse.de>
Subject: Re: [Qemu-devel] [PATCH 09/15] memory: prepare flatview and radix-tree for rcu style access
Date: Wed, 8 Aug 2012 19:23:51 +0000	[thread overview]
Message-ID: <CAAu8pHv7KbH7maxzCGrxop9p=1s8frW0XefcO1RoPb_+6cJn5w@mail.gmail.com> (raw)
In-Reply-To: <1344407156-25562-10-git-send-email-qemulist@gmail.com>

On Wed, Aug 8, 2012 at 6:25 AM, Liu Ping Fan <qemulist@gmail.com> wrote:
> From: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
>
> Flatview and radix view are all under the protection of pointer.
> And this make sure the change of them seem to be atomic!
>
> The mr accessed by radix-tree leaf or flatview will be reclaimed
> after the prev PhysMap not in use any longer
>
> Signed-off-by: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
> ---
>  exec.c      |  303 +++++++++++++++++++++++++++++++++++++++-------------------
>  hw/vhost.c  |    2 +-
>  hw/xen_pt.c |    2 +-
>  kvm-all.c   |    2 +-
>  memory.c    |   92 ++++++++++++++-----
>  memory.h    |    9 ++-
>  vl.c        |    1 +
>  xen-all.c   |    2 +-
>  8 files changed, 286 insertions(+), 127 deletions(-)
>
> diff --git a/exec.c b/exec.c
> index 01b91b0..97addb9 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -24,6 +24,7 @@
>  #include <sys/mman.h>
>  #endif
>
> +#include "qemu/atomic.h"
>  #include "qemu-common.h"
>  #include "cpu.h"
>  #include "tcg.h"
> @@ -35,6 +36,8 @@
>  #include "qemu-timer.h"
>  #include "memory.h"
>  #include "exec-memory.h"
> +#include "qemu-thread.h"
> +#include "qemu/reclaimer.h"
>  #if defined(CONFIG_USER_ONLY)
>  #include <qemu.h>
>  #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
> @@ -184,25 +187,17 @@ static void *l1_map[V_L1_SIZE];
>
>  #if !defined(CONFIG_USER_ONLY)
>
> -static MemoryRegionSection *phys_sections;
> -static unsigned phys_sections_nb, phys_sections_nb_alloc;
>  static uint16_t phys_section_unassigned;
>  static uint16_t phys_section_notdirty;
>  static uint16_t phys_section_rom;
>  static uint16_t phys_section_watch;
>
> -
> -/* Simple allocator for PhysPageEntry nodes */
> -static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
> -static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;
> -
>  #define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
>
> -/* This is a multi-level map on the physical address space.
> -   The bottom level has pointers to MemoryRegionSections.  */
> -static PhysPageEntry phys_map = { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
> -
> +static QemuMutex cur_map_lock;
> +static PhysMap *cur_map;
>  QemuMutex mem_map_lock;
> +static PhysMap *next_map;
>
>  static void io_mem_init(void);
>  static void memory_map_init(void);
> @@ -383,41 +378,38 @@ static inline PageDesc *page_find(tb_page_addr_t index)
>
>  #if !defined(CONFIG_USER_ONLY)
>
> -static void phys_map_node_reserve(unsigned nodes)
> +static void phys_map_node_reserve(PhysMap *map, unsigned nodes)
>  {
> -    if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) {
> +    if (map->phys_map_nodes_nb + nodes > map->phys_map_nodes_nb_alloc) {
>          typedef PhysPageEntry Node[L2_SIZE];
> -        phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
> -        phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc,
> -                                      phys_map_nodes_nb + nodes);
> -        phys_map_nodes = g_renew(Node, phys_map_nodes,
> -                                 phys_map_nodes_nb_alloc);
> +        map->phys_map_nodes_nb_alloc = MAX(map->phys_map_nodes_nb_alloc * 2,
> +                                                                        16);
> +        map->phys_map_nodes_nb_alloc = MAX(map->phys_map_nodes_nb_alloc,
> +                                      map->phys_map_nodes_nb + nodes);
> +        map->phys_map_nodes = g_renew(Node, map->phys_map_nodes,
> +                                 map->phys_map_nodes_nb_alloc);
>      }
>  }
>
> -static uint16_t phys_map_node_alloc(void)
> +static uint16_t phys_map_node_alloc(PhysMap *map)
>  {
>      unsigned i;
>      uint16_t ret;
>
> -    ret = phys_map_nodes_nb++;
> +    ret = map->phys_map_nodes_nb++;
>      assert(ret != PHYS_MAP_NODE_NIL);
> -    assert(ret != phys_map_nodes_nb_alloc);
> +    assert(ret != map->phys_map_nodes_nb_alloc);
>      for (i = 0; i < L2_SIZE; ++i) {
> -        phys_map_nodes[ret][i].is_leaf = 0;
> -        phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
> +        map->phys_map_nodes[ret][i].is_leaf = 0;
> +        map->phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
>      }
>      return ret;
>  }
>
> -static void phys_map_nodes_reset(void)
> -{
> -    phys_map_nodes_nb = 0;
> -}
> -
> -
> -static void phys_page_set_level(PhysPageEntry *lp, target_phys_addr_t *index,
> -                                target_phys_addr_t *nb, uint16_t leaf,
> +static void phys_page_set_level(PhysMap *map, PhysPageEntry *lp,
> +                                target_phys_addr_t *index,
> +                                target_phys_addr_t *nb,
> +                                uint16_t leaf,
>                                  int level)
>  {
>      PhysPageEntry *p;
> @@ -425,8 +417,8 @@ static void phys_page_set_level(PhysPageEntry *lp, target_phys_addr_t *index,
>      target_phys_addr_t step = (target_phys_addr_t)1 << (level * L2_BITS);
>
>      if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
> -        lp->ptr = phys_map_node_alloc();
> -        p = phys_map_nodes[lp->ptr];
> +        lp->ptr = phys_map_node_alloc(map);
> +        p = map->phys_map_nodes[lp->ptr];
>          if (level == 0) {
>              for (i = 0; i < L2_SIZE; i++) {
>                  p[i].is_leaf = 1;
> @@ -434,7 +426,7 @@ static void phys_page_set_level(PhysPageEntry *lp, target_phys_addr_t *index,
>              }
>          }
>      } else {
> -        p = phys_map_nodes[lp->ptr];
> +        p = map->phys_map_nodes[lp->ptr];
>      }
>      lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];
>
> @@ -445,24 +437,27 @@ static void phys_page_set_level(PhysPageEntry *lp, target_phys_addr_t *index,
>              *index += step;
>              *nb -= step;
>          } else {
> -            phys_page_set_level(lp, index, nb, leaf, level - 1);
> +            phys_page_set_level(map, lp, index, nb, leaf, level - 1);
>          }
>          ++lp;
>      }
>  }
>
> -static void phys_page_set(target_phys_addr_t index, target_phys_addr_t nb,
> -                          uint16_t leaf)
> +static void phys_page_set(PhysMap *map, target_phys_addr_t index,
> +                            target_phys_addr_t nb,
> +                            uint16_t leaf)
>  {
>      /* Wildly overreserve - it doesn't matter much. */
> -    phys_map_node_reserve(3 * P_L2_LEVELS);
> +    phys_map_node_reserve(map, 3 * P_L2_LEVELS);
>
> -    phys_page_set_level(&phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
> +    /* update in new tree*/
> +    phys_page_set_level(map, &map->root, &index, &nb, leaf, P_L2_LEVELS - 1);
>  }
>
> -MemoryRegionSection *phys_page_find(target_phys_addr_t index)
> +static MemoryRegionSection *phys_page_find_internal(PhysMap *map,
> +                           target_phys_addr_t index)
>  {
> -    PhysPageEntry lp = phys_map;
> +    PhysPageEntry lp = map->root;
>      PhysPageEntry *p;
>      int i;
>      uint16_t s_index = phys_section_unassigned;
> @@ -471,13 +466,79 @@ MemoryRegionSection *phys_page_find(target_phys_addr_t index)
>          if (lp.ptr == PHYS_MAP_NODE_NIL) {
>              goto not_found;
>          }
> -        p = phys_map_nodes[lp.ptr];
> +        p = map->phys_map_nodes[lp.ptr];
>          lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
>      }
>
>      s_index = lp.ptr;
>  not_found:
> -    return &phys_sections[s_index];
> +    return &map->phys_sections[s_index];
> +}
> +
> +MemoryRegionSection *phys_page_find(target_phys_addr_t index)
> +{
> +    return phys_page_find_internal(cur_map, index);
> +}
> +
> +void physmap_get(PhysMap *map)
> +{
> +    atomic_inc(&map->ref);
> +}
> +
> +/* Untill rcu read side finished, do this reclaim */

Until

> +static ChunkHead physmap_reclaimer_list = { .lh_first = NULL };

Please insert a blank line here.

> +void physmap_reclaimer_enqueue(void *opaque, ReleaseHandler *release)
> +{
> +    reclaimer_enqueue(&physmap_reclaimer_list, opaque, release);
> +}
> +
> +static void destroy_all_mappings(PhysMap *map);

Prototypes belong at the top of the file.

> +static void phys_map_release(PhysMap *map)
> +{
> +    /* emulate for rcu reclaimer for mr */
> +    reclaimer_worker(&physmap_reclaimer_list);
> +
> +    destroy_all_mappings(map);
> +    g_free(map->phys_map_nodes);
> +    g_free(map->phys_sections);
> +    g_free(map->views[0].ranges);
> +    g_free(map->views[1].ranges);
> +    g_free(map);
> +}
> +
> +void physmap_put(PhysMap *map)
> +{
> +    if (atomic_dec_and_test(&map->ref)) {
> +        phys_map_release(map);
> +    }
> +}
> +
> +void cur_map_update(PhysMap *next)
> +{
> +    qemu_mutex_lock(&cur_map_lock);
> +    physmap_put(cur_map);
> +    cur_map = next;
> +    smp_mb();
> +    qemu_mutex_unlock(&cur_map_lock);
> +}
> +
> +PhysMap *cur_map_get(void)
> +{
> +    PhysMap *ret;
> +
> +    qemu_mutex_lock(&cur_map_lock);
> +    ret = cur_map;
> +    physmap_get(ret);
> +    smp_mb();
> +    qemu_mutex_unlock(&cur_map_lock);
> +    return ret;
> +}
> +
> +PhysMap *alloc_next_map(void)
> +{
> +    PhysMap *next = g_malloc0(sizeof(PhysMap));
> +    atomic_set(&next->ref, 1);
> +    return next;
>  }
>
>  bool memory_region_is_unassigned(MemoryRegion *mr)
> @@ -632,6 +693,7 @@ void cpu_exec_init_all(void)
>      memory_map_init();
>      io_mem_init();
>      qemu_mutex_init(&mem_map_lock);
> +    qemu_mutex_init(&cur_map_lock);
>  #endif
>  }
>
> @@ -2161,17 +2223,18 @@ int page_unprotect(target_ulong address, uintptr_t pc, void *puc)
>
>  #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
>  typedef struct subpage_t {
> +    PhysMap *map;
>      MemoryRegion iomem;
>      target_phys_addr_t base;
>      uint16_t sub_section[TARGET_PAGE_SIZE];
>  } subpage_t;
>
> -static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
> -                             uint16_t section);
> -static subpage_t *subpage_init(target_phys_addr_t base);
> -static void destroy_page_desc(uint16_t section_index)
> +static int subpage_register(PhysMap *map, subpage_t *mmio, uint32_t start,
> +                            uint32_t end, uint16_t section);
> +static subpage_t *subpage_init(PhysMap *map, target_phys_addr_t base);
> +static void destroy_page_desc(PhysMap *map, uint16_t section_index)
>  {
> -    MemoryRegionSection *section = &phys_sections[section_index];
> +    MemoryRegionSection *section = &map->phys_sections[section_index];
>      MemoryRegion *mr = section->mr;
>
>      if (mr->subpage) {
> @@ -2181,7 +2244,7 @@ static void destroy_page_desc(uint16_t section_index)
>      }
>  }
>
> -static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
> +static void destroy_l2_mapping(PhysMap *map, PhysPageEntry *lp, unsigned level)
>  {
>      unsigned i;
>      PhysPageEntry *p;
> @@ -2190,38 +2253,34 @@ static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
>          return;
>      }
>
> -    p = phys_map_nodes[lp->ptr];
> +    p = map->phys_map_nodes[lp->ptr];
>      for (i = 0; i < L2_SIZE; ++i) {
>          if (!p[i].is_leaf) {
> -            destroy_l2_mapping(&p[i], level - 1);
> +            destroy_l2_mapping(map, &p[i], level - 1);
>          } else {
> -            destroy_page_desc(p[i].ptr);
> +            destroy_page_desc(map, p[i].ptr);
>          }
>      }
>      lp->is_leaf = 0;
>      lp->ptr = PHYS_MAP_NODE_NIL;
>  }
>
> -static void destroy_all_mappings(void)
> +static void destroy_all_mappings(PhysMap *map)
>  {
> -    destroy_l2_mapping(&phys_map, P_L2_LEVELS - 1);
> -    phys_map_nodes_reset();
> -}
> +    PhysPageEntry *root = &map->root;
>
> -static uint16_t phys_section_add(MemoryRegionSection *section)
> -{
> -    if (phys_sections_nb == phys_sections_nb_alloc) {
> -        phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16);
> -        phys_sections = g_renew(MemoryRegionSection, phys_sections,
> -                                phys_sections_nb_alloc);
> -    }
> -    phys_sections[phys_sections_nb] = *section;
> -    return phys_sections_nb++;
> +    destroy_l2_mapping(map, root, P_L2_LEVELS - 1);
>  }
>
> -static void phys_sections_clear(void)
> +static uint16_t phys_section_add(PhysMap *map, MemoryRegionSection *section)
>  {
> -    phys_sections_nb = 0;
> +    if (map->phys_sections_nb == map->phys_sections_nb_alloc) {
> +        map->phys_sections_nb_alloc = MAX(map->phys_sections_nb_alloc * 2, 16);
> +        map->phys_sections = g_renew(MemoryRegionSection, map->phys_sections,
> +                                map->phys_sections_nb_alloc);
> +    }
> +    map->phys_sections[map->phys_sections_nb] = *section;
> +    return map->phys_sections_nb++;
>  }
>
>  /* register physical memory.
> @@ -2232,12 +2291,13 @@ static void phys_sections_clear(void)
>     start_addr and region_offset are rounded down to a page boundary
>     before calculating this offset.  This should not be a problem unless
>     the low bits of start_addr and region_offset differ.  */
> -static void register_subpage(MemoryRegionSection *section)
> +static void register_subpage(PhysMap *map, MemoryRegionSection *section)
>  {
>      subpage_t *subpage;
>      target_phys_addr_t base = section->offset_within_address_space
>          & TARGET_PAGE_MASK;
> -    MemoryRegionSection *existing = phys_page_find(base >> TARGET_PAGE_BITS);
> +    MemoryRegionSection *existing = phys_page_find_internal(map,
> +                                            base >> TARGET_PAGE_BITS);
>      MemoryRegionSection subsection = {
>          .offset_within_address_space = base,
>          .size = TARGET_PAGE_SIZE,
> @@ -2247,30 +2307,30 @@ static void register_subpage(MemoryRegionSection *section)
>      assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
>
>      if (!(existing->mr->subpage)) {
> -        subpage = subpage_init(base);
> +        subpage = subpage_init(map, base);
>          subsection.mr = &subpage->iomem;
> -        phys_page_set(base >> TARGET_PAGE_BITS, 1,
> -                      phys_section_add(&subsection));
> +        phys_page_set(map, base >> TARGET_PAGE_BITS, 1,
> +                      phys_section_add(map, &subsection));
>      } else {
>          subpage = container_of(existing->mr, subpage_t, iomem);
>      }
>      start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
>      end = start + section->size;
> -    subpage_register(subpage, start, end, phys_section_add(section));
> +    subpage_register(map, subpage, start, end, phys_section_add(map, section));
>  }
>
>
> -static void register_multipage(MemoryRegionSection *section)
> +static void register_multipage(PhysMap *map, MemoryRegionSection *section)
>  {
>      target_phys_addr_t start_addr = section->offset_within_address_space;
>      ram_addr_t size = section->size;
>      target_phys_addr_t addr;
> -    uint16_t section_index = phys_section_add(section);
> +    uint16_t section_index = phys_section_add(map, section);
>
>      assert(size);
>
>      addr = start_addr;
> -    phys_page_set(addr >> TARGET_PAGE_BITS, size >> TARGET_PAGE_BITS,
> +    phys_page_set(map, addr >> TARGET_PAGE_BITS, size >> TARGET_PAGE_BITS,
>                    section_index);
>  }
>
> @@ -2278,13 +2338,14 @@ void cpu_register_physical_memory_log(MemoryRegionSection *section,
>                                        bool readonly)
>  {
>      MemoryRegionSection now = *section, remain = *section;
> +    PhysMap *map = next_map;
>
>      if ((now.offset_within_address_space & ~TARGET_PAGE_MASK)
>          || (now.size < TARGET_PAGE_SIZE)) {
>          now.size = MIN(TARGET_PAGE_ALIGN(now.offset_within_address_space)
>                         - now.offset_within_address_space,
>                         now.size);
> -        register_subpage(&now);
> +        register_subpage(map, &now);
>          remain.size -= now.size;
>          remain.offset_within_address_space += now.size;
>          remain.offset_within_region += now.size;
> @@ -2292,14 +2353,14 @@ void cpu_register_physical_memory_log(MemoryRegionSection *section,
>      now = remain;
>      now.size &= TARGET_PAGE_MASK;
>      if (now.size) {
> -        register_multipage(&now);
> +        register_multipage(map, &now);
>          remain.size -= now.size;
>          remain.offset_within_address_space += now.size;
>          remain.offset_within_region += now.size;
>      }
>      now = remain;
>      if (now.size) {
> -        register_subpage(&now);
> +        register_subpage(map, &now);
>      }
>  }
>
> @@ -3001,7 +3062,7 @@ static uint64_t subpage_read(void *opaque, target_phys_addr_t addr,
>             mmio, len, addr, idx);
>  #endif
>
> -    section = &phys_sections[mmio->sub_section[idx]];
> +    section = &mmio->map->phys_sections[mmio->sub_section[idx]];
>      addr += mmio->base;
>      addr -= section->offset_within_address_space;
>      addr += section->offset_within_region;
> @@ -3020,7 +3081,7 @@ static void subpage_write(void *opaque, target_phys_addr_t addr,
>             __func__, mmio, len, addr, idx, value);
>  #endif
>
> -    section = &phys_sections[mmio->sub_section[idx]];
> +    section = &mmio->map->phys_sections[mmio->sub_section[idx]];
>      addr += mmio->base;
>      addr -= section->offset_within_address_space;
>      addr += section->offset_within_region;
> @@ -3065,8 +3126,8 @@ static const MemoryRegionOps subpage_ram_ops = {
>      .endianness = DEVICE_NATIVE_ENDIAN,
>  };
>
> -static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
> -                             uint16_t section)
> +static int subpage_register(PhysMap *map, subpage_t *mmio, uint32_t start,
> +                              uint32_t end, uint16_t section)
>  {
>      int idx, eidx;
>
> @@ -3078,10 +3139,10 @@ static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
>      printf("%s: %p start %08x end %08x idx %08x eidx %08x mem %ld\n", __func__,
>             mmio, start, end, idx, eidx, memory);
>  #endif
> -    if (memory_region_is_ram(phys_sections[section].mr)) {
> -        MemoryRegionSection new_section = phys_sections[section];
> +    if (memory_region_is_ram(map->phys_sections[section].mr)) {
> +        MemoryRegionSection new_section = map->phys_sections[section];
>          new_section.mr = &io_mem_subpage_ram;
> -        section = phys_section_add(&new_section);
> +        section = phys_section_add(map, &new_section);
>      }
>      for (; idx <= eidx; idx++) {
>          mmio->sub_section[idx] = section;
> @@ -3090,12 +3151,13 @@ static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
>      return 0;
>  }
>
> -static subpage_t *subpage_init(target_phys_addr_t base)
> +static subpage_t *subpage_init(PhysMap *map, target_phys_addr_t base)
>  {
>      subpage_t *mmio;
>
>      mmio = g_malloc0(sizeof(subpage_t));
>
> +    mmio->map = map;
>      mmio->base = base;
>      memory_region_init_io(&mmio->iomem, &subpage_ops, mmio,
>                            "subpage", TARGET_PAGE_SIZE);
> @@ -3104,12 +3166,12 @@ static subpage_t *subpage_init(target_phys_addr_t base)
>      printf("%s: %p base " TARGET_FMT_plx " len %08x %d\n", __func__,
>             mmio, base, TARGET_PAGE_SIZE, subpage_memory);
>  #endif
> -    subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, phys_section_unassigned);
> +    subpage_register(map, mmio, 0, TARGET_PAGE_SIZE-1, phys_section_unassigned);
>
>      return mmio;
>  }
>
> -static uint16_t dummy_section(MemoryRegion *mr)
> +static uint16_t dummy_section(PhysMap *map, MemoryRegion *mr)
>  {
>      MemoryRegionSection section = {
>          .mr = mr,
> @@ -3118,7 +3180,7 @@ static uint16_t dummy_section(MemoryRegion *mr)
>          .size = UINT64_MAX,
>      };
>
> -    return phys_section_add(&section);
> +    return phys_section_add(map, &section);
>  }
>
>  MemoryRegion *iotlb_to_region(target_phys_addr_t index)
> @@ -3140,15 +3202,32 @@ static void io_mem_init(void)
>                            "watch", UINT64_MAX);
>  }
>
> -static void core_begin(MemoryListener *listener)
> +#if 0
> +static void physmap_init(void)
> +{
> +    FlatView v = { .ranges = NULL,
> +                             .nr = 0,
> +                             .nr_allocated = 0,
> +    };
> +
> +    init_map.views[0] = v;
> +    init_map.views[1] = v;
> +    cur_map =  &init_map;
> +}
> +#endif

Please delete.

> +
> +static void core_begin(MemoryListener *listener, PhysMap *new_map)
>  {
> -    destroy_all_mappings();
> -    phys_sections_clear();
> -    phys_map.ptr = PHYS_MAP_NODE_NIL;
> -    phys_section_unassigned = dummy_section(&io_mem_unassigned);
> -    phys_section_notdirty = dummy_section(&io_mem_notdirty);
> -    phys_section_rom = dummy_section(&io_mem_rom);
> -    phys_section_watch = dummy_section(&io_mem_watch);
> +
> +    new_map->root.ptr = PHYS_MAP_NODE_NIL;
> +    new_map->root.is_leaf = 0;
> +
> +    /* In all the map, these sections have the same index */
> +    phys_section_unassigned = dummy_section(new_map, &io_mem_unassigned);
> +    phys_section_notdirty = dummy_section(new_map, &io_mem_notdirty);
> +    phys_section_rom = dummy_section(new_map, &io_mem_rom);
> +    phys_section_watch = dummy_section(new_map, &io_mem_watch);
> +    next_map = new_map;
>  }
>
>  static void core_commit(MemoryListener *listener)
> @@ -3161,6 +3240,16 @@ static void core_commit(MemoryListener *listener)
>      for(env = first_cpu; env != NULL; env = env->next_cpu) {
>          tlb_flush(env, 1);
>      }
> +
> +/* move into high layer
> +    qemu_mutex_lock(&cur_map_lock);
> +    if (cur_map != NULL) {
> +        physmap_put(cur_map);
> +    }
> +    cur_map = next_map;
> +    smp_mb();
> +    qemu_mutex_unlock(&cur_map_lock);
> +*/

Also, commented-out code should be deleted.

>  }
>
>  static void core_region_add(MemoryListener *listener,
> @@ -3217,7 +3306,7 @@ static void core_eventfd_del(MemoryListener *listener,
>  {
>  }
>
> -static void io_begin(MemoryListener *listener)
> +static void io_begin(MemoryListener *listener, PhysMap *next)
>  {
>  }
>
> @@ -3329,6 +3418,20 @@ static void memory_map_init(void)
>      memory_listener_register(&io_memory_listener, system_io);
>  }
>
> +void physmap_init(void)
> +{
> +    FlatView v = { .ranges = NULL, .nr = 0, .nr_allocated = 0,
> +                           };
> +    PhysMap *init_map = g_malloc0(sizeof(PhysMap));
> +
> +    atomic_set(&init_map->ref, 1);
> +    init_map->root.ptr = PHYS_MAP_NODE_NIL;
> +    init_map->root.is_leaf = 0;
> +    init_map->views[0] = v;
> +    init_map->views[1] = v;
> +    cur_map = init_map;
> +}
> +
>  MemoryRegion *get_system_memory(void)
>  {
>      return system_memory;
> @@ -3391,6 +3494,7 @@ void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
>      uint32_t val;
>      target_phys_addr_t page;
>      MemoryRegionSection *section;
> +    PhysMap *cur = cur_map_get();
>
>      while (len > 0) {
>          page = addr & TARGET_PAGE_MASK;
> @@ -3472,6 +3576,7 @@ void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
>          buf += l;
>          addr += l;
>      }
> +    physmap_put(cur);
>  }
>
>  /* used for ROM loading : can write in RAM and ROM */
> diff --git a/hw/vhost.c b/hw/vhost.c
> index 43664e7..df58345 100644
> --- a/hw/vhost.c
> +++ b/hw/vhost.c
> @@ -438,7 +438,7 @@ static bool vhost_section(MemoryRegionSection *section)
>          && memory_region_is_ram(section->mr);
>  }
>
> -static void vhost_begin(MemoryListener *listener)
> +static void vhost_begin(MemoryListener *listener, PhysMap *next)
>  {
>  }
>
> diff --git a/hw/xen_pt.c b/hw/xen_pt.c
> index 3b6d186..fba8586 100644
> --- a/hw/xen_pt.c
> +++ b/hw/xen_pt.c
> @@ -597,7 +597,7 @@ static void xen_pt_region_update(XenPCIPassthroughState *s,
>      }
>  }
>
> -static void xen_pt_begin(MemoryListener *l)
> +static void xen_pt_begin(MemoryListener *l, PhysMap *next)
>  {
>  }
>
> diff --git a/kvm-all.c b/kvm-all.c
> index f8e4328..bc42cab 100644
> --- a/kvm-all.c
> +++ b/kvm-all.c
> @@ -693,7 +693,7 @@ static void kvm_set_phys_mem(MemoryRegionSection *section, bool add)
>      }
>  }
>
> -static void kvm_begin(MemoryListener *listener)
> +static void kvm_begin(MemoryListener *listener, PhysMap *next)
>  {
>  }
>
> diff --git a/memory.c b/memory.c
> index c7f2cfd..54cdc7f 100644
> --- a/memory.c
> +++ b/memory.c
> @@ -20,6 +20,7 @@
>  #include "kvm.h"
>  #include <assert.h>
>  #include "hw/qdev.h"
> +#include "qemu-thread.h"
>
>  #define WANT_EXEC_OBSOLETE
>  #include "exec-obsolete.h"
> @@ -192,7 +193,7 @@ typedef struct AddressSpaceOps AddressSpaceOps;
>  /* A system address space - I/O, memory, etc. */
>  struct AddressSpace {
>      MemoryRegion *root;
> -    FlatView current_map;
> +    int view_id;
>      int ioeventfd_nb;
>      MemoryRegionIoeventfd *ioeventfds;
>  };
> @@ -232,11 +233,6 @@ static void flatview_insert(FlatView *view, unsigned pos, FlatRange *range)
>      ++view->nr;
>  }
>
> -static void flatview_destroy(FlatView *view)
> -{
> -    g_free(view->ranges);
> -}
> -
>  static bool can_merge(FlatRange *r1, FlatRange *r2)
>  {
>      return int128_eq(addrrange_end(r1->addr), r2->addr.start)
> @@ -594,8 +590,10 @@ static void address_space_update_ioeventfds(AddressSpace *as)
>      MemoryRegionIoeventfd *ioeventfds = NULL;
>      AddrRange tmp;
>      unsigned i;
> +    PhysMap *map = cur_map_get();
> +    FlatView *view = &map->views[as->view_id];
>
> -    FOR_EACH_FLAT_RANGE(fr, &as->current_map) {
> +    FOR_EACH_FLAT_RANGE(fr, view) {
>          for (i = 0; i < fr->mr->ioeventfd_nb; ++i) {
>              tmp = addrrange_shift(fr->mr->ioeventfds[i].addr,
>                                    int128_sub(fr->addr.start,
> @@ -616,6 +614,7 @@ static void address_space_update_ioeventfds(AddressSpace *as)
>      g_free(as->ioeventfds);
>      as->ioeventfds = ioeventfds;
>      as->ioeventfd_nb = ioeventfd_nb;
> +    physmap_put(map);
>  }
>
>  static void address_space_update_topology_pass(AddressSpace *as,
> @@ -681,21 +680,23 @@ static void address_space_update_topology_pass(AddressSpace *as,
>  }
>
>
> -static void address_space_update_topology(AddressSpace *as)
> +static void address_space_update_topology(AddressSpace *as, PhysMap *prev,
> +                                            PhysMap *next)
>  {
> -    FlatView old_view = as->current_map;
> +    FlatView old_view = prev->views[as->view_id];
>      FlatView new_view = generate_memory_topology(as->root);
>
>      address_space_update_topology_pass(as, old_view, new_view, false);
>      address_space_update_topology_pass(as, old_view, new_view, true);
> +    next->views[as->view_id] = new_view;
>
> -    as->current_map = new_view;
> -    flatview_destroy(&old_view);
>      address_space_update_ioeventfds(as);
>  }
>
>  static void memory_region_update_topology(MemoryRegion *mr)
>  {
> +    PhysMap *prev, *next;
> +
>      if (memory_region_transaction_depth) {
>          memory_region_update_pending |= !mr || mr->enabled;
>          return;
> @@ -705,16 +706,20 @@ static void memory_region_update_topology(MemoryRegion *mr)
>          return;
>      }
>
> -    MEMORY_LISTENER_CALL_GLOBAL(begin, Forward);
> +     prev = cur_map_get();
> +    /* allocate PhysMap next here */
> +    next = alloc_next_map();
> +    MEMORY_LISTENER_CALL_GLOBAL(begin, Forward, next);
>
>      if (address_space_memory.root) {
> -        address_space_update_topology(&address_space_memory);
> +        address_space_update_topology(&address_space_memory, prev, next);
>      }
>      if (address_space_io.root) {
> -        address_space_update_topology(&address_space_io);
> +        address_space_update_topology(&address_space_io, prev, next);
>      }
>
>      MEMORY_LISTENER_CALL_GLOBAL(commit, Forward);
> +    cur_map_update(next);
>
>      memory_region_update_pending = false;
>  }
> @@ -1071,7 +1076,7 @@ void memory_region_put(MemoryRegion *mr)
>
>      if (atomic_dec_and_test(&mr->ref)) {
>          /* to fix, using call_rcu( ,release) */
> -        mr->life_ops->put(mr);
> +        physmap_reclaimer_enqueue(mr, (ReleaseHandler *)mr->life_ops->put);
>      }
>  }
>
> @@ -1147,13 +1152,18 @@ void memory_region_set_dirty(MemoryRegion *mr, target_phys_addr_t addr,
>  void memory_region_sync_dirty_bitmap(MemoryRegion *mr)
>  {
>      FlatRange *fr;
> +    FlatView *fview;
> +    PhysMap *map;
>
> -    FOR_EACH_FLAT_RANGE(fr, &address_space_memory.current_map) {
> +    map = cur_map_get();
> +    fview = &map->views[address_space_memory.view_id];
> +    FOR_EACH_FLAT_RANGE(fr, fview) {
>          if (fr->mr == mr) {
>              MEMORY_LISTENER_UPDATE_REGION(fr, &address_space_memory,
>                                            Forward, log_sync);
>          }
>      }
> +    physmap_put(map);
>  }
>
>  void memory_region_set_readonly(MemoryRegion *mr, bool readonly)
> @@ -1201,8 +1211,12 @@ static void memory_region_update_coalesced_range(MemoryRegion *mr)
>      FlatRange *fr;
>      CoalescedMemoryRange *cmr;
>      AddrRange tmp;
> +    FlatView *fview;
> +    PhysMap *map;
>
> -    FOR_EACH_FLAT_RANGE(fr, &address_space_memory.current_map) {
> +    map = cur_map_get();
> +    fview = &map->views[address_space_memory.view_id];
> +    FOR_EACH_FLAT_RANGE(fr, fview) {
>          if (fr->mr == mr) {
>              qemu_unregister_coalesced_mmio(int128_get64(fr->addr.start),
>                                             int128_get64(fr->addr.size));
> @@ -1219,6 +1233,7 @@ static void memory_region_update_coalesced_range(MemoryRegion *mr)
>              }
>          }
>      }
> +    physmap_put(map);
>  }
>
>  void memory_region_set_coalescing(MemoryRegion *mr)
> @@ -1458,29 +1473,49 @@ static int cmp_flatrange_addr(const void *addr_, const void *fr_)
>      return 0;
>  }
>
> -static FlatRange *address_space_lookup(AddressSpace *as, AddrRange addr)
> +static FlatRange *address_space_lookup(FlatView *view, AddrRange addr)
>  {
> -    return bsearch(&addr, as->current_map.ranges, as->current_map.nr,
> +    return bsearch(&addr, view->ranges, view->nr,
>                     sizeof(FlatRange), cmp_flatrange_addr);
>  }
>
> +/* Decrement the reference that was incremented by memory_region_find() */
> +void memory_region_section_put(MemoryRegionSection *mrs)
> +{
> +    if (mrs->mr != NULL) {
> +        memory_region_put(mrs->mr);
> +    }
> +}
> +
> +/* Increment mr's ref; the caller must decrement it when done */
>  MemoryRegionSection memory_region_find(MemoryRegion *address_space,
>                                         target_phys_addr_t addr, uint64_t size)
>  {
> +    PhysMap *map;
>      AddressSpace *as = memory_region_to_address_space(address_space);
>      AddrRange range = addrrange_make(int128_make64(addr),
>                                       int128_make64(size));
> -    FlatRange *fr = address_space_lookup(as, range);
> +    FlatView *fview;
> +
> +    map = cur_map_get();
> +
> +    fview = &map->views[as->view_id];
> +    FlatRange *fr = address_space_lookup(fview, range);
>      MemoryRegionSection ret = { .mr = NULL, .size = 0 };
>
>      if (!fr) {
> +        physmap_put(map);
>          return ret;
>      }
>
> -    while (fr > as->current_map.ranges
> +    while (fr > fview->ranges
>             && addrrange_intersects(fr[-1].addr, range)) {
>          --fr;
>      }
> +    /* To fix: the caller must be inside an RCU read-side section, or we
> +     * must inc fr->mr->ref here.
> +     */
> +    memory_region_get(fr->mr);
> +    physmap_put(map);
>
>      ret.mr = fr->mr;
>      range = addrrange_intersection(range, fr->addr);
> @@ -1497,10 +1532,13 @@ void memory_global_sync_dirty_bitmap(MemoryRegion *address_space)
>  {
>      AddressSpace *as = memory_region_to_address_space(address_space);
>      FlatRange *fr;
> +    PhysMap *map = cur_map_get();
> +    FlatView *view = &map->views[as->view_id];
>
> -    FOR_EACH_FLAT_RANGE(fr, &as->current_map) {
> +    FOR_EACH_FLAT_RANGE(fr, view) {
>          MEMORY_LISTENER_UPDATE_REGION(fr, as, Forward, log_sync);
>      }
> +    physmap_put(map);
>  }
>
>  void memory_global_dirty_log_start(void)
> @@ -1519,6 +1557,8 @@ static void listener_add_address_space(MemoryListener *listener,
>                                         AddressSpace *as)
>  {
>      FlatRange *fr;
> +    PhysMap *map;
> +    FlatView *view;
>
>      if (listener->address_space_filter
>          && listener->address_space_filter != as->root) {
> @@ -1528,7 +1568,10 @@ static void listener_add_address_space(MemoryListener *listener,
>      if (global_dirty_log) {
>          listener->log_global_start(listener);
>      }
> -    FOR_EACH_FLAT_RANGE(fr, &as->current_map) {
> +
> +    map = cur_map_get();
> +    view = &map->views[as->view_id];
> +    FOR_EACH_FLAT_RANGE(fr, view) {
>          MemoryRegionSection section = {
>              .mr = fr->mr,
>              .address_space = as->root,
> @@ -1539,6 +1582,7 @@ static void listener_add_address_space(MemoryListener *listener,
>          };
>          listener->region_add(listener, &section);
>      }
> +    physmap_put(map);
>  }
>
>  void memory_listener_register(MemoryListener *listener, MemoryRegion *filter)
> @@ -1570,12 +1614,14 @@ void memory_listener_unregister(MemoryListener *listener)
>  void set_system_memory_map(MemoryRegion *mr)
>  {
>      address_space_memory.root = mr;
> +    address_space_memory.view_id = 0;
>      memory_region_update_topology(NULL);
>  }
>
>  void set_system_io_map(MemoryRegion *mr)
>  {
>      address_space_io.root = mr;
> +    address_space_io.view_id = 1;
>      memory_region_update_topology(NULL);
>  }
>
> diff --git a/memory.h b/memory.h
> index 357edd8..18442d4 100644
> --- a/memory.h
> +++ b/memory.h
> @@ -256,7 +256,7 @@ typedef struct MemoryListener MemoryListener;
>   * Use with memory_listener_register() and memory_listener_unregister().
>   */
>  struct MemoryListener {
> -    void (*begin)(MemoryListener *listener);
> +    void (*begin)(MemoryListener *listener, PhysMap *next);
>      void (*commit)(MemoryListener *listener);
>      void (*region_add)(MemoryListener *listener, MemoryRegionSection *section);
>      void (*region_del)(MemoryListener *listener, MemoryRegionSection *section);
> @@ -829,6 +829,13 @@ void mtree_info(fprintf_function mon_printf, void *f);
>
>  void memory_region_get(MemoryRegion *mr);
>  void memory_region_put(MemoryRegion *mr);
> +void physmap_reclaimer_enqueue(void *opaque, ReleaseHandler *release);
> +void physmap_get(PhysMap *map);
> +void physmap_put(PhysMap *map);
> +PhysMap *cur_map_get(void);
> +PhysMap *alloc_next_map(void);
> +void cur_map_update(PhysMap *next);
> +void physmap_init(void);
>  #endif
>
>  #endif
> diff --git a/vl.c b/vl.c
> index 1329c30..12af523 100644
> --- a/vl.c
> +++ b/vl.c
> @@ -3346,6 +3346,7 @@ int main(int argc, char **argv, char **envp)
>      if (ram_size == 0) {
>          ram_size = DEFAULT_RAM_SIZE * 1024 * 1024;
>      }
> +    physmap_init();
>
>      configure_accelerator();
>
> diff --git a/xen-all.c b/xen-all.c
> index 59f2323..41d82fd 100644
> --- a/xen-all.c
> +++ b/xen-all.c
> @@ -452,7 +452,7 @@ static void xen_set_memory(struct MemoryListener *listener,
>      }
>  }
>
> -static void xen_begin(MemoryListener *listener)
> +static void xen_begin(MemoryListener *listener, PhysMap *next)
>  {
>  }
>
> --
> 1.7.4.4
>

  parent reply	other threads:[~2012-08-08 19:24 UTC|newest]

Thread overview: 154+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-08-08  6:25 [PATCH 0/15 v2] prepare unplug out of protection of global lock Liu Ping Fan
2012-08-08  6:25 ` [Qemu-devel] " Liu Ping Fan
2012-08-08  6:25 ` [PATCH 01/15] atomic: introduce atomic operations Liu Ping Fan
2012-08-08  6:25   ` [Qemu-devel] " Liu Ping Fan
2012-08-08  8:55   ` Paolo Bonzini
2012-08-08  8:55     ` [Qemu-devel] " Paolo Bonzini
2012-08-08  9:02   ` Avi Kivity
2012-08-08  9:02     ` [Qemu-devel] " Avi Kivity
2012-08-08  9:05     ` 陳韋任 (Wei-Ren Chen)
2012-08-08  9:05       ` 陳韋任 (Wei-Ren Chen)
2012-08-08  9:15       ` Avi Kivity
2012-08-08  9:15         ` [Qemu-devel] " Avi Kivity
2012-08-08  9:21   ` Peter Maydell
2012-08-08  9:21     ` Peter Maydell
2012-08-08 13:09     ` Stefan Hajnoczi
2012-08-08 13:09       ` Stefan Hajnoczi
2012-08-08 13:18       ` Paolo Bonzini
2012-08-08 13:18         ` Paolo Bonzini
2012-08-08 13:32         ` Peter Maydell
2012-08-08 13:32           ` [Qemu-devel] " Peter Maydell
2012-08-08 13:49           ` Paolo Bonzini
2012-08-08 13:49             ` [Qemu-devel] " Paolo Bonzini
2012-08-08 14:00             ` Avi Kivity
2012-08-08 14:00               ` [Qemu-devel] " Avi Kivity
2012-08-08  6:25 ` [PATCH 02/15] qom: using atomic ops to re-implement object_ref Liu Ping Fan
2012-08-08  6:25   ` [Qemu-devel] " Liu Ping Fan
2012-08-08  6:25 ` [PATCH 03/15] qom: introduce reclaimer to release obj Liu Ping Fan
2012-08-08  6:25   ` [Qemu-devel] " Liu Ping Fan
2012-08-08  9:05   ` Avi Kivity
2012-08-08  9:05     ` [Qemu-devel] " Avi Kivity
2012-08-08  9:07     ` Paolo Bonzini
2012-08-08  9:07       ` [Qemu-devel] " Paolo Bonzini
2012-08-08  9:15       ` Avi Kivity
2012-08-08  9:15         ` [Qemu-devel] " Avi Kivity
2012-08-09  7:33         ` liu ping fan
2012-08-09  7:33           ` [Qemu-devel] " liu ping fan
2012-08-09  7:49           ` Paolo Bonzini
2012-08-09  7:49             ` [Qemu-devel] " Paolo Bonzini
2012-08-09  8:18             ` Avi Kivity
2012-08-09  8:18               ` [Qemu-devel] " Avi Kivity
2012-08-10  6:43               ` liu ping fan
2012-08-10  6:43                 ` [Qemu-devel] " liu ping fan
2012-08-08  9:35   ` Paolo Bonzini
2012-08-08  9:35     ` [Qemu-devel] " Paolo Bonzini
2012-08-09  7:38     ` liu ping fan
2012-08-09  7:38       ` [Qemu-devel] " liu ping fan
2012-08-08  6:25 ` [PATCH 04/15] memory: MemoryRegion topology must be stable when updating Liu Ping Fan
2012-08-08  6:25   ` [Qemu-devel] " Liu Ping Fan
2012-08-08  9:13   ` Avi Kivity
2012-08-08  9:13     ` [Qemu-devel] " Avi Kivity
2012-08-09  7:28     ` liu ping fan
2012-08-09  7:28       ` [Qemu-devel] " liu ping fan
2012-08-09  8:24       ` Avi Kivity
2012-08-09  8:24         ` [Qemu-devel] " Avi Kivity
2012-08-10  6:44         ` liu ping fan
2012-08-10  6:44           ` [Qemu-devel] " liu ping fan
2012-08-13 18:28       ` Marcelo Tosatti
2012-08-13 18:28         ` [Qemu-devel] " Marcelo Tosatti
2012-08-08 19:17   ` Blue Swirl
2012-08-08 19:17     ` [Qemu-devel] " Blue Swirl
2012-08-09  7:28     ` liu ping fan
2012-08-09  7:28       ` [Qemu-devel] " liu ping fan
2012-08-09 17:09       ` Blue Swirl
2012-08-09 17:09         ` [Qemu-devel] " Blue Swirl
2012-08-08  6:25 ` [PATCH 05/15] memory: introduce life_ops to MemoryRegion Liu Ping Fan
2012-08-08  6:25   ` [Qemu-devel] " Liu Ping Fan
2012-08-08  9:18   ` Avi Kivity
2012-08-08  9:18     ` [Qemu-devel] " Avi Kivity
2012-08-08  6:25 ` [PATCH 06/15] memory: use refcnt to manage MemoryRegion Liu Ping Fan
2012-08-08  6:25   ` [Qemu-devel] " Liu Ping Fan
2012-08-08  9:20   ` Avi Kivity
2012-08-08  9:20     ` [Qemu-devel] " Avi Kivity
2012-08-09  7:27     ` liu ping fan
2012-08-09  7:27       ` [Qemu-devel] " liu ping fan
2012-08-09  8:38       ` Avi Kivity
2012-08-09  8:38         ` [Qemu-devel] " Avi Kivity
2012-08-10  6:44         ` liu ping fan
2012-08-10  6:44           ` [Qemu-devel] " liu ping fan
2012-08-12  8:43           ` Avi Kivity
2012-08-12  8:43             ` [Qemu-devel] " Avi Kivity
2012-08-08  6:25 ` [PATCH 07/15] memory: inc/dec mr's ref when adding/removing from mem view Liu Ping Fan
2012-08-08  6:25   ` [Qemu-devel] " Liu Ping Fan
2012-08-08  6:25 ` [PATCH 08/15] memory: introduce PhysMap to present snapshot of toploygy Liu Ping Fan
2012-08-08  6:25   ` [Qemu-devel] " Liu Ping Fan
2012-08-08  9:27   ` Avi Kivity
2012-08-08  9:27     ` [Qemu-devel] " Avi Kivity
2012-08-08 19:18   ` Blue Swirl
2012-08-08 19:18     ` [Qemu-devel] " Blue Swirl
2012-08-09  7:29     ` liu ping fan
2012-08-09  7:29       ` [Qemu-devel] " liu ping fan
2012-08-08  6:25 ` [PATCH 09/15] memory: prepare flatview and radix-tree for rcu style access Liu Ping Fan
2012-08-08  6:25   ` [Qemu-devel] " Liu Ping Fan
2012-08-08  9:41   ` Avi Kivity
2012-08-08  9:41     ` [Qemu-devel] " Avi Kivity
2012-08-11  1:58     ` liu ping fan
2012-08-11  1:58       ` [Qemu-devel] " liu ping fan
2012-08-11 10:06       ` liu ping fan
2012-08-11 10:06         ` [Qemu-devel] " liu ping fan
2012-08-08 19:23   ` Blue Swirl [this message]
2012-08-08 19:23     ` Blue Swirl
2012-08-09  7:29     ` liu ping fan
2012-08-09  7:29       ` [Qemu-devel] " liu ping fan
2012-08-08  6:25 ` [PATCH 10/15] memory: change tcg related code to using PhysMap Liu Ping Fan
2012-08-08  6:25   ` [Qemu-devel] " Liu Ping Fan
2012-08-08  6:25 ` [PATCH 11/15] lock: introduce global lock for device tree Liu Ping Fan
2012-08-08  6:25   ` [Qemu-devel] " Liu Ping Fan
2012-08-08  9:41   ` Paolo Bonzini
2012-08-08  9:41     ` [Qemu-devel] " Paolo Bonzini
2012-08-09  7:28     ` liu ping fan
2012-08-09  7:28       ` [Qemu-devel] " liu ping fan
2012-08-09  7:41       ` Paolo Bonzini
2012-08-09  7:41         ` [Qemu-devel] " Paolo Bonzini
2012-08-08  9:42   ` Avi Kivity
2012-08-08  9:42     ` [Qemu-devel] " Avi Kivity
2012-08-09  7:27     ` liu ping fan
2012-08-09  7:27       ` [Qemu-devel] " liu ping fan
2012-08-09  8:31       ` Avi Kivity
2012-08-09  8:31         ` [Qemu-devel] " Avi Kivity
2012-08-08  6:25 ` [PATCH 12/15] qdev: using devtree lock to protect device's accessing Liu Ping Fan
2012-08-08  6:25   ` [Qemu-devel] " Liu Ping Fan
2012-08-08  9:33   ` Peter Maydell
2012-08-08  9:33     ` [Qemu-devel] " Peter Maydell
2012-08-08  6:25 ` [PATCH 13/15] hotplug: introduce qdev_unplug_complete() to remove device from views Liu Ping Fan
2012-08-08  6:25   ` [Qemu-devel] " Liu Ping Fan
2012-08-08  9:52   ` Paolo Bonzini
2012-08-08  9:52     ` [Qemu-devel] " Paolo Bonzini
2012-08-08 10:07     ` Avi Kivity
2012-08-08 10:07       ` [Qemu-devel] " Avi Kivity
2012-08-09  7:28     ` liu ping fan
2012-08-09  7:28       ` [Qemu-devel] " liu ping fan
2012-08-09  8:00       ` Paolo Bonzini
2012-08-09  8:00         ` [Qemu-devel] " Paolo Bonzini
2012-08-10  6:42         ` liu ping fan
2012-08-10  6:42           ` [Qemu-devel] " liu ping fan
2012-08-13 18:53           ` Marcelo Tosatti
2012-08-13 18:53             ` [Qemu-devel] " Marcelo Tosatti
2012-08-13 18:51         ` Marcelo Tosatti
2012-08-13 18:51           ` [Qemu-devel] " Marcelo Tosatti
2012-08-08  6:25 ` [PATCH 14/15] qom: object_unref call reclaimer Liu Ping Fan
2012-08-08  6:25   ` [Qemu-devel] " Liu Ping Fan
2012-08-08  9:40   ` Paolo Bonzini
2012-08-08  9:40     ` [Qemu-devel] " Paolo Bonzini
2012-08-13 18:56   ` Marcelo Tosatti
2012-08-13 18:56     ` [Qemu-devel] " Marcelo Tosatti
2012-08-08  6:25 ` [PATCH 15/15] e1000: using new interface--unmap to unplug Liu Ping Fan
2012-08-08  6:25   ` [Qemu-devel] " Liu Ping Fan
2012-08-08  9:56   ` Paolo Bonzini
2012-08-08  9:56     ` [Qemu-devel] " Paolo Bonzini
2012-08-09  7:28     ` liu ping fan
2012-08-09  7:28       ` [Qemu-devel] " liu ping fan
2012-08-09  7:40       ` Paolo Bonzini
2012-08-09  7:40         ` [Qemu-devel] " Paolo Bonzini
2012-08-10  6:43         ` liu ping fan
2012-08-10  6:43           ` [Qemu-devel] " liu ping fan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to='CAAu8pHv7KbH7maxzCGrxop9p=1s8frW0XefcO1RoPb_+6cJn5w@mail.gmail.com' \
    --to=blauwirbel@gmail.com \
    --cc=afaerber@suse.de \
    --cc=anthony@codemonkey.ws \
    --cc=avi@redhat.com \
    --cc=jan.kiszka@siemens.com \
    --cc=kvm@vger.kernel.org \
    --cc=mtosatti@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=qemulist@gmail.com \
    --cc=stefanha@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.