* [PATCH] Provide support for multiple frame buffers in Xen.
@ 2012-10-16 18:15 Robert Phillips
From: Robert Phillips @ 2012-10-16 18:15 UTC
  To: xen-devel; +Cc: Robert Phillips, Robert Phillips

From: Robert Phillips <robert.phillips@virtualcomputer.com>

Support is provided for both shadow and hardware-assisted paging (HAP) modes.
This code keeps track of the set of video frame buffers (VRAM),
detects when the guest has modified any of those buffers and, upon request,
returns a bitmap of the modified pages.

This lets other software components re-paint the portions of the monitor (or monitors) that have changed.
Each monitor has a frame buffer of some size at some position in guest physical memory.
The set of frame buffers being tracked can change over time as monitors are plugged and unplugged.
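
For context (illustration only, not part of this patch), a consumer such as
the device model polls each frame buffer and repaints just the pages reported
dirty.  A minimal caller-side sketch, assuming the existing libxc wrapper
xc_hvm_track_dirty_vram() (its exact signature may vary by tree) and a
hypothetical repaint_page() redraw callback:

    /* Poll one frame buffer's dirty bitmap and repaint only the pages the
     * hypervisor reports as modified.  repaint_page() is a placeholder for
     * the display front-end's own redraw routine.
     * Needs <xenctrl.h>, <stdint.h>, <string.h>. */
    static void refresh_fb(xc_interface *xch, domid_t dom,
                           uint64_t first_pfn, uint64_t nr)
    {
        const uint64_t bits = sizeof(unsigned long) * 8;
        unsigned long bitmap[(nr + bits - 1) / bits];
        uint64_t i;

        memset(bitmap, 0, sizeof(bitmap));
        if ( xc_hvm_track_dirty_vram(xch, dom, first_pfn, nr, bitmap) < 0 )
            return;

        for ( i = 0; i < nr; i++ )
            if ( bitmap[i / bits] & (1UL << (i % bits)) )
                repaint_page(first_pfn + i); /* hypothetical callback */
    }

As in the code below, the call fails with -ENODATA while log-dirty mode is
enabled for all of memory (e.g. during live migration), in which case no
per-range bitmap is returned.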

Signed-off-by: Robert Phillips <robert.phillips@citrix.com>
---
 xen/arch/x86/hvm/Makefile            |    3 +-
 xen/arch/x86/hvm/dirty_vram.c        |  878 ++++++++++++++++++++++++++++++++++
 xen/arch/x86/hvm/hvm.c               |    4 +-
 xen/arch/x86/mm/hap/hap.c            |  140 +-----
 xen/arch/x86/mm/paging.c             |  232 ++++-----
 xen/arch/x86/mm/shadow/common.c      |  335 +++++++------
 xen/arch/x86/mm/shadow/multi.c       |  169 +++----
 xen/arch/x86/mm/shadow/multi.h       |    7 +-
 xen/arch/x86/mm/shadow/types.h       |    1 +
 xen/include/asm-x86/hap.h            |    4 -
 xen/include/asm-x86/hvm/dirty_vram.h |  157 ++++++
 xen/include/asm-x86/hvm/domain.h     |    2 +-
 xen/include/asm-x86/paging.h         |   22 +-
 xen/include/asm-x86/shadow.h         |    6 -
 14 files changed, 1403 insertions(+), 557 deletions(-)
 create mode 100644 xen/arch/x86/hvm/dirty_vram.c
 create mode 100644 xen/include/asm-x86/hvm/dirty_vram.h

diff --git a/xen/arch/x86/hvm/Makefile b/xen/arch/x86/hvm/Makefile
index eea5555..f37736b 100644
--- a/xen/arch/x86/hvm/Makefile
+++ b/xen/arch/x86/hvm/Makefile
@@ -2,6 +2,7 @@ subdir-y += svm
 subdir-y += vmx
 
 obj-y += asid.o
+obj-y += dirty_vram.o
 obj-y += emulate.o
 obj-y += hpet.o
 obj-y += hvm.o
@@ -22,4 +23,4 @@ obj-y += vlapic.o
 obj-y += vmsi.o
 obj-y += vpic.o
 obj-y += vpt.o
-obj-y += vpmu.o
\ No newline at end of file
+obj-y += vpmu.o
diff --git a/xen/arch/x86/hvm/dirty_vram.c b/xen/arch/x86/hvm/dirty_vram.c
new file mode 100644
index 0000000..22375c2
--- /dev/null
+++ b/xen/arch/x86/hvm/dirty_vram.c
@@ -0,0 +1,878 @@
+/*
+ * arch/x86/hvm/dirty_vram.c: Bookkeep/query dirty VRAM pages
+ * with support for multiple frame buffers.
+ *
+ * Copyright (c) 2012, Citrix Systems, Inc. (Robert Phillips)
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+
+#include <xen/types.h>
+#include <xen/sched.h>
+#include <xen/guest_access.h>
+#include <asm/shadow.h>
+#include <asm/hvm/dirty_vram.h>
+#include "../mm/mm-locks.h"
+
+#define DEBUG_stop_tracking_all_vram          1
+#define DEBUG_allocating_dirty_vram_range     1
+#define DEBUG_high_water_mark_for_vram_ranges 1
+#define DEBUG_freeing_dirty_vram_range        1
+#define DEBUG_allocate_paddr_links_page       0
+#define DEBUG_update_vram_mapping             0
+
+/* Allocates domain's dirty_vram structure */
+dv_dirty_vram_t *
+dirty_vram_alloc(struct domain *d)
+{
+    dv_dirty_vram_t *dirty_vram;
+    ASSERT( paging_locked_by_me(d) );
+    dirty_vram = d->arch.hvm_domain.dirty_vram = xmalloc(dv_dirty_vram_t);
+    if ( dirty_vram )
+    {
+        memset(dirty_vram, 0, sizeof(*dirty_vram));
+        INIT_LIST_HEAD(&dirty_vram->range_head);
+        INIT_LIST_HEAD(&dirty_vram->ext_head);
+    }
+    return dirty_vram;
+}
+
+/* Returns domain's dirty_vram structure,
+ * allocating it if necessary */
+dv_dirty_vram_t *
+dirty_vram_find_or_alloc(struct domain *d)
+{
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    ASSERT( paging_locked_by_me(d) );
+    if ( !dirty_vram )
+        dirty_vram = dirty_vram_alloc(d);
+    return dirty_vram;
+}
+
+
+/* Free domain's dirty_vram structure */
+void dirty_vram_free(struct domain *d)
+{
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    ASSERT( paging_locked_by_me(d) );
+    if ( dirty_vram )
+    {
+        struct list_head *curr, *next;
+        /* Free all the ranges */
+        list_for_each_safe(curr, next, &dirty_vram->range_head)
+        {
+            dv_range_t *range = list_entry(curr, dv_range_t, range_link);
+#if DEBUG_stop_tracking_all_vram
+            gdprintk(XENLOG_DEBUG, "[%05lx:%05lx] stop tracking all vram\n",
+                     range->begin_pfn, range->end_pfn);
+#endif
+            xfree(range->pl_tab);
+            xfree(range);
+        }
+        /* Free all the extension pages */
+        list_for_each_safe(curr, next, &dirty_vram->ext_head)
+        {
+            xfree(curr);            
+        }
+        xfree(dirty_vram);
+        d->arch.hvm_domain.dirty_vram = NULL;
+    }
+}
+
+/* Returns dirty vram range containing gfn, NULL if none */
+struct dv_range *
+dirty_vram_range_find_gfn(struct domain *d,
+                          unsigned long gfn)
+{
+    struct dv_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    ASSERT( paging_locked_by_me(d) );
+    if ( dirty_vram )
+    {
+        struct list_head *curr;
+        list_for_each(curr, &dirty_vram->range_head)
+        {
+            dv_range_t *range = list_entry(curr, dv_range_t, range_link);
+            if ( gfn >= range->begin_pfn &&
+                 gfn <  range->end_pfn )
+            {
+                return range;
+            }
+        }
+    }
+    return NULL;
+}
+
+/* Returns pointer to dirty vram range matching [begin_pfn .. end_pfn ), NULL if none. */
+dv_range_t *
+dirty_vram_range_find(struct domain *d,
+                      unsigned long begin_pfn,
+                      unsigned long nr)
+{
+    unsigned long end_pfn = begin_pfn + nr;
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    ASSERT( paging_locked_by_me(d) );
+    if ( dirty_vram )
+    {
+        struct list_head *curr;
+        list_for_each(curr, &dirty_vram->range_head)
+        {
+            dv_range_t *range = list_entry(curr, dv_range_t, range_link);
+            if ( begin_pfn == range->begin_pfn &&
+                 end_pfn   == range->end_pfn )
+            {
+                return range;
+            }
+        }
+    }
+    return NULL;
+}
+
+/* Allocate specified dirty_vram range */
+static dv_range_t *
+_dirty_vram_range_alloc(struct domain *d,
+                        unsigned long begin_pfn,
+                        unsigned long nr)
+{
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    dv_range_t *range = NULL;
+    unsigned long end_pfn = begin_pfn + nr;
+    dv_paddr_link_t *pl_tab = NULL;
+    int i;
+    
+    ASSERT( paging_locked_by_me(d) );
+    ASSERT( dirty_vram != NULL );
+    
+#if DEBUG_allocating_dirty_vram_range
+    gdprintk(XENLOG_DEBUG,
+             "[%05lx:%05lx] Allocating dirty vram range hap:%d\n",
+             begin_pfn, end_pfn,
+             d->arch.hvm_domain.hap_enabled);
+#endif
+    
+    range = xmalloc(dv_range_t);
+    if (range == NULL)
+        goto err_out;
+    
+    memset(range, 0, sizeof(dv_range_t));
+    INIT_LIST_HEAD(&range->range_link);
+    
+    range->begin_pfn = begin_pfn;
+    range->end_pfn = end_pfn;
+
+    if (!hap_enabled(d))
+    {
+        if ( (pl_tab = xmalloc_array(dv_paddr_link_t, nr)) == NULL )
+        {
+            goto err_out;
+        }
+        for (i = 0; i != nr; i++)
+        {
+            pl_tab[i].sl1ma = INVALID_PADDR;
+            pl_tab[i].pl_next = NULL;
+        }
+    }    
+    
+    range->pl_tab = pl_tab;
+    range->mappings_hwm = 1;
+
+    list_add(&range->range_link, &dirty_vram->range_head);
+    if ( ++dirty_vram->nr_ranges > dirty_vram->ranges_hwm )
+    {
+        dirty_vram->ranges_hwm = dirty_vram->nr_ranges;
+#if DEBUG_high_water_mark_for_vram_ranges
+        gdprintk(XENLOG_DEBUG,
+                 "High water mark for number of vram ranges is now:%d\n",
+                 dirty_vram->ranges_hwm);
+#endif
+    }
+    return range;
+    
+ err_out:
+    xfree(pl_tab);
+    xfree(range);
+    return NULL;
+}
+
+
+/* Frees specified dirty_vram range */
+void dirty_vram_range_free(struct domain *d,
+                           dv_range_t *range)
+{
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    ASSERT( paging_locked_by_me(d) );
+    if ( dirty_vram )
+    {
+        int i, nr = range->end_pfn - range->begin_pfn;
+        
+#if DEBUG_freeing_dirty_vram_range
+        gdprintk(XENLOG_DEBUG,
+                 "[%05lx:%05lx] Freeing dirty vram range\n",
+                 range->begin_pfn, range->end_pfn);
+#endif
+        
+        if (range->pl_tab)
+        {
+            for (i = 0; i != nr; i++)
+            {
+                dv_paddr_link_t *plx;
+                plx = range->pl_tab[i].pl_next;
+                /* Does current FB page have multiple mappings? */
+                if (plx) /* yes */
+                {
+                    /* Find the last element in singly-linked list */
+                    while (plx->pl_next != NULL)
+                        plx = plx->pl_next;
+                    /* Prepend whole list to the free list */
+                    plx->pl_next = dirty_vram->pl_free;
+                    dirty_vram->pl_free = range->pl_tab[i].pl_next;
+                }
+            }
+            xfree(range->pl_tab);
+            range->pl_tab = NULL;
+        }
+        
+        /* Remove range from the linked list, free it, and adjust count */
+        list_del(&range->range_link);
+        xfree(range);
+        dirty_vram->nr_ranges--;
+    }
+}
+
+/* dirty_vram_range_alloc()
+ * This function ensures that the new range does not overlap any existing
+ * ranges -- deleting them if necessary -- and then calls _dirty_vram_range_alloc
+ * to actually allocate the new range.
+ */
+dv_range_t *
+dirty_vram_range_alloc(struct domain *d,
+                        unsigned long begin_pfn,
+                        unsigned long nr)
+{
+    unsigned long end_pfn = begin_pfn + nr;
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    dv_range_t *range;
+    struct list_head *curr, *next;
+    
+    ASSERT( paging_locked_by_me(d) );
+    ASSERT( dirty_vram != NULL );
+    
+    /* Ranges cannot overlap so
+     * free any range that overlaps [ begin_pfn .. end_pfn ) */
+    list_for_each_safe(curr, next, &dirty_vram->range_head)
+    {
+        dv_range_t *rng = list_entry(curr, dv_range_t, range_link);
+        if ( ((rng->begin_pfn <= begin_pfn) && (begin_pfn <  rng->end_pfn)) ||
+             ((begin_pfn <= rng->begin_pfn) && (rng->begin_pfn < end_pfn)) )
+        {
+            /* Different tracking, tear the previous down. */
+            dirty_vram_range_free(d, rng);
+        }
+    }
+        
+    range = _dirty_vram_range_alloc(d, begin_pfn, nr);
+    if ( !range )
+        goto out;
+
+ out:
+    return range;
+}
+
+/* dirty_vram_range_find_or_alloc()
+ * Find the range for [begin_pfn:begin_pfn+nr).
+ * If it doesn't exist, create it.
+ */
+dv_range_t *
+dirty_vram_range_find_or_alloc(struct domain *d,
+                                unsigned long begin_pfn,
+                                unsigned long nr)
+{
+    dv_range_t *range;
+    ASSERT( paging_locked_by_me(d) );
+    range = dirty_vram_range_find(d, begin_pfn, nr);
+    if ( !range )
+        range = dirty_vram_range_alloc(d, begin_pfn, nr);
+    return range;
+}
+
+
+
+/* Allocate a dv_paddr_link struct */
+static dv_paddr_link_t *
+alloc_paddr_link(struct domain *d)
+{
+    dv_paddr_link_t * pl = NULL;
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    
+    ASSERT( paging_locked_by_me(d) );
+    BUILD_BUG_ON(sizeof(dv_paddr_link_ext_t) > PAGE_SIZE);
+    /* Is the list of free pl's empty? */
+    if (dirty_vram->pl_free == NULL) /* yes */
+    {
+        /* Allocate another page of pl's.
+         * Link them all together and point the free list head at them */
+        int i;
+        dv_paddr_link_ext_t *ext = xmalloc(dv_paddr_link_ext_t);
+        if (ext == NULL)
+            goto out;
+
+#if DEBUG_allocate_paddr_links_page
+        gdprintk(XENLOG_DEBUG, "Allocated another page of paddr_links\n");
+#endif
+        list_add(&ext->ext_link, &dirty_vram->ext_head);
+
+        /* initialize and link together the new pl entries */
+        for (i = 0; i != ARRAY_SIZE(ext->entries); i++)
+        {
+            ext->entries[i].sl1ma = INVALID_PADDR;
+            ext->entries[i].pl_next = &ext->entries[i+1];
+        }
+        ext->entries[ARRAY_SIZE(ext->entries) - 1].pl_next = NULL;
+        dirty_vram->pl_free = &ext->entries[0];
+    }
+    pl = dirty_vram->pl_free;
+    dirty_vram->pl_free = pl->pl_next;
+
+    pl->sl1ma = INVALID_PADDR;
+    pl->pl_next = NULL;
+ out:
+    return pl;
+}
+
+
+/* Free a paddr_link struct, given address of its predecessor in linked list */
+dv_paddr_link_t *
+free_paddr_link(struct domain *d,
+                dv_paddr_link_t **ppl,
+                dv_paddr_link_t *pl)
+{
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    dv_paddr_link_t *npl; /* next pl */
+
+    ASSERT( paging_locked_by_me(d) );
+    /* extension mapping? */
+    if (ppl) /* yes. free it */
+    {
+        pl = (*ppl);
+        (*ppl) = npl = pl->pl_next;
+    }
+    else  /* main table */
+    {
+        /* move 2nd mapping to main table.
+         * and free 2nd mapping */
+        dv_paddr_link_t * spl;
+        spl = pl->pl_next;
+        if (spl == NULL)
+        {
+            pl->sl1ma = INVALID_PADDR;
+            return pl;
+        }
+        pl->sl1ma = spl->sl1ma;
+        pl->pl_next = spl->pl_next;
+        npl = pl; /* reprocess main table entry again */
+        pl = spl;
+    }
+    pl->sl1ma = INVALID_PADDR;
+    pl->pl_next = dirty_vram->pl_free;
+    dirty_vram->pl_free = pl;
+    return npl;
+}
+
+
+/* dirty_vram_range_update()
+ * This is called whenever a level 1 page table entry is modified.
+ * If the L1PTE is being cleared, the function removes any paddr_links
+ * that refer to it.
+ * If the L1PTE is being set to a frame buffer page, a paddr_link is
+ * created for that page's entry in pl_tab.
+ * Returns 1 iff entry found and set or cleared.
+ */
+int dirty_vram_range_update(struct domain *d,
+                            unsigned long gfn,
+                            paddr_t sl1ma,
+                            int set)
+{
+    int effective = 0;
+    dv_range_t *range;
+
+    ASSERT(paging_locked_by_me(d));
+    range = dirty_vram_range_find_gfn(d, gfn);
+    if ( range )
+    {
+        unsigned long i = gfn - range->begin_pfn;
+        dv_paddr_link_t *pl = &range->pl_tab[ i ];
+        dv_paddr_link_t **ppl = NULL;
+        int len = 0;
+
+        /* find matching entry (pl), if any, and its predecessor
+         * in linked list (ppl) */
+        while (pl != NULL)
+        {
+            if (pl->sl1ma == sl1ma || pl->sl1ma == INVALID_PADDR )
+                break;
+            ppl = &pl->pl_next;
+            pl = *ppl;
+            len++;
+        }
+            
+        if (set)
+        {
+            /* Did we find sl1ma in either the main table or the linked list? */
+            if (pl == NULL) /* no, so we'll need to alloc a link */
+            {
+                ASSERT(ppl != NULL);
+                /* alloc link and append it to list */
+                (*ppl) = pl = alloc_paddr_link(d);
+                if (pl == NULL)
+                    goto out;
+            }
+            if ( pl->sl1ma != sl1ma )
+            {
+                pl->sl1ma = sl1ma;
+                range->nr_mappings++;
+            }
+            effective = 1;
+            if (len > range->mappings_hwm)
+            {
+                range->mappings_hwm = len;
+#if DEBUG_update_vram_mapping
+                gdprintk(XENLOG_DEBUG,
+                         "[%lx] set      sl1ma:%lx hwm:%d mappings:%d freepages:%d\n",
+                         gfn, sl1ma,
+                         range->mappings_hwm,
+                         range->nr_mappings,
+                         d->arch.paging.shadow.free_pages);
+#endif
+            }
+        }
+        else /* clear */
+        {
+            if (pl && pl->sl1ma == sl1ma )
+            {
+#if DEBUG_update_vram_mapping
+                gdprintk(XENLOG_DEBUG,
+                         "[%lx] clear    sl1ma:%lx mappings:%d\n",
+                         gfn, sl1ma,
+                         range->nr_mappings-1);
+#endif
+                free_paddr_link(d, ppl, pl);
+                if ( --range->nr_mappings == 0 )
+                {
+                    dirty_vram_range_free(d, range);
+                }
+                effective = 1;
+            }
+        }
+    }
+ out:
+    return effective;
+}
+
+
+/* shadow_scan_dirty_flags()
+ * This produces a dirty bitmap for the range by examining every
+ * L1PTE referenced by some dv_paddr_link in the range's pl_tab table.
+ * It tests and clears each such L1PTE's dirty flag.
+ */
+static int shadow_scan_dirty_flags(struct domain *d,
+                                   dv_range_t *range,
+                                   uint8_t *dirty_bitmap)
+{
+    int flush_tlb = 0;
+    unsigned long i;
+    unsigned long nr = range->end_pfn - range->begin_pfn;
+#ifdef __i386__
+    unsigned long map_mfn = INVALID_MFN;
+    void *map_sl1p = NULL;
+#endif
+
+    ASSERT( paging_locked_by_me(d) );
+    /* Iterate over VRAM to track dirty bits. */
+    for ( i = 0; i < nr; i++ )
+    {
+        int dirty = 0, len = 1;
+        dv_paddr_link_t *pl;
+        for (pl = &range->pl_tab[i]; pl; pl = pl->pl_next, len++)
+        {
+#ifdef __i386__
+            void *sl1p;
+            unsigned long sl1mfn;
+#endif
+            l1_pgentry_t *sl1e;
+            paddr_t sl1ma = pl->sl1ma;
+            if (sl1ma == INVALID_PADDR) /* FB page is unmapped */
+                continue;
+#ifdef __i386__
+            sl1p = map_sl1p;
+            sl1mfn = paddr_to_pfn(sl1ma);
+
+            if ( sl1mfn != map_mfn )
+            {
+                if ( map_sl1p )
+                    sh_unmap_domain_page(map_sl1p);
+                map_sl1p = sl1p = sh_map_domain_page(_mfn(sl1mfn));
+                map_mfn = sl1mfn;
+            }
+            sl1e = sl1p + (sl1ma & ~PAGE_MASK);
+#else
+            sl1e = maddr_to_virt(sl1ma);
+#endif
+            if ( l1e_get_flags(*sl1e) & _PAGE_DIRTY )
+            {
+                dirty = 1;
+                /* Clear dirty so we can detect if page gets re-dirtied */
+                /* Note: this is atomic, so we may clear a
+                 * _PAGE_ACCESSED set by another processor. */
+                l1e_remove_flags(*sl1e, _PAGE_DIRTY);
+                flush_tlb = 1;
+            }
+        } /* for */
+        if ( dirty )
+        {
+            dirty_bitmap[i >> 3] |= (1 << (i & 7));
+        }
+    }
+
+#ifdef __i386__
+    if ( map_sl1p )
+        sh_unmap_domain_page(map_sl1p);
+#endif
+    return flush_tlb;
+}
+
+
+/* shadow_track_dirty_vram()
+ * This is the API called on the guest's behalf (e.g. by the device model)
+ * to determine which pages in [begin_pfn:begin_pfn+nr) have been dirtied
+ * since the last call.
+ * It creates the domain's dv_dirty_vram and any ranges on demand when some
+ * [begin_pfn:begin_pfn+nr) is first encountered.  It collects the dirty
+ * bitmask via shadow_scan_dirty_flags() and copies it into guest storage.
+ */
+int shadow_track_dirty_vram(struct domain *d,
+                            unsigned long begin_pfn,
+                            unsigned long nr,
+                            XEN_GUEST_HANDLE_64(uint8) guest_dirty_bitmap)
+{
+    int rc = 0;
+    unsigned long end_pfn = begin_pfn + nr;
+    int flush_tlb = 0;
+    dv_range_t *range;
+    struct p2m_domain *p2m = p2m_get_hostp2m(d);
+
+    if (end_pfn < begin_pfn
+            || begin_pfn > p2m->max_mapped_pfn
+            || end_pfn >= p2m->max_mapped_pfn)
+        return -EINVAL;
+
+    paging_lock(d);
+
+    if ( !nr || guest_handle_is_null(guest_dirty_bitmap) )
+    {
+        goto out;
+    }
+
+    if ( !dirty_vram_find_or_alloc(d))
+    {
+        rc = -ENOMEM;
+        goto out;
+    }
+    
+    range = dirty_vram_range_find(d, begin_pfn, nr);
+    if ( !range )
+    {
+        range = dirty_vram_range_alloc(d, begin_pfn, nr);
+        if ( range )    
+            sh_find_all_vram_mappings(d->vcpu[0], range);
+    }
+    if ( range )
+    {
+        int size = (nr + BITS_PER_LONG - 1) / BITS_PER_LONG;
+        unsigned long dirty_bitmap[size];
+        
+        memset(dirty_bitmap, 0x00, size * BYTES_PER_LONG);
+
+        flush_tlb |= shadow_scan_dirty_flags(d, range, (uint8_t*)dirty_bitmap);
+        
+        rc = -EFAULT;
+        if ( copy_to_guest(guest_dirty_bitmap,
+                           (uint8_t*)dirty_bitmap,
+                           size * BYTES_PER_LONG) == 0 )
+            rc = 0;
+    }
+    if ( flush_tlb )
+        flush_tlb_mask(d->domain_dirty_cpumask);
+
+out:
+    paging_unlock(d);
+    return rc;
+}
+
+
+/************************************************/
+/*          HAP VRAM TRACKING SUPPORT           */
+/************************************************/
+
+/* hap_enable_vram_tracking()
+ * For all ranges, mark all vram pages in range as logdirty read-only.
+ */
+static int hap_enable_vram_tracking(struct domain *d)
+{
+    int rc = 0;
+    dv_dirty_vram_t *dirty_vram;
+    struct list_head *curr;
+
+    /* turn on PG_log_dirty bit in paging mode */
+    paging_lock(d);
+    d->arch.paging.mode |= PG_log_dirty;
+    paging_unlock(d);
+
+    p2m_lock(p2m_get_hostp2m(d));
+    paging_lock(d);
+    
+    dirty_vram = d->arch.hvm_domain.dirty_vram;
+
+    /* dirty_vram != NULL iff we're tracking dirty vram.
+     * If we start tracking dirty pages for all memory then
+     * the dirty_vram structure is freed. */
+    if ( !dirty_vram )
+    {
+        rc = -EINVAL;
+        goto out;
+    }
+
+    /* set l1e entries of P2M table to be read-only. */
+    list_for_each(curr, &dirty_vram->range_head)
+    {
+        dv_range_t *range = list_entry(curr, dv_range_t, range_link);
+        gdprintk(XENLOG_DEBUG, "[%05lx:%05lx] enable  vram tracking\n",
+                 range->begin_pfn, range->end_pfn);
+        p2m_change_type_range(d, range->begin_pfn, range->end_pfn,
+                              p2m_ram_rw, p2m_ram_logdirty);
+    }
+        
+    flush_tlb_mask(d->domain_dirty_cpumask);
+ out:
+    paging_unlock(d);
+    p2m_unlock(p2m_get_hostp2m(d));
+    if (rc) 
+    {
+        paging_lock(d);
+        d->arch.paging.mode &= ~PG_log_dirty;
+        paging_unlock(d);
+    }
+    return rc;
+}
+
+/* hap_disable_vram_tracking()
+ * For all ranges, restore all vram pages in range to normal read-write.
+ */
+static int hap_disable_vram_tracking(struct domain *d)
+{
+    int rc = 0;
+    dv_dirty_vram_t *dirty_vram;
+    struct list_head *curr;
+
+    paging_lock(d);
+    d->arch.paging.mode &= ~PG_log_dirty;
+    paging_unlock(d);
+
+    p2m_lock(p2m_get_hostp2m(d));
+    paging_lock(d);
+    
+    dirty_vram = d->arch.hvm_domain.dirty_vram;
+    if ( !dirty_vram )
+    {
+        rc = -EINVAL;
+        goto out;
+    }
+    
+    /* set l1e entries of P2M table with normal mode */
+    list_for_each(curr, &dirty_vram->range_head)
+    {
+        dv_range_t *range = list_entry(curr, dv_range_t, range_link);
+        gdprintk(XENLOG_DEBUG, "[%05lx:%05lx] disable vram tracking\n",
+                 range->begin_pfn, range->end_pfn);
+        p2m_change_type_range(d, range->begin_pfn, range->end_pfn,
+                              p2m_ram_logdirty, p2m_ram_rw);
+    }
+    flush_tlb_mask(d->domain_dirty_cpumask);
+ out:
+    paging_unlock(d);
+    p2m_unlock(p2m_get_hostp2m(d));
+    if (rc) 
+    {
+        paging_lock(d);
+        d->arch.paging.mode |= PG_log_dirty;
+        paging_unlock(d);
+    }
+    return rc;
+}
+
+/* hap_clean_vram_tracking_range()
+ * For all the pages in the range specified by [begin_pfn,nr),
+ * note in the dirty bitmap any page that has been marked as read-write,
+ * which signifies that the page has been dirtied, and reset the page
+ * to ram_logdirty. 
+ */
+void hap_clean_vram_tracking_range(struct domain *d,
+                                   unsigned long begin_pfn,
+                                   unsigned long nr,
+                                   uint8_t *dirty_bitmap)
+{
+    int i;
+    unsigned long pfn;
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    dv_range_t *range;
+
+    ASSERT(p2m_locked_by_me(p2m_get_hostp2m(d)));
+    ASSERT(paging_locked_by_me(d));
+    
+    if ( !dirty_vram )
+    {
+        gdprintk(XENLOG_DEBUG, "Should only be called while tracking dirty vram.\n");
+        return;
+    }
+
+    range = dirty_vram_range_find(d, begin_pfn, nr);
+    if (!range)
+        return;
+
+    /* set l1e entries of P2M table to be read-only. */
+    /* On first write, it page faults, its entry is changed to read-write,
+     * its bit in the dirty bitmap is set, and on retry the write succeeds. */
+    for (i = 0, pfn = range->begin_pfn; pfn < range->end_pfn; i++, pfn++)
+    {
+        p2m_type_t pt;
+        pt = p2m_change_type(d, pfn, p2m_ram_rw, p2m_ram_logdirty);
+        if (pt == p2m_ram_rw)
+            dirty_bitmap[i >> 3] |= (1 << (i & 7));
+    }
+    flush_tlb_mask(d->domain_dirty_cpumask);
+}
+
+static void hap_vram_tracking_init(struct domain *d)
+{
+    paging_log_dirty_init(d, hap_enable_vram_tracking,
+                          hap_disable_vram_tracking,
+                          NULL);
+}
+
+/* hap_track_dirty_vram()
+ * Create the domain's dv_dirty_vram struct on demand.
+ * Create a dirty vram range on demand when some [begin_pfn:begin_pfn+nr) is first encountered.
+ * Collect the guest_dirty bitmask, a bit mask of the dirtied vram pages, by
+ * calling paging_log_dirty_range().
+ */
+int hap_track_dirty_vram(struct domain *d,
+                         unsigned long begin_pfn,
+                         unsigned long nr,
+                         XEN_GUEST_HANDLE_64(uint8) guest_dirty_bitmap)
+{
+    long rc = 0;
+    dv_dirty_vram_t *dirty_vram;
+    int restart_log_dirty = 0;
+
+    paging_lock(d);
+    dirty_vram = d->arch.hvm_domain.dirty_vram;
+    if ( nr )
+    {
+        dv_range_t *range = NULL;
+        int size = (nr + BITS_PER_LONG - 1) / BITS_PER_LONG;
+        unsigned long dirty_bitmap[size];
+
+        /* Already tracking dirty vram? */
+        if ( paging_mode_log_dirty(d) && dirty_vram ) /* yes */
+        {
+            /* Handle the addition of another range */
+            range = dirty_vram_range_find(d, begin_pfn, nr);
+            if ( !range )
+            {
+                rc = -ENOMEM;
+                if ( !(range = dirty_vram_range_alloc(d, begin_pfn, nr)) )
+                    goto param_fail;
+                restart_log_dirty = 1;
+            }
+        }
+        /* Just starting to track dirty vram? */
+        else if ( !paging_mode_log_dirty(d) && !dirty_vram ) /* yes */
+        {
+            rc = -ENOMEM;
+            if ( !(dirty_vram = dirty_vram_alloc(d)) )
+                goto param_fail;
+            
+            if ( !(range = dirty_vram_range_find_or_alloc(d, begin_pfn, nr)) )
+                goto param_fail;
+
+            restart_log_dirty = 1;
+            /* Initialize callbacks for vram tracking */
+            hap_vram_tracking_init(d);
+        }
+        else
+        {
+            /* Test for invalid combination */
+            if ( !paging_mode_log_dirty(d) && dirty_vram )
+                rc = -EINVAL;
+            else /* logging dirty of all memory, not tracking dirty vram */
+                rc = -ENODATA;
+            goto param_fail;
+        }
+        
+        if (restart_log_dirty) 
+        {
+            /* disable then enable log dirty */
+            paging_unlock(d);
+            if (paging_mode_log_dirty(d))
+                paging_log_dirty_disable(d);
+                    
+            rc = paging_log_dirty_enable(d);
+            paging_lock(d);
+            if (rc != 0)
+                goto param_fail;
+        }
+        
+        paging_unlock(d);
+        memset(dirty_bitmap, 0x00, size * BYTES_PER_LONG);
+        paging_log_dirty_range(d, begin_pfn, nr, (uint8_t*)dirty_bitmap);
+        rc = -EFAULT;
+        if ( copy_to_guest(guest_dirty_bitmap,
+                           (uint8_t*)dirty_bitmap,
+                           size * BYTES_PER_LONG) == 0 )
+        {
+            rc = 0;
+        }
+    }
+    else
+    {
+        /* If zero pages specified while already tracking dirty vram
+         * then stop tracking */
+        if ( paging_mode_log_dirty(d) && dirty_vram ) {
+            paging_unlock(d);
+            rc = paging_log_dirty_disable(d);
+            paging_lock(d);
+            dirty_vram_free(d);
+        } else /* benign no-op */
+        {
+            rc = 0;
+        }
+        paging_unlock(d);
+    }
+
+    return rc;
+
+param_fail:
+    dirty_vram_free(d);
+    paging_unlock(d);
+    return rc;
+}
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index a5a1bcf..55553e4 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -57,6 +57,7 @@
 #include <asm/hvm/cacheattr.h>
 #include <asm/hvm/trace.h>
 #include <asm/hvm/nestedhvm.h>
+#include <asm/hvm/dirty_vram.h>
 #include <asm/mtrr.h>
 #include <asm/apic.h>
 #include <public/sched.h>
@@ -1433,8 +1434,7 @@ int hvm_hap_nested_page_fault(paddr_t gpa,
          */
         if ( access_w )
         {
-            paging_mark_dirty(v->domain, mfn_x(mfn));
-            p2m_change_type(v->domain, gfn, p2m_ram_logdirty, p2m_ram_rw);
+            paging_mark_dirty_hap(v->domain, gfn, mfn_x(mfn));
         }
         rc = 1;
         goto out_put_gfn;
diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c
index d2637d3..f31e4e5 100644
--- a/xen/arch/x86/mm/hap/hap.c
+++ b/xen/arch/x86/mm/hap/hap.c
@@ -41,6 +41,7 @@
 #include <asm/domain.h>
 #include <xen/numa.h>
 #include <asm/hvm/nestedhvm.h>
+#include <asm/hvm/dirty_vram.h>
 
 #include "private.h"
 
@@ -53,139 +54,6 @@
 #define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg))
 
 /************************************************/
-/*          HAP VRAM TRACKING SUPPORT           */
-/************************************************/
-
-static int hap_enable_vram_tracking(struct domain *d)
-{
-    struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
-
-    if ( !dirty_vram )
-        return -EINVAL;
-
-    /* turn on PG_log_dirty bit in paging mode */
-    paging_lock(d);
-    d->arch.paging.mode |= PG_log_dirty;
-    paging_unlock(d);
-
-    /* set l1e entries of P2M table to be read-only. */
-    p2m_change_type_range(d, dirty_vram->begin_pfn, dirty_vram->end_pfn, 
-                          p2m_ram_rw, p2m_ram_logdirty);
-
-    flush_tlb_mask(d->domain_dirty_cpumask);
-    return 0;
-}
-
-static int hap_disable_vram_tracking(struct domain *d)
-{
-    struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
-
-    if ( !dirty_vram )
-        return -EINVAL;
-
-    paging_lock(d);
-    d->arch.paging.mode &= ~PG_log_dirty;
-    paging_unlock(d);
-
-    /* set l1e entries of P2M table with normal mode */
-    p2m_change_type_range(d, dirty_vram->begin_pfn, dirty_vram->end_pfn, 
-                          p2m_ram_logdirty, p2m_ram_rw);
-
-    flush_tlb_mask(d->domain_dirty_cpumask);
-    return 0;
-}
-
-static void hap_clean_vram_tracking(struct domain *d)
-{
-    struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
-
-    if ( !dirty_vram )
-        return;
-
-    /* set l1e entries of P2M table to be read-only. */
-    p2m_change_type_range(d, dirty_vram->begin_pfn, dirty_vram->end_pfn, 
-                          p2m_ram_rw, p2m_ram_logdirty);
-
-    flush_tlb_mask(d->domain_dirty_cpumask);
-}
-
-static void hap_vram_tracking_init(struct domain *d)
-{
-    paging_log_dirty_init(d, hap_enable_vram_tracking,
-                          hap_disable_vram_tracking,
-                          hap_clean_vram_tracking);
-}
-
-int hap_track_dirty_vram(struct domain *d,
-                         unsigned long begin_pfn,
-                         unsigned long nr,
-                         XEN_GUEST_HANDLE_64(uint8) dirty_bitmap)
-{
-    long rc = 0;
-    struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
-
-    if ( nr )
-    {
-        if ( paging_mode_log_dirty(d) && dirty_vram )
-        {
-            if ( begin_pfn != dirty_vram->begin_pfn ||
-                 begin_pfn + nr != dirty_vram->end_pfn )
-            {
-                paging_log_dirty_disable(d);
-                dirty_vram->begin_pfn = begin_pfn;
-                dirty_vram->end_pfn = begin_pfn + nr;
-                rc = paging_log_dirty_enable(d);
-                if (rc != 0)
-                    goto param_fail;
-            }
-        }
-        else if ( !paging_mode_log_dirty(d) && !dirty_vram )
-        {
-            rc = -ENOMEM;
-            if ( (dirty_vram = xmalloc(struct sh_dirty_vram)) == NULL )
-                goto param_fail;
-
-            dirty_vram->begin_pfn = begin_pfn;
-            dirty_vram->end_pfn = begin_pfn + nr;
-            d->arch.hvm_domain.dirty_vram = dirty_vram;
-            hap_vram_tracking_init(d);
-            rc = paging_log_dirty_enable(d);
-            if (rc != 0)
-                goto param_fail;
-        }
-        else
-        {
-            if ( !paging_mode_log_dirty(d) && dirty_vram )
-                rc = -EINVAL;
-            else
-                rc = -ENODATA;
-            goto param_fail;
-        }
-        /* get the bitmap */
-        rc = paging_log_dirty_range(d, begin_pfn, nr, dirty_bitmap);
-    }
-    else
-    {
-        if ( paging_mode_log_dirty(d) && dirty_vram ) {
-            rc = paging_log_dirty_disable(d);
-            xfree(dirty_vram);
-            dirty_vram = d->arch.hvm_domain.dirty_vram = NULL;
-        } else
-            rc = 0;
-    }
-
-    return rc;
-
-param_fail:
-    if ( dirty_vram )
-    {
-        xfree(dirty_vram);
-        dirty_vram = d->arch.hvm_domain.dirty_vram = NULL;
-    }
-    return rc;
-}
-
-/************************************************/
 /*            HAP LOG DIRTY SUPPORT             */
 /************************************************/
 
@@ -223,14 +91,12 @@ static void hap_clean_dirty_bitmap(struct domain *d)
 
 void hap_logdirty_init(struct domain *d)
 {
-    struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    struct dv_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
     if ( paging_mode_log_dirty(d) && dirty_vram )
     {
         paging_log_dirty_disable(d);
-        xfree(dirty_vram);
-        dirty_vram = d->arch.hvm_domain.dirty_vram = NULL;
+        dirty_vram_free(d);
     }
-
     /* Reinitialize logdirty mechanism */
     paging_log_dirty_init(d, hap_enable_log_dirty,
                           hap_disable_log_dirty,
diff --git a/xen/arch/x86/mm/paging.c b/xen/arch/x86/mm/paging.c
index ca879f9..7464b07 100644
--- a/xen/arch/x86/mm/paging.c
+++ b/xen/arch/x86/mm/paging.c
@@ -27,6 +27,7 @@
 #include <asm/p2m.h>
 #include <asm/hap.h>
 #include <asm/hvm/nestedhvm.h>
+#include <asm/hvm/dirty_vram.h>
 #include <xen/numa.h>
 #include <xsm/xsm.h>
 
@@ -278,6 +279,46 @@ out:
 }
 
 
+/* paging_mark_dirty_hap()
+ * Make a HAP page writable and mark it as dirty.
+ * This is done atomically under the p2m and paging locks to avoid leaving
+ * a window where the page might be modified without being marked as dirty.
+ */
+void paging_mark_dirty_hap(struct domain *d,
+                           unsigned long pfn,
+                           unsigned long guest_mfn)
+{
+    mfn_t gmfn;
+    p2m_type_t pt;
+    struct p2m_domain *p2m = p2m_get_hostp2m(d);
+    
+    if ( !paging_mode_log_dirty(d) )
+        return;
+
+    gmfn = _mfn(guest_mfn);
+
+    ASSERT( mfn_valid(gmfn) &&
+            page_get_owner(mfn_to_page(gmfn)) == d );
+
+    p2m_lock(p2m);
+    pt = p2m_change_type(d, pfn, p2m_ram_logdirty, p2m_ram_rw);
+    paging_lock(d);
+    if ( pt == p2m_ram_logdirty )
+    {
+        dv_range_t *range;
+        PAGING_DEBUG(LOGDIRTY,
+                     "marked mfn %" PRI_mfn " (pfn=%lx), dom %d\n",
+                     mfn_x(gmfn), pfn, d->domain_id);
+        d->arch.paging.log_dirty.dirty_count++;
+        range = dirty_vram_range_find_gfn(d, pfn);
+        if (range)
+            range->dirty_count++;
+    }
+    paging_mark_dirty(d, guest_mfn); 
+    paging_unlock(d);
+    p2m_unlock(p2m);
+}
+
 /* Is this guest page dirty? */
 int paging_mfn_is_dirty(struct domain *d, mfn_t gmfn)
 {
@@ -333,8 +374,11 @@ int paging_log_dirty_op(struct domain *d, struct xen_domctl_shadow_op *sc)
     mfn_t *l4, *l3, *l2;
     unsigned long *l1;
     int i4, i3, i2;
+    struct p2m_domain *p2m = p2m_get_hostp2m(d);
 
     domain_pause(d);
+    /* Locking hierarchy requires p2m lock to be taken first */
+    p2m_lock(p2m);
     paging_lock(d);
 
     clean = (sc->op == XEN_DOMCTL_SHADOW_OP_CLEAN);
@@ -345,6 +389,14 @@ int paging_log_dirty_op(struct domain *d, struct xen_domctl_shadow_op *sc)
                  d->arch.paging.log_dirty.fault_count,
                  d->arch.paging.log_dirty.dirty_count);
 
+    if (hap_enabled(d) && d->arch.hvm_domain.dirty_vram)
+    {
+        /* If we're cleaning/peeking all guest memory, we should not be tracking
+         * dirty vram. */
+        rv = -EINVAL;
+        goto out;
+    }
+
     sc->stats.fault_count = d->arch.paging.log_dirty.fault_count;
     sc->stats.dirty_count = d->arch.paging.log_dirty.dirty_count;
 
@@ -424,170 +476,60 @@ int paging_log_dirty_op(struct domain *d, struct xen_domctl_shadow_op *sc)
 
     if ( clean )
     {
-        /* We need to further call clean_dirty_bitmap() functions of specific
-         * paging modes (shadow or hap).  Safe because the domain is paused. */
-        d->arch.paging.log_dirty.clean_dirty_bitmap(d);
+        /* Is null if tracking dirty vram */
+        if (d->arch.paging.log_dirty.clean_dirty_bitmap)
+        {
+            /* We need to further call clean_dirty_bitmap() functions of specific
+             * paging modes (shadow or hap).  Safe because the domain is paused. */
+            d->arch.paging.log_dirty.clean_dirty_bitmap(d);
+        }
     }
     domain_unpause(d);
     return rv;
 
  out:
     paging_unlock(d);
+    p2m_unlock(p2m);
     domain_unpause(d);
     return rv;
 }
 
-int paging_log_dirty_range(struct domain *d,
-                            unsigned long begin_pfn,
-                            unsigned long nr,
-                            XEN_GUEST_HANDLE_64(uint8) dirty_bitmap)
+void paging_log_dirty_range(struct domain *d,
+                           unsigned long begin_pfn,
+                           unsigned long nr,
+                           uint8_t *dirty_bitmap)
 {
-    int rv = 0;
-    unsigned long pages = 0;
-    mfn_t *l4, *l3, *l2;
-    unsigned long *l1;
-    int b1, b2, b3, b4;
-    int i2, i3, i4;
-
-    d->arch.paging.log_dirty.clean_dirty_bitmap(d);
+    struct p2m_domain *p2m = p2m_get_hostp2m(d);
+    dv_range_t *range;
+    unsigned int range_dirty_count = 0;
+    
+    p2m_lock(p2m);
     paging_lock(d);
 
-    PAGING_DEBUG(LOGDIRTY, "log-dirty-range: dom %u faults=%u dirty=%u\n",
-                 d->domain_id,
-                 d->arch.paging.log_dirty.fault_count,
-                 d->arch.paging.log_dirty.dirty_count);
-
-    if ( unlikely(d->arch.paging.log_dirty.failed_allocs) ) {
-        printk("%s: %d failed page allocs while logging dirty pages\n",
-               __FUNCTION__, d->arch.paging.log_dirty.failed_allocs);
-        rv = -ENOMEM;
-        goto out;
-    }
-
-    if ( !d->arch.paging.log_dirty.fault_count &&
-         !d->arch.paging.log_dirty.dirty_count ) {
-        unsigned int size = BITS_TO_LONGS(nr);
-
-        if ( clear_guest(dirty_bitmap, size * BYTES_PER_LONG) != 0 )
-            rv = -EFAULT;
-        goto out;
-    }
-    d->arch.paging.log_dirty.fault_count = 0;
-    d->arch.paging.log_dirty.dirty_count = 0;
-
-    b1 = L1_LOGDIRTY_IDX(begin_pfn);
-    b2 = L2_LOGDIRTY_IDX(begin_pfn);
-    b3 = L3_LOGDIRTY_IDX(begin_pfn);
-    b4 = L4_LOGDIRTY_IDX(begin_pfn);
-    l4 = paging_map_log_dirty_bitmap(d);
-
-    for ( i4 = b4;
-          (pages < nr) && (i4 < LOGDIRTY_NODE_ENTRIES);
-          i4++ )
+    /* Only called when tracking dirty vram in HAP mode */
+    ASSERT(hap_enabled(d) && d->arch.hvm_domain.dirty_vram);
+    
+    range = dirty_vram_range_find_gfn(d, begin_pfn);
+    if (range)
     {
-        l3 = (l4 && mfn_valid(l4[i4])) ? map_domain_page(mfn_x(l4[i4])) : NULL;
-        for ( i3 = b3;
-              (pages < nr) && (i3 < LOGDIRTY_NODE_ENTRIES);
-              i3++ )
-        {
-            l2 = ((l3 && mfn_valid(l3[i3])) ?
-                  map_domain_page(mfn_x(l3[i3])) : NULL);
-            for ( i2 = b2;
-                  (pages < nr) && (i2 < LOGDIRTY_NODE_ENTRIES);
-                  i2++ )
-            {
-                unsigned int bytes = PAGE_SIZE;
-                uint8_t *s;
-                l1 = ((l2 && mfn_valid(l2[i2])) ?
-                      map_domain_page(mfn_x(l2[i2])) : NULL);
-
-                s = ((uint8_t*)l1) + (b1 >> 3);
-                bytes -= b1 >> 3;
-
-                if ( likely(((nr - pages + 7) >> 3) < bytes) )
-                    bytes = (unsigned int)((nr - pages + 7) >> 3);
-
-                if ( !l1 )
-                {
-                    if ( clear_guest_offset(dirty_bitmap, pages >> 3,
-                                            bytes) != 0 )
-                    {
-                        rv = -EFAULT;
-                        goto out;
-                    }
-                }
-                /* begin_pfn is not 32K aligned, hence we have to bit
-                 * shift the bitmap */
-                else if ( b1 & 0x7 )
-                {
-                    int i, j;
-                    uint32_t *l = (uint32_t*) s;
-                    int bits = b1 & 0x7;
-                    int bitmask = (1 << bits) - 1;
-                    int size = (bytes + BYTES_PER_LONG - 1) / BYTES_PER_LONG;
-                    unsigned long bitmap[size];
-                    static unsigned long printed = 0;
-
-                    if ( printed != begin_pfn )
-                    {
-                        dprintk(XENLOG_DEBUG, "%s: begin_pfn %lx is not 32K aligned!\n",
-                                __FUNCTION__, begin_pfn);
-                        printed = begin_pfn;
-                    }
-
-                    for ( i = 0; i < size - 1; i++, l++ ) {
-                        bitmap[i] = ((*l) >> bits) |
-                            (((*((uint8_t*)(l + 1))) & bitmask) << (sizeof(*l) * 8 - bits));
-                    }
-                    s = (uint8_t*) l;
-                    size = BYTES_PER_LONG - ((b1 >> 3) & 0x3);
-                    bitmap[i] = 0;
-                    for ( j = 0; j < size; j++, s++ )
-                        bitmap[i] |= (*s) << (j * 8);
-                    bitmap[i] = (bitmap[i] >> bits) | (bitmask << (size * 8 - bits));
-                    if ( copy_to_guest_offset(dirty_bitmap, (pages >> 3),
-                                (uint8_t*) bitmap, bytes) != 0 )
-                    {
-                        rv = -EFAULT;
-                        goto out;
-                    }
-                }
-                else
-                {
-                    if ( copy_to_guest_offset(dirty_bitmap, pages >> 3,
-                                              s, bytes) != 0 )
-                    {
-                        rv = -EFAULT;
-                        goto out;
-                    }
-                }
-
-                pages += bytes << 3;
-                if ( l1 )
-                {
-                    clear_page(l1);
-                    unmap_domain_page(l1);
-                }
-                b1 = b1 & 0x7;
-            }
-            b2 = 0;
-            if ( l2 )
-                unmap_domain_page(l2);
-        }
-        b3 = 0;
-        if ( l3 )
-            unmap_domain_page(l3);
+        range_dirty_count = range->dirty_count;
+        range->dirty_count = 0;
     }
-    if ( l4 )
-        unmap_domain_page(l4);
-
-    paging_unlock(d);
+    
+    if ( !range_dirty_count)
+        goto out;
 
-    return rv;
+    PAGING_DEBUG(LOGDIRTY, "log-dirty-range: dom %u [%05lx:%05lx] range_dirty=%u\n",
+                 d->domain_id,
+                 begin_pfn,
+                 range->end_pfn,
+                 range_dirty_count);
 
+    hap_clean_vram_tracking_range(d, begin_pfn, nr, dirty_bitmap);
  out:
     paging_unlock(d);
-    return rv;
+    p2m_unlock(p2m);
+    return;
 }
 
 /* Note that this function takes three function pointers. Callers must supply
diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c
index 3f8ad88..c9f3495 100644
--- a/xen/arch/x86/mm/shadow/common.c
+++ b/xen/arch/x86/mm/shadow/common.c
@@ -36,6 +36,7 @@
 #include <asm/current.h>
 #include <asm/flushtlb.h>
 #include <asm/shadow.h>
+#include <asm/hvm/dirty_vram.h>
 #include <xen/numa.h>
 #include "private.h"
 
@@ -3121,12 +3122,7 @@ void shadow_teardown(struct domain *d)
      * calls now that we've torn down the bitmap */
     d->arch.paging.mode &= ~PG_log_dirty;
 
-    if (d->arch.hvm_domain.dirty_vram) {
-        xfree(d->arch.hvm_domain.dirty_vram->sl1ma);
-        xfree(d->arch.hvm_domain.dirty_vram->dirty_bitmap);
-        xfree(d->arch.hvm_domain.dirty_vram);
-        d->arch.hvm_domain.dirty_vram = NULL;
-    }
+    dirty_vram_free(d);
 
     paging_unlock(d);
 
@@ -3463,179 +3459,212 @@ void shadow_clean_dirty_bitmap(struct domain *d)
 
 
 /**************************************************************************/
-/* VRAM dirty tracking support */
-int shadow_track_dirty_vram(struct domain *d,
-                            unsigned long begin_pfn,
-                            unsigned long nr,
-                            XEN_GUEST_HANDLE_64(uint8) dirty_bitmap)
-{
-    int rc;
-    unsigned long end_pfn = begin_pfn + nr;
-    unsigned long dirty_size = (nr + 7) / 8;
-    int flush_tlb = 0;
-    unsigned long i;
-    p2m_type_t t;
-    struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
-    struct p2m_domain *p2m = p2m_get_hostp2m(d);
-
-    if (end_pfn < begin_pfn
-            || begin_pfn > p2m->max_mapped_pfn
-            || end_pfn >= p2m->max_mapped_pfn)
-        return -EINVAL;
-
-    /* We perform p2m lookups, so lock the p2m upfront to avoid deadlock */
-    p2m_lock(p2m_get_hostp2m(d));
-    paging_lock(d);
+/* Support functions for shadow-based dirty VRAM code */
 
-    if ( dirty_vram && (!nr ||
-             ( begin_pfn != dirty_vram->begin_pfn
-            || end_pfn   != dirty_vram->end_pfn )) )
-    {
-        /* Different tracking, tear the previous down. */
-        gdprintk(XENLOG_INFO, "stopping tracking VRAM %lx - %lx\n", dirty_vram->begin_pfn, dirty_vram->end_pfn);
-        xfree(dirty_vram->sl1ma);
-        xfree(dirty_vram->dirty_bitmap);
-        xfree(dirty_vram);
-        dirty_vram = d->arch.hvm_domain.dirty_vram = NULL;
-    }
+#define DEBUG_unshadow_sl1ma                  0          
+#define DEBUG_unshadow_sl1ma_detail           0
+#define DEBUG_count_initial_mappings          1
 
-    if ( !nr )
+/* smfn is no longer a shadow page.  Remove it from any
+ * dirty vram range mapping. */
+void
+dirty_vram_delete_shadow(struct vcpu *v,
+                         unsigned long gfn,
+                         unsigned int shadow_type, 
+                         mfn_t smfn)
+{
+    static unsigned int l1_shadow_mask = 
+          1 << SH_type_l1_32_shadow
+        | 1 << SH_type_fl1_32_shadow
+        | 1 << SH_type_l1_pae_shadow
+        | 1 << SH_type_fl1_pae_shadow
+        | 1 << SH_type_l1_64_shadow
+        | 1 << SH_type_fl1_64_shadow
+        ;
+    struct domain *d = v->domain;
+    dv_dirty_vram_t *dirty_vram;
+    struct list_head *curr, *next;
+    
+    ASSERT(paging_locked_by_me(d));
+    /* Ignore all but level 1 shadows */
+    
+    if ((l1_shadow_mask & (1 << shadow_type)) == 0)
     {
-        rc = 0;
         goto out;
     }
 
-    /* This should happen seldomly (Video mode change),
-     * no need to be careful. */
+    dirty_vram = d->arch.hvm_domain.dirty_vram;
     if ( !dirty_vram )
     {
-        /* Throw away all the shadows rather than walking through them 
-         * up to nr times getting rid of mappings of each pfn */
-        shadow_blow_tables(d);
-
-        gdprintk(XENLOG_INFO, "tracking VRAM %lx - %lx\n", begin_pfn, end_pfn);
-
-        rc = -ENOMEM;
-        if ( (dirty_vram = xmalloc(struct sh_dirty_vram)) == NULL )
-            goto out;
-        dirty_vram->begin_pfn = begin_pfn;
-        dirty_vram->end_pfn = end_pfn;
-        d->arch.hvm_domain.dirty_vram = dirty_vram;
-
-        if ( (dirty_vram->sl1ma = xmalloc_array(paddr_t, nr)) == NULL )
-            goto out_dirty_vram;
-        memset(dirty_vram->sl1ma, ~0, sizeof(paddr_t) * nr);
-
-        if ( (dirty_vram->dirty_bitmap = xzalloc_array(uint8_t, dirty_size)) == NULL )
-            goto out_sl1ma;
-
-        dirty_vram->last_dirty = NOW();
-
-        /* Tell the caller that this time we could not track dirty bits. */
-        rc = -ENODATA;
-    }
-    else if (dirty_vram->last_dirty == -1)
-    {
-        /* still completely clean, just copy our empty bitmap */
-        rc = -EFAULT;
-        if ( copy_to_guest(dirty_bitmap, dirty_vram->dirty_bitmap, dirty_size) == 0 )
-            rc = 0;
+        goto out;
     }
-    else
+        
+    list_for_each_safe(curr, next, &dirty_vram->range_head)
     {
-        /* Iterate over VRAM to track dirty bits. */
-        for ( i = 0; i < nr; i++ ) {
-            mfn_t mfn = get_gfn_query_unlocked(d, begin_pfn + i, &t);
-            struct page_info *page;
-            int dirty = 0;
-            paddr_t sl1ma = dirty_vram->sl1ma[i];
-
-            if (mfn_x(mfn) == INVALID_MFN)
-            {
-                dirty = 1;
-            }
-            else
+        dv_range_t *range = list_entry(curr, dv_range_t, range_link);
+        unsigned long i;
+        int max_mappings = 1, mappings = 0;
+        int unshadowed = 0;
+        for (i = 0; i != range->end_pfn - range->begin_pfn; i++)
+        {
+            dv_paddr_link_t *pl = &range->pl_tab[ i ];
+            dv_paddr_link_t **ppl = NULL;
+            mappings = 0;
+            
+            while (pl != NULL)
             {
-                page = mfn_to_page(mfn);
-                switch (page->u.inuse.type_info & PGT_count_mask)
-                {
-                case 0:
-                    /* No guest reference, nothing to track. */
-                    break;
-                case 1:
-                    /* One guest reference. */
-                    if ( sl1ma == INVALID_PADDR )
-                    {
-                        /* We don't know which sl1e points to this, too bad. */
-                        dirty = 1;
-                        /* TODO: Heuristics for finding the single mapping of
-                         * this gmfn */
-                        flush_tlb |= sh_remove_all_mappings(d->vcpu[0], mfn);
-                    }
-                    else
-                    {
-                        /* Hopefully the most common case: only one mapping,
-                         * whose dirty bit we can use. */
-                        l1_pgentry_t *sl1e = maddr_to_virt(sl1ma);
-
-                        if ( l1e_get_flags(*sl1e) & _PAGE_DIRTY )
-                        {
-                            dirty = 1;
-                            /* Note: this is atomic, so we may clear a
-                             * _PAGE_ACCESSED set by another processor. */
-                            l1e_remove_flags(*sl1e, _PAGE_DIRTY);
-                            flush_tlb = 1;
-                        }
-                    }
-                    break;
-                default:
-                    /* More than one guest reference,
-                     * we don't afford tracking that. */
-                    dirty = 1;
+                paddr_t sl1ma = pl->sl1ma;
+                unsigned long sl1mn;
+                
+                if (sl1ma == INVALID_PADDR )
                     break;
+                
+                sl1mn = sl1ma >> PAGE_SHIFT;
+                if (sl1mn == mfn_x(smfn)) {
+#if DEBUG_unshadow_sl1ma_detail
+                    gdprintk(XENLOG_DEBUG,
+                             "[%lx] gfn[%lx] unshadow sl1ma:%lx\n",
+                             mfn_x(smfn),
+                             range->begin_pfn + i,
+                             sl1ma);
+#endif
+                    unshadowed++;
+                    pl = free_paddr_link(d, ppl, pl);
+                    --range->nr_mappings;
+                }
+                else
+                {
+                    ppl = &pl->pl_next;
+                    pl = *ppl;
+                    mappings++;
                 }
             }
-
-            if ( dirty )
+        }
+        if (mappings > max_mappings)
+            max_mappings = mappings;
+        
+        if (unshadowed) {
+#if DEBUG_unshadow_sl1ma
+            gdprintk(XENLOG_DEBUG,
+                     "[%lx] gfn[%05lx:%05lx] unshadowed:%d mappings:0x%x max_mappings:%d\n",
+                     mfn_x(smfn),
+                     range->begin_pfn, range->end_pfn,
+                     unshadowed, range->nr_mappings, max_mappings);
+#endif
+            if ( range->nr_mappings == 0 )
             {
-                dirty_vram->dirty_bitmap[i / 8] |= 1 << (i % 8);
-                dirty_vram->last_dirty = NOW();
+                dirty_vram_range_free(d, range);                    
             }
         }
+    }
+ out:
+    return;
+}
+
+
+typedef int (*hash_pfn_callback_t)(struct vcpu *v,
+                                   mfn_t smfn,
+                                   unsigned long begin_pfn,
+                                   unsigned long end_pfn,
+                                   int *removed);
+
+static int hash_pfn_foreach(struct vcpu *v, 
+                            unsigned int callback_mask, 
+                            hash_pfn_callback_t callbacks[], 
+                            unsigned long begin_pfn,
+                            unsigned long end_pfn)
+/* Walk the hash table looking at the types of the entries and 
+ * calling the appropriate callback function for each entry. 
+ * The mask determines which shadow types we call back for, and the array
+ * of callbacks tells us which function to call.
+ * Any callback may return non-zero to let us skip the rest of the scan. 
+ *
+ * WARNING: Callbacks MUST NOT add or remove hash entries unless they 
+ * then return non-zero to terminate the scan. */
+{
+    int i, done = 0, removed = 0;
+    struct domain *d = v->domain;
+    struct page_info *x;
+
+    /* Say we're here, to stop hash-lookups reordering the chains */
+    ASSERT(paging_locked_by_me(d));
+    ASSERT(d->arch.paging.shadow.hash_walking == 0);
+    d->arch.paging.shadow.hash_walking = 1;
 
-        rc = -EFAULT;
-        if ( copy_to_guest(dirty_bitmap, dirty_vram->dirty_bitmap, dirty_size) == 0 ) {
-            memset(dirty_vram->dirty_bitmap, 0, dirty_size);
-            if (dirty_vram->last_dirty + SECONDS(2) < NOW())
+    for ( i = 0; i < SHADOW_HASH_BUCKETS; i++ ) 
+    {
+        /* WARNING: This is not safe against changes to the hash table.
+         * The callback *must* return non-zero if it has inserted or
+         * deleted anything from the hash (lookups are OK, though). */
+        for ( x = d->arch.paging.shadow.hash_table[i]; x; x = next_shadow(x) )
+        {
+            if ( callback_mask & (1 << x->u.sh.type) )
             {
-                /* was clean for more than two seconds, try to disable guest
-                 * write access */
-                for ( i = begin_pfn; i < end_pfn; i++ ) {
-                    mfn_t mfn = get_gfn_query_unlocked(d, i, &t);
-                    if (mfn_x(mfn) != INVALID_MFN)
-                        flush_tlb |= sh_remove_write_access(d->vcpu[0], mfn, 1, 0);
-                }
-                dirty_vram->last_dirty = -1;
+                ASSERT(x->u.sh.type <= 15);
+                ASSERT(callbacks[x->u.sh.type] != NULL);
+                done = callbacks[x->u.sh.type](v, page_to_mfn(x), 
+                                               begin_pfn, end_pfn,
+                                               &removed);
+                if ( done ) break;
             }
-            rc = 0;
         }
+        if ( done ) break; 
     }
-    if ( flush_tlb )
-        flush_tlb_mask(d->domain_dirty_cpumask);
-    goto out;
+    d->arch.paging.shadow.hash_walking = 0;
+    return removed;
+}
 
-out_sl1ma:
-    xfree(dirty_vram->sl1ma);
-out_dirty_vram:
-    xfree(dirty_vram);
-    dirty_vram = d->arch.hvm_domain.dirty_vram = NULL;
+void sh_find_all_vram_mappings(struct vcpu *v,
+                               dv_range_t *range)
+{
+    /* Dispatch table for getting per-type functions */
+    static hash_pfn_callback_t callbacks[SH_type_unused] = {
+        NULL, /* none    */
+        SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, 2), /* l1_32   */
+        SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, 2), /* fl1_32  */
+        NULL, /* l2_32   */
+        SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, 3), /* l1_pae  */
+        SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, 3), /* fl1_pae */
+        NULL, /* l2_pae  */
+        NULL, /* l2h_pae */
+#if CONFIG_PAGING_LEVELS >= 4
+        SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, 4), /* l1_64   */
+        SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, 4), /* fl1_64  */
+#else
+        NULL, /* l1_64   */
+        NULL, /* fl1_64  */
+#endif
+        NULL, /* l2_64   */
+        NULL, /* l2h_64  */
+        NULL, /* l3_64   */
+        NULL, /* l4_64   */
+        NULL, /* p2m     */
+        NULL  /* unused  */
+    };
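+    /* Note: the l1/fl1 slots all dispatch to sh_find_vram_mappings_in_l1,
+     * instantiated once per number of guest paging levels (2, 3, 4); the
+     * array is indexed by shadow page type. */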
 
-out:
-    paging_unlock(d);
-    p2m_unlock(p2m_get_hostp2m(d));
-    return rc;
+    static unsigned int callback_mask = 
+          1 << SH_type_l1_32_shadow
+        | 1 << SH_type_fl1_32_shadow
+        | 1 << SH_type_l1_pae_shadow
+        | 1 << SH_type_fl1_pae_shadow
+        | 1 << SH_type_l1_64_shadow
+        | 1 << SH_type_fl1_64_shadow
+        ;
+
+    perfc_incr(shadow_mappings);
+
+    hash_pfn_foreach(v, callback_mask, callbacks,
+                     range->begin_pfn,
+                     range->end_pfn);
+
+#if DEBUG_count_initial_mappings
+    gdprintk(XENLOG_DEBUG, "[%05lx:%05lx] count of initial mappings:%d\n",
+             range->begin_pfn, range->end_pfn,
+             range->nr_mappings);
+#endif
 }
 
+
 /**************************************************************************/
 /* Shadow-control XEN_DOMCTL dispatcher */
 
diff --git a/xen/arch/x86/mm/shadow/multi.c b/xen/arch/x86/mm/shadow/multi.c
index b0e6d72..f4d0603 100644
--- a/xen/arch/x86/mm/shadow/multi.c
+++ b/xen/arch/x86/mm/shadow/multi.c
@@ -35,6 +35,7 @@
 #include <asm/flushtlb.h>
 #include <asm/hvm/hvm.h>
 #include <asm/hvm/cacheattr.h>
+#include <asm/hvm/dirty_vram.h>
 #include <asm/mtrr.h>
 #include <asm/guest_pt.h>
 #include <public/sched.h>
@@ -149,6 +150,10 @@ delete_fl1_shadow_status(struct vcpu *v, gfn_t gfn, mfn_t smfn)
     SHADOW_PRINTK("gfn=%"SH_PRI_gfn", type=%08x, smfn=%05lx\n",
                    gfn_x(gfn), SH_type_fl1_shadow, mfn_x(smfn));
     ASSERT(mfn_to_page(smfn)->u.sh.head);
+
+    /* Removing any dv_paddr_links to the erstwhile shadow page */
+    dirty_vram_delete_shadow(v, gfn_x(gfn), SH_type_fl1_shadow, smfn);
+    
     shadow_hash_delete(v, gfn_x(gfn), SH_type_fl1_shadow, smfn);
 }
 
@@ -160,6 +165,10 @@ delete_shadow_status(struct vcpu *v, mfn_t gmfn, u32 shadow_type, mfn_t smfn)
                    v->domain->domain_id, v->vcpu_id,
                    mfn_x(gmfn), shadow_type, mfn_x(smfn));
     ASSERT(mfn_to_page(smfn)->u.sh.head);
+    
+    /* Removing any dv_paddr_links to the erstwhile shadow page */
+    dirty_vram_delete_shadow(v, mfn_x(gmfn), shadow_type, smfn);
+    
     shadow_hash_delete(v, mfn_x(gmfn), shadow_type, smfn);
     /* 32-on-64 PV guests don't own their l4 pages; see set_shadow_status */
     if ( !is_pv_32on64_vcpu(v) || shadow_type != SH_type_l4_64_shadow )
@@ -516,7 +525,6 @@ _sh_propagate(struct vcpu *v,
     guest_l1e_t guest_entry = { guest_intpte };
     shadow_l1e_t *sp = shadow_entry_ptr;
     struct domain *d = v->domain;
-    struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
     gfn_t target_gfn = guest_l1e_get_gfn(guest_entry);
     u32 pass_thru_flags;
     u32 gflags, sflags;
@@ -663,17 +671,6 @@ _sh_propagate(struct vcpu *v,
         }
     }
 
-    if ( unlikely((level == 1) && dirty_vram
-            && dirty_vram->last_dirty == -1
-            && gfn_x(target_gfn) >= dirty_vram->begin_pfn
-            && gfn_x(target_gfn) < dirty_vram->end_pfn) )
-    {
-        if ( ft & FETCH_TYPE_WRITE )
-            dirty_vram->last_dirty = NOW();
-        else
-            sflags &= ~_PAGE_RW;
-    }
-
     /* Read-only memory */
     if ( p2m_is_readonly(p2mt) ||
          (p2mt == p2m_mmio_direct &&
@@ -1072,101 +1069,57 @@ static int shadow_set_l2e(struct vcpu *v,
     return flags;
 }
 
-static inline void shadow_vram_get_l1e(shadow_l1e_t new_sl1e,
+/* shadow_vram_fix_l1e()
+ * Tests an L1PTE as it is modified, looking for when it starts to (or ceases
+ * to) point to a frame buffer page.  If the old and new gfns differ, calls
+ * dirty_vram_range_update() to update the dirty_vram structures.
+ */
+static inline void shadow_vram_fix_l1e(shadow_l1e_t old_sl1e,
+                                       shadow_l1e_t new_sl1e,
                                        shadow_l1e_t *sl1e,
                                        mfn_t sl1mfn,
                                        struct domain *d)
 { 
-    mfn_t mfn = shadow_l1e_get_mfn(new_sl1e);
-    int flags = shadow_l1e_get_flags(new_sl1e);
-    unsigned long gfn;
-    struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    mfn_t new_mfn, old_mfn;
+    unsigned long new_gfn = INVALID_M2P_ENTRY, old_gfn = INVALID_M2P_ENTRY;
+    paddr_t sl1ma;
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
 
-    if ( !dirty_vram         /* tracking disabled? */
-         || !(flags & _PAGE_RW) /* read-only mapping? */
-         || !mfn_valid(mfn) )   /* mfn can be invalid in mmio_direct */
+    if ( !dirty_vram )
         return;
 
-    gfn = mfn_to_gfn(d, mfn);
-    /* Page sharing not supported on shadow PTs */
-    BUG_ON(SHARED_M2P(gfn));
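+    /* Machine address of the shadow L1 entry itself: the shadow L1 page's
+     * address plus the entry's byte offset within that page. */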
+    sl1ma = pfn_to_paddr(mfn_x(sl1mfn)) | ((unsigned long)sl1e & ~PAGE_MASK);
 
-    if ( (gfn >= dirty_vram->begin_pfn) && (gfn < dirty_vram->end_pfn) )
+    old_mfn = shadow_l1e_get_mfn(old_sl1e);
+
+    if ( !sh_l1e_is_magic(old_sl1e) &&
+         (l1e_get_flags(old_sl1e) & _PAGE_PRESENT) &&
+         mfn_valid(old_mfn))
     {
-        unsigned long i = gfn - dirty_vram->begin_pfn;
-        struct page_info *page = mfn_to_page(mfn);
-        
-        if ( (page->u.inuse.type_info & PGT_count_mask) == 1 )
-            /* Initial guest reference, record it */
-            dirty_vram->sl1ma[i] = pfn_to_paddr(mfn_x(sl1mfn))
-                | ((unsigned long)sl1e & ~PAGE_MASK);
+        old_gfn = mfn_to_gfn(d, old_mfn);
     }
-}
-
-static inline void shadow_vram_put_l1e(shadow_l1e_t old_sl1e,
-                                       shadow_l1e_t *sl1e,
-                                       mfn_t sl1mfn,
-                                       struct domain *d)
-{
-    mfn_t mfn = shadow_l1e_get_mfn(old_sl1e);
-    int flags = shadow_l1e_get_flags(old_sl1e);
-    unsigned long gfn;
-    struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
-
-    if ( !dirty_vram         /* tracking disabled? */
-         || !(flags & _PAGE_RW) /* read-only mapping? */
-         || !mfn_valid(mfn) )   /* mfn can be invalid in mmio_direct */
-        return;
-
-    gfn = mfn_to_gfn(d, mfn);
-    /* Page sharing not supported on shadow PTs */
-    BUG_ON(SHARED_M2P(gfn));
-
-    if ( (gfn >= dirty_vram->begin_pfn) && (gfn < dirty_vram->end_pfn) )
+    
+    new_mfn = shadow_l1e_get_mfn(new_sl1e);
+    if ( !sh_l1e_is_magic(new_sl1e) &&
+         (l1e_get_flags(new_sl1e) & _PAGE_PRESENT) &&
+         mfn_valid(new_mfn))
     {
-        unsigned long i = gfn - dirty_vram->begin_pfn;
-        struct page_info *page = mfn_to_page(mfn);
-        int dirty = 0;
-        paddr_t sl1ma = pfn_to_paddr(mfn_x(sl1mfn))
-            | ((unsigned long)sl1e & ~PAGE_MASK);
+        new_gfn = mfn_to_gfn(d, new_mfn);
+    }
 
-        if ( (page->u.inuse.type_info & PGT_count_mask) == 1 )
-        {
-            /* Last reference */
-            if ( dirty_vram->sl1ma[i] == INVALID_PADDR ) {
-                /* We didn't know it was that one, let's say it is dirty */
-                dirty = 1;
-            }
-            else
-            {
-                ASSERT(dirty_vram->sl1ma[i] == sl1ma);
-                dirty_vram->sl1ma[i] = INVALID_PADDR;
-                if ( flags & _PAGE_DIRTY )
-                    dirty = 1;
-            }
-        }
-        else
+    if (old_gfn == new_gfn) return;
+
+    if (VALID_M2P(old_gfn))
+        if (dirty_vram_range_update(d, old_gfn, sl1ma, 0/*clear*/))
         {
-            /* We had more than one reference, just consider the page dirty. */
-            dirty = 1;
-            /* Check that it's not the one we recorded. */
-            if ( dirty_vram->sl1ma[i] == sl1ma )
-            {
-                /* Too bad, we remembered the wrong one... */
-                dirty_vram->sl1ma[i] = INVALID_PADDR;
-            }
-            else
-            {
-                /* Ok, our recorded sl1e is still pointing to this page, let's
-                 * just hope it will remain. */
-            }
+            SHADOW_PRINTK("gfn %lx (mfn %lx) cleared vram pte\n", old_gfn, mfn_x(old_mfn));
         }
-        if ( dirty )
+
+    if (VALID_M2P(new_gfn))
+        if (dirty_vram_range_update(d, new_gfn, sl1ma, 1/*set*/))
         {
-            dirty_vram->dirty_bitmap[i / 8] |= 1 << (i % 8);
-            dirty_vram->last_dirty = NOW();
+            SHADOW_PRINTK("gfn %lx (mfn %lx) set vram pte\n", new_gfn, mfn_x(new_mfn));
         }
-    }
 }
 
 static int shadow_set_l1e(struct vcpu *v, 
@@ -1211,12 +1164,14 @@ static int shadow_set_l1e(struct vcpu *v,
                 shadow_l1e_remove_flags(new_sl1e, _PAGE_RW);
                 /* fall through */
             case 0:
-                shadow_vram_get_l1e(new_sl1e, sl1e, sl1mfn, d);
+                shadow_vram_fix_l1e(old_sl1e, new_sl1e, sl1e, sl1mfn, d);
                 break;
             }
         }
     } 
 
+    shadow_vram_fix_l1e(old_sl1e, new_sl1e, sl1e, sl1mfn, d);
+
     /* Write the new entry */
     shadow_write_entries(sl1e, &new_sl1e, 1, sl1mfn);
     flags |= SHADOW_SET_CHANGED;
@@ -1231,7 +1186,6 @@ static int shadow_set_l1e(struct vcpu *v,
          * trigger a flush later. */
         if ( shadow_mode_refcounts(d) ) 
         {
-            shadow_vram_put_l1e(old_sl1e, sl1e, sl1mfn, d);
             shadow_put_page_from_l1e(old_sl1e, d);
             TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_SHADOW_L1_PUT_REF);
         } 
@@ -2018,7 +1972,6 @@ void sh_destroy_l1_shadow(struct vcpu *v, mfn_t smfn)
         SHADOW_FOREACH_L1E(sl1mfn, sl1e, 0, 0, {
             if ( (shadow_l1e_get_flags(*sl1e) & _PAGE_PRESENT)
                  && !sh_l1e_is_magic(*sl1e) ) {
-                shadow_vram_put_l1e(*sl1e, sl1e, sl1mfn, d);
                 shadow_put_page_from_l1e(*sl1e, d);
             }
         });
@@ -4336,6 +4289,34 @@ int sh_rm_mappings_from_l1(struct vcpu *v, mfn_t sl1mfn, mfn_t target_mfn)
     return done;
 }
 
+
+int sh_find_vram_mappings_in_l1(struct vcpu *v,
+                                mfn_t sl1mfn,
+                                unsigned long begin_pfn,
+                                unsigned long end_pfn,
+                                int *removed)
+/* Find all VRAM mappings in this shadow l1 table */
+{
+    struct domain *d = v->domain;
+    shadow_l1e_t *sl1e;
+    int done = 0;
+    
+    SHADOW_FOREACH_L1E(sl1mfn, sl1e, 0, done, /* only returns _PAGE_PRESENT entries */
+    {
+        unsigned long gfn;
+        mfn_t gmfn = shadow_l1e_get_mfn(*sl1e);
+        if (!mfn_valid(gmfn))
+            continue;
+        gfn = mfn_to_gfn(d, gmfn);
+        if (VALID_M2P(gfn) && (begin_pfn <= gfn) && (gfn < end_pfn)) 
+        {
+            paddr_t sl1ma = pfn_to_paddr(mfn_x(sl1mfn)) | ((unsigned long)sl1e & ~PAGE_MASK);
+            dirty_vram_range_update(v->domain, gfn, sl1ma, 1/*set*/);
+        }
+    });
+    return 0;
+}
+
 /**************************************************************************/
 /* Functions to excise all pointers to shadows from higher-level shadows. */
 
diff --git a/xen/arch/x86/mm/shadow/multi.h b/xen/arch/x86/mm/shadow/multi.h
index 835121e..436a4ac 100644
--- a/xen/arch/x86/mm/shadow/multi.h
+++ b/xen/arch/x86/mm/shadow/multi.h
@@ -66,7 +66,12 @@ SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1, GUEST_LEVELS)
 extern int
 SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1, GUEST_LEVELS)
     (struct vcpu *v, mfn_t sl1mfn, mfn_t target_mfn);
-
+extern int
+SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, GUEST_LEVELS)
+     (struct vcpu *v, mfn_t sl1mfn, 
+      unsigned long begin_pfn,
+      unsigned long end_pfn,
+      int *removed);
 extern void
 SHADOW_INTERNAL_NAME(sh_clear_shadow_entry, GUEST_LEVELS)
     (struct vcpu *v, void *ep, mfn_t smfn);
diff --git a/xen/arch/x86/mm/shadow/types.h b/xen/arch/x86/mm/shadow/types.h
index 43ce1db..5b0f9f7 100644
--- a/xen/arch/x86/mm/shadow/types.h
+++ b/xen/arch/x86/mm/shadow/types.h
@@ -229,6 +229,7 @@ static inline shadow_l4e_t shadow_l4e_from_mfn(mfn_t mfn, u32 flags)
 #define sh_update_cr3              INTERNAL_NAME(sh_update_cr3)
 #define sh_rm_write_access_from_l1 INTERNAL_NAME(sh_rm_write_access_from_l1)
 #define sh_rm_mappings_from_l1     INTERNAL_NAME(sh_rm_mappings_from_l1)
+#define sh_find_vram_mappings_in_l1 INTERNAL_NAME(sh_find_vram_mappings_in_l1)
 #define sh_remove_l1_shadow        INTERNAL_NAME(sh_remove_l1_shadow)
 #define sh_remove_l2_shadow        INTERNAL_NAME(sh_remove_l2_shadow)
 #define sh_remove_l3_shadow        INTERNAL_NAME(sh_remove_l3_shadow)
diff --git a/xen/include/asm-x86/hap.h b/xen/include/asm-x86/hap.h
index a2532a4..82e20c7 100644
--- a/xen/include/asm-x86/hap.h
+++ b/xen/include/asm-x86/hap.h
@@ -57,10 +57,6 @@ void  hap_final_teardown(struct domain *d);
 void  hap_teardown(struct domain *d);
 void  hap_vcpu_init(struct vcpu *v);
 void  hap_logdirty_init(struct domain *d);
-int   hap_track_dirty_vram(struct domain *d,
-                           unsigned long begin_pfn,
-                           unsigned long nr,
-                           XEN_GUEST_HANDLE_64(uint8) dirty_bitmap);
 
 extern const struct paging_mode *hap_paging_get_mode(struct vcpu *);
 
diff --git a/xen/include/asm-x86/hvm/dirty_vram.h b/xen/include/asm-x86/hvm/dirty_vram.h
new file mode 100644
index 0000000..b8b92cc
--- /dev/null
+++ b/xen/include/asm-x86/hvm/dirty_vram.h
@@ -0,0 +1,157 @@
+/******************************************************************************
+ * include/asm-x86/hvm/dirty_vram.h
+ *
+ * Interface for tracking dirty VRAM pages
+ *
+ * Copyright (c) 2012 Citrix Systems, Inc. (Robert Phillips)
+ * Parts of this code are Copyright (c) 2006 by XenSource Inc.
+ * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
+ * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#ifndef _DIRTY_VRAM_H
+#define _DIRTY_VRAM_H
+
+/* In shadow mode we need to bookkeep all the L1 page table entries that
+ * map a frame buffer page.  Struct dv_paddr_link does this
+ * by recording the address of an L1 page table entry for some frame buffer page.
+ * It also has a link to additional pl entries if the frame buffer page
+ * has multiple mappings. */
+typedef struct dv_paddr_link {
+    paddr_t sl1ma;
+    struct dv_paddr_link *pl_next;
+} dv_paddr_link_t;
+
+/* This defines an extension page of pl entries for FB pages with multiple
+ * mappings. All such pages (of a domain) are linked together. */
+typedef struct dv_paddr_link_ext {
+    struct list_head ext_link;
+    dv_paddr_link_t entries[(PAGE_SIZE-sizeof(struct list_head))/sizeof(dv_paddr_link_t)];
+} dv_paddr_link_ext_t;
+
+/* This defines a single frame buffer range.  It bookkeeps all the level 1 PTEs
+ * that map guest pages within that range.
+ * All such ranges (of a domain) are linked together. */
+typedef struct dv_range {
+    struct list_head range_link; /* the several ranges form a linked list */
+    unsigned long begin_pfn;
+    unsigned long end_pfn;
+    dv_paddr_link_t *pl_tab; /* table has 1 pl entry per pfn in range */
+    int nr_mappings;  /* total number of mappings in this range */
+    int mappings_hwm; /* high water mark of max mapping count */
+    unsigned int dirty_count;
+} dv_range_t;
+
+/* This contains all the data structures required by a domain to
+ * bookkeep the dirty pages within its frame buffers. */
+typedef struct dv_dirty_vram {
+    struct list_head range_head; /* head of the linked list of ranges */
+    struct list_head ext_head; /* head of list of extension pages */
+    dv_paddr_link_t *pl_free; /* free list of pl's within extension pages */
+    int nr_ranges; /* bookkeeps number of ranges */
+    int ranges_hwm; /* high water mark of max number of ranges */
+} dv_dirty_vram_t;
+
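+/* Illustrative sketch only (not part of the interface): for a range and a
+ * pfn offset i within it, the recorded shadow L1 entries can be walked as
+ *
+ *     dv_paddr_link_t *pl;
+ *     for ( pl = &range->pl_tab[i]; pl != NULL; pl = pl->pl_next )
+ *         if ( pl->sl1ma != INVALID_PADDR )
+ *             ... pl->sl1ma is the machine address of one mapping L1PTE ...
+ *
+ * pl_tab holds the first mapping inline; any further mappings use entries
+ * carved from dv_paddr_link_ext pages, threaded through pl_next. */
+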
+/* Allocates domain's dirty_vram structure */
+dv_dirty_vram_t *
+dirty_vram_alloc(struct domain *d);
+
+/* Returns domain's dirty_vram structure,
+ * allocating it if necessary */
+dv_dirty_vram_t *
+dirty_vram_find_or_alloc(struct domain *d);
+
+/* Frees domain's dirty_vram structure */
+void dirty_vram_free(struct domain *d);
+
+/* Returns dirty vram range containing gfn, NULL if none */
+struct dv_range *
+dirty_vram_range_find_gfn(struct domain *d,
+                          unsigned long gfn);
+
+/* Returns dirty vram range matching [ begin_pfn .. begin_pfn+nr ), NULL if none */
+dv_range_t *
+dirty_vram_range_find(struct domain *d,
+                      unsigned long begin_pfn,
+                      unsigned long nr);
+
+/* Allocate dirty vram range containing [ begin_pfn .. begin_pfn+nr ),
+ * freeing any existing range that overlaps the new range. */
+dv_range_t *
+dirty_vram_range_alloc(struct domain *d,
+                       unsigned long begin_pfn,
+                       unsigned long nr);
+
+/* Returns dirty vram range matching [ begin_pfn .. begin_pfn+nr ),
+ * creating a range if none already exists and
+ * freeing any existing range that overlaps the new range. */
+dv_range_t *
+dirty_vram_range_find_or_alloc(struct domain *d,
+                               unsigned long begin_pfn,
+                               unsigned long nr);
+
+void dirty_vram_range_free(struct domain *d,
+                           dv_range_t *range);
+
+/* Bookkeep PTE address of a frame buffer page */
+int dirty_vram_range_update(struct domain *d,
+                            unsigned long gfn,
+                            paddr_t sl1ma,
+                            int set);
+
+/* smfn is no longer a shadow page.  Remove it from any
+ * dirty vram range mapping. */
+void
+dirty_vram_delete_shadow(struct vcpu *v,
+                         unsigned long gfn,
+                         unsigned int shadow_type, 
+                         mfn_t smfn);
+
+
+/* Scan all the L1 tables looking for VRAM mappings.
+ * Record them in the domain's dv_dirty_vram structure */
+void sh_find_all_vram_mappings(struct vcpu *v,
+                               dv_range_t *range);
+
+/* Free a paddr_link struct, given address of its
+ * predecessor in singly-linked list */
+dv_paddr_link_t *
+free_paddr_link(struct domain *d,
+                dv_paddr_link_t **ppl,
+                dv_paddr_link_t *pl);
+
+
+/* Enable VRAM dirty tracking. */
+int
+shadow_track_dirty_vram(struct domain *d,
+			unsigned long first_pfn,
+			unsigned long nr,
+			XEN_GUEST_HANDLE_64(uint8) dirty_bitmap);
+
+int
+hap_track_dirty_vram(struct domain *d,
+		     unsigned long begin_pfn,
+		     unsigned long nr,
+		     XEN_GUEST_HANDLE_64(uint8) dirty_bitmap);
+
+void
+hap_clean_vram_tracking_range(struct domain *d,
+			      unsigned long begin_pfn,
+			      unsigned long nr,
+			      uint8_t *dirty_bitmap);
+
+#endif /* _DIRTY_VRAM_H */
diff --git a/xen/include/asm-x86/hvm/domain.h b/xen/include/asm-x86/hvm/domain.h
index 27b3de5..6146542 100644
--- a/xen/include/asm-x86/hvm/domain.h
+++ b/xen/include/asm-x86/hvm/domain.h
@@ -74,7 +74,7 @@ struct hvm_domain {
     struct list_head       pinned_cacheattr_ranges;
 
     /* VRAM dirty support. */
-    struct sh_dirty_vram *dirty_vram;
+    struct dv_dirty_vram * dirty_vram;
 
     /* If one of vcpus of this domain is in no_fill_mode or
      * mtrr/pat between vcpus is not the same, set is_in_uc_mode
diff --git a/xen/include/asm-x86/paging.h b/xen/include/asm-x86/paging.h
index d9b6950..fba06b0 100644
--- a/xen/include/asm-x86/paging.h
+++ b/xen/include/asm-x86/paging.h
@@ -137,10 +137,10 @@ struct paging_mode {
 void paging_free_log_dirty_bitmap(struct domain *d);
 
 /* get the dirty bitmap for a specific range of pfns */
-int paging_log_dirty_range(struct domain *d,
-                           unsigned long begin_pfn,
-                           unsigned long nr,
-                           XEN_GUEST_HANDLE_64(uint8) dirty_bitmap);
+void paging_log_dirty_range(struct domain *d,
+                            unsigned long begin_pfn,
+                            unsigned long nr,
+                            uint8_t *dirty_bitmap);
 
 /* enable log dirty */
 int paging_log_dirty_enable(struct domain *d);
@@ -161,6 +161,11 @@ void paging_mark_dirty(struct domain *d, unsigned long guest_mfn);
  * This is called from inside paging code, with the paging lock held. */
 int paging_mfn_is_dirty(struct domain *d, mfn_t gmfn);
 
+/* mark a page as dirty, from hap page fault handler */
+void paging_mark_dirty_hap(struct domain *d,
+                           unsigned long pfn,
+                           unsigned long guest_mfn);
+
 /*
  * Log-dirty radix tree indexing:
  *   All tree nodes are PAGE_SIZE bytes, mapped on-demand.
@@ -183,15 +188,6 @@ int paging_mfn_is_dirty(struct domain *d, mfn_t gmfn);
 #define L4_LOGDIRTY_IDX(pfn) 0
 #endif
 
-/* VRAM dirty tracking support */
-struct sh_dirty_vram {
-    unsigned long begin_pfn;
-    unsigned long end_pfn;
-    paddr_t *sl1ma;
-    uint8_t *dirty_bitmap;
-    s_time_t last_dirty;
-};
-
 /*****************************************************************************
  * Entry points into the paging-assistance code */
 
diff --git a/xen/include/asm-x86/shadow.h b/xen/include/asm-x86/shadow.h
index 88a8cd2..bdb8dcd 100644
--- a/xen/include/asm-x86/shadow.h
+++ b/xen/include/asm-x86/shadow.h
@@ -62,12 +62,6 @@ void shadow_vcpu_init(struct vcpu *v);
 /* Enable an arbitrary shadow mode.  Call once at domain creation. */
 int shadow_enable(struct domain *d, u32 mode);
 
-/* Enable VRAM dirty bit tracking. */
-int shadow_track_dirty_vram(struct domain *d,
-                            unsigned long first_pfn,
-                            unsigned long nr,
-                            XEN_GUEST_HANDLE_64(uint8) dirty_bitmap);
-
 /* Handler for shadow control ops: operations from user-space to enable
  * and disable ephemeral shadow modes (test mode and log-dirty mode) and
  * manipulate the log-dirty bitmap. */
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 35+ messages in thread

* Re: [PATCH] Provide support for multiple frame buffers in Xen.
  2012-10-16 18:15 [PATCH] Provide support for multiple frame buffers in Xen Robert Phillips
@ 2012-10-16 18:21 ` Robert Phillips
  2012-10-22 16:10 ` Tim Deegan
  2012-11-01 14:24 ` Tim Deegan
  2 siblings, 0 replies; 35+ messages in thread
From: Robert Phillips @ 2012-10-16 18:21 UTC (permalink / raw)
  To: Robert Phillips, xen-devel

[-- Attachment #1: Type: text/plain, Size: 83644 bytes --]

Here is a document describing this patch.

Robert Phillips
Principal Software Engineer,  XenClient-Enterprise - Westford
robert.phillips@citrix.com

-----Original Message-----
From: Robert Phillips [mailto:robert.phillips@citrix.com]
Sent: Tuesday, October 16, 2012 2:15 PM
To: xen-devel@lists.xen.org
Cc: Robert Phillips; Robert Phillips
Subject: [PATCH] Provide support for multiple frame buffers in Xen.

From: Robert Phillips <robert.phillips@virtualcomputer.com>

Support is provided for both shadow and hardware assisted paging (HAP) modes.
This code bookkeeps the set of video frame buffers (vram),
detects when the guest has modified any of those buffers and, upon request,
returns a bitmap of the modified pages.

This lets other software components re-paint the portions of the monitor (or monitors) that have changed.
Each monitor has a frame buffer of some size at some position in guest physical memory.
The set of frame buffers being tracked can change over time as monitors are plugged and unplugged.

Signed-Off-By: Robert Phillips <robert.phillips@citrix.com>
---
 xen/arch/x86/hvm/Makefile            |    3 +-
 xen/arch/x86/hvm/dirty_vram.c        |  878 ++++++++++++++++++++++++++++++++++
 xen/arch/x86/hvm/hvm.c               |    4 +-
 xen/arch/x86/mm/hap/hap.c            |  140 +-----
 xen/arch/x86/mm/paging.c             |  232 ++++-----
 xen/arch/x86/mm/shadow/common.c      |  335 +++++++------
 xen/arch/x86/mm/shadow/multi.c       |  169 +++----
 xen/arch/x86/mm/shadow/multi.h       |    7 +-
 xen/arch/x86/mm/shadow/types.h       |    1 +
 xen/include/asm-x86/hap.h            |    4 -
 xen/include/asm-x86/hvm/dirty_vram.h |  157 ++++++
 xen/include/asm-x86/hvm/domain.h     |    2 +-
 xen/include/asm-x86/paging.h         |   22 +-
 xen/include/asm-x86/shadow.h         |    6 -
 14 files changed, 1403 insertions(+), 557 deletions(-)
 create mode 100644 xen/arch/x86/hvm/dirty_vram.c
 create mode 100644 xen/include/asm-x86/hvm/dirty_vram.h

diff --git a/xen/arch/x86/hvm/Makefile b/xen/arch/x86/hvm/Makefile
index eea5555..f37736b 100644
--- a/xen/arch/x86/hvm/Makefile
+++ b/xen/arch/x86/hvm/Makefile
@@ -2,6 +2,7 @@ subdir-y += svm
 subdir-y += vmx

 obj-y += asid.o
+obj-y += dirty_vram.o
 obj-y += emulate.o
 obj-y += hpet.o
 obj-y += hvm.o
@@ -22,4 +23,4 @@ obj-y += vlapic.o
 obj-y += vmsi.o
 obj-y += vpic.o
 obj-y += vpt.o
-obj-y += vpmu.o
\ No newline at end of file
+obj-y += vpmu.o
diff --git a/xen/arch/x86/hvm/dirty_vram.c b/xen/arch/x86/hvm/dirty_vram.c
new file mode 100644
index 0000000..22375c2
--- /dev/null
+++ b/xen/arch/x86/hvm/dirty_vram.c
@@ -0,0 +1,878 @@
+/*
+ * arch/x86/hvm/dirty_vram.c: Bookkeep/query dirty VRAM pages
+ * with support for multiple frame buffers.
+ *
+ * Copyright (c) 2012, Citrix Systems, Inc. (Robert Phillips)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+
+#include <xen/types.h>
+#include <xen/sched.h>
+#include <xen/guest_access.h>
+#include <asm/shadow.h>
+#include <asm/hvm/dirty_vram.h>
+#include "../mm/mm-locks.h"
+
+#define DEBUG_stop_tracking_all_vram          1
+#define DEBUG_allocating_dirty_vram_range     1
+#define DEBUG_high_water_mark_for_vram_ranges 1
+#define DEBUG_freeing_dirty_vram_range        1
+#define DEBUG_allocate_paddr_links_page       0
+#define DEBUG_update_vram_mapping             0
+
+/* Allocates domain's dirty_vram structure */
+dv_dirty_vram_t *
+dirty_vram_alloc(struct domain *d)
+{
+    dv_dirty_vram_t *dirty_vram;
+    ASSERT( paging_locked_by_me(d) );
+    dirty_vram = d->arch.hvm_domain.dirty_vram = xmalloc(dv_dirty_vram_t);
+    if ( dirty_vram )
+    {
+        memset(dirty_vram, 0, sizeof(*dirty_vram));
+        INIT_LIST_HEAD(&dirty_vram->range_head);
+        INIT_LIST_HEAD(&dirty_vram->ext_head);
+    }
+    return dirty_vram;
+}
+
+/* Returns domain's dirty_vram structure,
+ * allocating it if necessary */
+dv_dirty_vram_t *
+dirty_vram_find_or_alloc(struct domain *d)
+{
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    ASSERT( paging_locked_by_me(d) );
+    if ( !dirty_vram )
+        dirty_vram = dirty_vram_alloc(d);
+    return dirty_vram;
+}
+
+
+/* Free domain's dirty_vram structure */
+void dirty_vram_free(struct domain *d)
+{
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    ASSERT( paging_locked_by_me(d) );
+    if ( dirty_vram )
+    {
+        struct list_head *curr, *next;
+        /* Free all the ranges */
+        list_for_each_safe(curr, next, &dirty_vram->range_head)
+        {
+            dv_range_t *range = list_entry(curr, dv_range_t, range_link);
+#if DEBUG_stop_tracking_all_vram
+            gdprintk(XENLOG_DEBUG, "[%05lx:%05lx] stop tracking all vram\n",
+                     range->begin_pfn, range->end_pfn);
+#endif
+            xfree(range->pl_tab);
+            xfree(range);
+        }
+        /* Free all the extension pages */
+        list_for_each_safe(curr, next, &dirty_vram->ext_head)
+        {
+            xfree(curr);
+        }
+        xfree(dirty_vram);
+        d->arch.hvm_domain.dirty_vram = NULL;
+    }
+}
+
+/* Returns dirty vram range containing gfn, NULL if none */
+struct dv_range *
+dirty_vram_range_find_gfn(struct domain *d,
+                          unsigned long gfn)
+{
+    struct dv_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    ASSERT( paging_locked_by_me(d) );
+    if ( dirty_vram )
+    {
+        struct list_head *curr;
+        list_for_each(curr, &dirty_vram->range_head)
+        {
+            dv_range_t *range = list_entry(curr, dv_range_t, range_link);
+            if ( gfn >= range->begin_pfn &&
+                 gfn <  range->end_pfn )
+            {
+                return range;
+            }
+        }
+    }
+    return NULL;
+}
+
+/* Returns pointer to dirty vram range matching [ begin_pfn .. begin_pfn+nr ), NULL if none. */
+dv_range_t *
+dirty_vram_range_find(struct domain *d,
+                      unsigned long begin_pfn,
+                      unsigned long nr)
+{
+    unsigned long end_pfn = begin_pfn + nr;
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    ASSERT( paging_locked_by_me(d) );
+    if ( dirty_vram )
+    {
+        struct list_head *curr;
+        list_for_each(curr, &dirty_vram->range_head)
+        {
+            dv_range_t *range = list_entry(curr, dv_range_t, range_link);
+            if ( begin_pfn == range->begin_pfn &&
+                 end_pfn   == range->end_pfn )
+            {
+                return range;
+            }
+        }
+    }
+    return NULL;
+}
+
+/* Allocate specified dirty_vram range */
+static dv_range_t *
+_dirty_vram_range_alloc(struct domain *d,
+                        unsigned long begin_pfn,
+                        unsigned long nr)
+{
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    dv_range_t *range = NULL;
+    unsigned long end_pfn = begin_pfn + nr;
+    dv_paddr_link_t *pl_tab = NULL;
+    int i;
+
+    ASSERT( paging_locked_by_me(d) );
+    ASSERT( dirty_vram != NULL );
+
+#if DEBUG_allocating_dirty_vram_range
+    gdprintk(XENLOG_DEBUG,
+             "[%05lx:%05lx] Allocating dirty vram range hap:%d\n",
+             begin_pfn, end_pfn,
+             d->arch.hvm_domain.hap_enabled);
+#endif
+
+    range = xmalloc(dv_range_t);
+    if (range == NULL)
+        goto err_out;
+
+    memset(range, 0, sizeof(dv_range_t));
+    INIT_LIST_HEAD(&range->range_link);
+
+    range->begin_pfn = begin_pfn;
+    range->end_pfn = end_pfn;
+
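+    /* The per-pfn sl1e table is only needed in shadow mode; in HAP mode
+     * dirty pages are tracked via p2m type changes, so pl_tab stays NULL. */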
+    if (!hap_enabled(d))
+    {
+        if ( (pl_tab = xmalloc_array(dv_paddr_link_t, nr)) == NULL )
+        {
+            goto err_out;
+        }
+        for (i = 0; i != nr; i++)
+        {
+            pl_tab[i].sl1ma = INVALID_PADDR;
+            pl_tab[i].pl_next = NULL;
+        }
+    }
+
+    range->pl_tab = pl_tab;
+    range->mappings_hwm = 1;
+
+    list_add(&range->range_link, &dirty_vram->range_head);
+    if ( ++dirty_vram->nr_ranges > dirty_vram->ranges_hwm )
+    {
+        dirty_vram->ranges_hwm = dirty_vram->nr_ranges;
+#if DEBUG_high_water_mark_for_vram_ranges
+        gdprintk(XENLOG_DEBUG,
+                 "High water mark for number of vram ranges is now:%d\n",
+                 dirty_vram->ranges_hwm);
+#endif
+    }
+    return range;
+
+ err_out:
+    xfree(pl_tab);
+    xfree(range);
+    return NULL;
+}
+
+
+/* Frees specified dirty_vram range */
+void dirty_vram_range_free(struct domain *d,
+                           dv_range_t *range)
+{
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    ASSERT( paging_locked_by_me(d) );
+    if ( dirty_vram )
+    {
+        int i, nr = range->end_pfn - range->begin_pfn;
+
+#if DEBUG_freeing_dirty_vram_range
+        gdprintk(XENLOG_DEBUG,
+                 "[%05lx:%05lx] Freeing dirty vram range\n",
+                 range->begin_pfn, range->end_pfn);
+#endif
+
+        if (range->pl_tab)
+        {
+            for (i = 0; i != nr; i++)
+            {
+                dv_paddr_link_t *plx;
+                plx = range->pl_tab[i].pl_next;
+                /* Does current FB page have multiple mappings? */
+                if (plx) /* yes */
+                {
+                    /* Find the last element in singly-linked list */
+                    while (plx->pl_next != NULL)
+                        plx = plx->pl_next;
+                    /* Prepend whole list to the free list */
+                    plx->pl_next = dirty_vram->pl_free;
+                    dirty_vram->pl_free = range->pl_tab[i].pl_next;
+                }
+            }
+            xfree(range->pl_tab);
+            range->pl_tab = NULL;
+        }
+
+        /* Remove range from the linked list, free it, and adjust count. */
+        list_del(&range->range_link);
+        xfree(range);
+        dirty_vram->nr_ranges--;
+    }
+}
+
+/* dirty_vram_range_alloc()
+ * This function ensures that the new range does not overlap any existing
+ * ranges -- deleting them if necessary -- and then calls _dirty_vram_range_alloc
+ * to actually allocate the new range.
+ */
+dv_range_t *
+dirty_vram_range_alloc(struct domain *d,
+                        unsigned long begin_pfn,
+                        unsigned long nr)
+{
+    unsigned long end_pfn = begin_pfn + nr;
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    dv_range_t *range;
+    struct list_head *curr, *next;
+
+    ASSERT( paging_locked_by_me(d) );
+    ASSERT( dirty_vram != NULL );
+
+    /* Ranges cannot overlap so
+     * free any range that overlaps [ begin_pfn .. end_pfn ) */
+    list_for_each_safe(curr, next, &dirty_vram->range_head)
+    {
+        dv_range_t *rng = list_entry(curr, dv_range_t, range_link);
+        if ( ((rng->begin_pfn <= begin_pfn) && (begin_pfn <  rng->end_pfn)) ||
+             ((begin_pfn <= rng->begin_pfn) && (rng->begin_pfn < end_pfn)) )
+        {
+            /* Different tracking, tear the previous down. */
+            dirty_vram_range_free(d, rng);
+        }
+    }
+
+    range = _dirty_vram_range_alloc(d, begin_pfn, nr);
+
+    return range;
+}
+
+/* dirty_vram_range_find_or_alloc()
+ * Find the range for [begin_pfn:begin_pfn+nr).
+ * If it doesn't exist, create it.
+ */
+dv_range_t *
+dirty_vram_range_find_or_alloc(struct domain *d,
+                                unsigned long begin_pfn,
+                                unsigned long nr)
+{
+    dv_range_t *range;
+    ASSERT( paging_locked_by_me(d) );
+    range = dirty_vram_range_find(d, begin_pfn, nr);
+    if ( !range )
+        range = dirty_vram_range_alloc(d, begin_pfn, nr);
+    return range;
+}
+
+
+
+/* Allocate a dv_paddr_link struct */
+static dv_paddr_link_t *
+alloc_paddr_link(struct domain *d)
+{
+    dv_paddr_link_t * pl = NULL;
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
+
+    ASSERT( paging_locked_by_me(d) );
+    BUILD_BUG_ON(sizeof(dv_paddr_link_ext_t) > PAGE_SIZE);
+    /* Is the list of free pl's empty? */
+    if (dirty_vram->pl_free == NULL) /* yes */
+    {
+        /* Allocate another page of pl's.
+         * Link them all together and point the free list head at them */
+        int i;
+        dv_paddr_link_ext_t *ext = xmalloc(dv_paddr_link_ext_t);
+        if (ext == NULL)
+            goto out;
+
+#if DEBUG_allocate_paddr_links_page
+        gdprintk(XENLOG_DEBUG, "Allocated another page of paddr_links\n");
+#endif
+        list_add(&ext->ext_link, &dirty_vram->ext_head);
+
+        /* initialize and link together the new pl entries */
+        for (i = 0; i != ARRAY_SIZE(ext->entries); i++)
+        {
+            ext->entries[i].sl1ma = INVALID_PADDR;
+            ext->entries[i].pl_next = &ext->entries[i+1];
+        }
+        ext->entries[ARRAY_SIZE(ext->entries) - 1].pl_next = NULL;
+        dirty_vram->pl_free = &ext->entries[0];
+    }
+    pl = dirty_vram->pl_free;
+    dirty_vram->pl_free = pl->pl_next;
+
+    pl->sl1ma = INVALID_PADDR;
+    pl->pl_next = NULL;
+ out:
+    return pl;
+}
+
+
+/* Free a paddr_link struct, given address of its predecessor in linked list */
+dv_paddr_link_t *
+free_paddr_link(struct domain *d,
+                dv_paddr_link_t **ppl,
+                dv_paddr_link_t *pl)
+{
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    dv_paddr_link_t *npl; /* next pl */
+
+    ASSERT( paging_locked_by_me(d) );
+    /* extension mapping? */
+    if (ppl) /* yes. free it */
+    {
+        pl = (*ppl);
+        (*ppl) = npl = pl->pl_next;
+    }
+    else  /* main table */
+    {
+        /* Move the 2nd mapping into the main table slot
+         * and free the 2nd mapping's link. */
+        dv_paddr_link_t * spl;
+        spl = pl->pl_next;
+        if (spl == NULL)
+        {
+            pl->sl1ma = INVALID_PADDR;
+            return pl;
+        }
+        pl->sl1ma = spl->sl1ma;
+        pl->pl_next = spl->pl_next;
+        npl = pl; /* reprocess main table entry again */
+        pl = spl;
+    }
+    pl->sl1ma = INVALID_PADDR;
+    pl->pl_next = dirty_vram->pl_free;
+    dirty_vram->pl_free = pl;
+    return npl;
+}
+
+
+/* dirty_vram_range_update()
+ * This is called whenever a level 1 page table entry is modified.
+ * If the L1PTE is being cleared, the function removes any paddr_links
+ * that refer to it.
+ * If the L1PTE is being set to a frame buffer page, a paddr_link is
+ * created for that page's entry in pl_tab.
+ * Returns 1 iff entry found and set or cleared.
+ */
+int dirty_vram_range_update(struct domain *d,
+                            unsigned long gfn,
+                            paddr_t sl1ma,
+                            int set)
+{
+    int effective = 0;
+    dv_range_t *range;
+
+    ASSERT(paging_locked_by_me(d));
+    range = dirty_vram_range_find_gfn(d, gfn);
+    if ( range )
+    {
+        unsigned long i = gfn - range->begin_pfn;
+        dv_paddr_link_t *pl = &range->pl_tab[ i ];
+        dv_paddr_link_t **ppl = NULL;
+        int len = 0;
+
+        /* find matching entry (pl), if any, and its predecessor
+         * in linked list (ppl) */
+        while (pl != NULL)
+        {
+            if (pl->sl1ma == sl1ma || pl->sl1ma == INVALID_PADDR )
+                break;
+            ppl = &pl->pl_next;
+            pl = *ppl;
+            len++;
+        }
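+        /* Note: the walk also stops at an unused slot (sl1ma == INVALID_PADDR),
+         * typically the main-table entry for a page with no mappings yet;
+         * in the "set" case below that slot is reused directly. */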
+
+        if (set)
+        {
+            /* Did we find sl1ma in either the main table or the linked list? */
+            if (pl == NULL) /* no, so we'll need to alloc a link */
+            {
+                ASSERT(ppl != NULL);
+                /* alloc link and append it to list */
+                (*ppl) = pl = alloc_paddr_link(d);
+                if (pl == NULL)
+                    goto out;
+            }
+            if ( pl->sl1ma != sl1ma )
+            {
+                pl->sl1ma = sl1ma;
+                range->nr_mappings++;
+            }
+            effective = 1;
+            if (len > range->mappings_hwm)
+            {
+                range->mappings_hwm = len;
+#if DEBUG_update_vram_mapping
+                gdprintk(XENLOG_DEBUG,
+                         "[%lx] set      sl1ma:%lx hwm:%d mappings:%d freepages:%d\n",
+                         gfn, sl1ma,
+                         range->mappings_hwm,
+                         range->nr_mappings,
+                         d->arch.paging.shadow.free_pages);
+#endif
+            }
+        }
+        else /* clear */
+        {
+            if (pl && pl->sl1ma == sl1ma )
+            {
+#if DEBUG_update_vram_mapping
+                gdprintk(XENLOG_DEBUG,
+                         "[%lx] clear    sl1ma:%lx mappings:%d\n",
+                         gfn, sl1ma,
+                         range->nr_mappings-1);
+#endif
+                free_paddr_link(d, ppl, pl);
+                if ( --range->nr_mappings == 0 )
+                {
+                    dirty_vram_range_free(d, range);
+                }
+                effective = 1;
+            }
+        }
+    }
+ out:
+    return effective;
+}
+
+
+/* shadow_scan_dirty_flags()
+ * This produces a dirty bitmap for the range by examining every
+ * L1PTE referenced by some dv_paddr_link in the range's pl_tab table.
+ * It tests and clears each such L1PTE's dirty flag.
+ */
+static int shadow_scan_dirty_flags(struct domain *d,
+                                   dv_range_t *range,
+                                   uint8_t *dirty_bitmap)
+{
+    int flush_tlb = 0;
+    unsigned long i;
+    unsigned long nr = range->end_pfn - range->begin_pfn;
+#ifdef __i386__
+    unsigned long map_mfn = INVALID_MFN;
+    void *map_sl1p = NULL;
+#endif
+
+    ASSERT( paging_locked_by_me(d) );
+    /* Iterate over VRAM to track dirty bits. */
+    for ( i = 0; i < nr; i++ )
+    {
+        int dirty = 0, len = 1;
+        dv_paddr_link_t *pl;
+        for (pl = &range->pl_tab[i]; pl; pl = pl->pl_next, len++)
+        {
+#ifdef __i386__
+            void *sl1p;
+            unsigned long sl1mfn;
+#endif
+            l1_pgentry_t *sl1e;
+            paddr_t sl1ma = pl->sl1ma;
+            if (sl1ma == INVALID_PADDR) /* FB page is unmapped */
+                continue;
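+            /* On 32-bit builds there is no complete direct map, so the shadow
+             * L1 page must be mapped before its entry can be read; the mapping
+             * is cached across iterations since consecutive entries often live
+             * in the same page.  64-bit builds use maddr_to_virt() directly. */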
+#ifdef __i386__
+            sl1p = map_sl1p;
+            sl1mfn = paddr_to_pfn(sl1ma);
+
+            if ( sl1mfn != map_mfn )
+            {
+                if ( map_sl1p )
+                    sh_unmap_domain_page(map_sl1p);
+                map_sl1p = sl1p = sh_map_domain_page(_mfn(sl1mfn));
+                map_mfn = sl1mfn;
+            }
+            sl1e = sl1p + (sl1ma & ~PAGE_MASK);
+#else
+            sl1e = maddr_to_virt(sl1ma);
+#endif
+            if ( l1e_get_flags(*sl1e) & _PAGE_DIRTY )
+            {
+                dirty = 1;
+                /* Clear dirty so we can detect if page gets re-dirtied */
+                /* Note: this is atomic, so we may clear a
+                 * _PAGE_ACCESSED set by another processor. */
+                l1e_remove_flags(*sl1e, _PAGE_DIRTY);
+                flush_tlb = 1;
+            }
+        } /* for */
+        if ( dirty )
+        {
+            dirty_bitmap[i >> 3] |= (1 << (i & 7));
+        }
+    }
+
+#ifdef __i386__
+    if ( map_sl1p )
+        sh_unmap_domain_page(map_sl1p);
+#endif
+    return flush_tlb;
+}
+
+
+/* shadow_track_dirty_vram()
+ * This is the API called on behalf of the guest (e.g. by the device model)
+ * to determine which pages in the range [begin_pfn:begin_pfn+nr) have been
+ * dirtied since the last call.
+ * It creates the domain's dv_dirty_vram on demand.
+ * It creates ranges on demand when some [begin_pfn:begin_pfn+nr) is first encountered.
+ * To collect the dirty bitmask it calls shadow_scan_dirty_flags().
+ * It copies the dirty bitmask into guest storage.
+ */
+int shadow_track_dirty_vram(struct domain *d,
+                            unsigned long begin_pfn,
+                            unsigned long nr,
+                            XEN_GUEST_HANDLE_64(uint8) guest_dirty_bitmap)
+{
+    int rc = 0;
+    unsigned long end_pfn = begin_pfn + nr;
+    int flush_tlb = 0;
+    dv_range_t *range;
+    struct p2m_domain *p2m = p2m_get_hostp2m(d);
+
+    if (end_pfn < begin_pfn
+            || begin_pfn > p2m->max_mapped_pfn
+            || end_pfn >= p2m->max_mapped_pfn)
+        return -EINVAL;
+
+    paging_lock(d);
+
+    if ( !nr || guest_handle_is_null(guest_dirty_bitmap) )
+    {
+        goto out;
+    }
+
+    if ( !dirty_vram_find_or_alloc(d))
+    {
+        rc = -ENOMEM;
+        goto out;
+    }
+
+    range = dirty_vram_range_find(d, begin_pfn, nr);
+    if ( !range )
+    {
+        range = dirty_vram_range_alloc(d, begin_pfn, nr);
+        if ( range )
+            sh_find_all_vram_mappings(d->vcpu[0], range);
+    }
+    if ( range )
+    {
+        int size = (nr + BITS_PER_LONG - 1) / BITS_PER_LONG;
+        unsigned long dirty_bitmap[size];
+
+        memset(dirty_bitmap, 0x00, size * BYTES_PER_LONG);
+
+        flush_tlb |= shadow_scan_dirty_flags(d, range, (uint8_t*)dirty_bitmap);
+
+        rc = -EFAULT;
+        if ( copy_to_guest(guest_dirty_bitmap,
+                           (uint8_t*)dirty_bitmap,
+                           size * BYTES_PER_LONG) == 0 )
+            rc = 0;
+    }
+    if ( flush_tlb )
+        flush_tlb_mask(d->domain_dirty_cpumask);
+
+out:
+    paging_unlock(d);
+    return rc;
+}
+
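+/* Illustrative sketch only (assumed caller, not part of this patch): once the
+ * bitmap has been copied out of guest memory, the dirty pfns are recovered as
+ *
+ *     for ( i = 0; i < nr; i++ )
+ *         if ( dirty_bitmap[i >> 3] & (1 << (i & 7)) )
+ *             ... page begin_pfn + i was written since the previous call ...
+ */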
+
+/************************************************/
+/*          HAP VRAM TRACKING SUPPORT           */
+/************************************************/
+
+/* hap_enable_vram_tracking()
+ * For all ranges, mark all vram pages in range as logdirty read-only.
+ */
+static int hap_enable_vram_tracking(struct domain *d)
+{
+    int rc = 0;
+    dv_dirty_vram_t *dirty_vram;
+    struct list_head *curr;
+
+    /* turn on PG_log_dirty bit in paging mode */
+    paging_lock(d);
+    d->arch.paging.mode |= PG_log_dirty;
+    paging_unlock(d);
+
+    p2m_lock(p2m_get_hostp2m(d));
+    paging_lock(d);
+
+    dirty_vram = d->arch.hvm_domain.dirty_vram;
+
+    /* dirty_vram != NULL iff we're tracking dirty vram.
+     * If we start tracking dirty pages for all memory then
+     * the dirty_vram structure is freed. */
+    if ( !dirty_vram )
+    {
+        rc = -EINVAL;
+        goto out;
+    }
+
+    /* set l1e entries of P2M table to be read-only. */
+    list_for_each(curr, &dirty_vram->range_head)
+    {
+        dv_range_t *range = list_entry(curr, dv_range_t, range_link);
+        gdprintk(XENLOG_DEBUG, "[%05lx:%05lx] enable  vram tracking\n",
+                 range->begin_pfn, range->end_pfn);
+        p2m_change_type_range(d, range->begin_pfn, range->end_pfn,
+                              p2m_ram_rw, p2m_ram_logdirty);
+    }
+
+    flush_tlb_mask(d->domain_dirty_cpumask);
+ out:
+    paging_unlock(d);
+    p2m_unlock(p2m_get_hostp2m(d));
+    if (rc)
+    {
+        paging_lock(d);
+        d->arch.paging.mode &= ~PG_log_dirty;
+        paging_unlock(d);
+    }
+    return rc;
+}
+
+/* hap_disable_vram_tracking()
+ * For all ranges, mark all vram pages in range as logdirty read-write.
+ */
+static int hap_disable_vram_tracking(struct domain *d)
+{
+    int rc = 0;
+    dv_dirty_vram_t *dirty_vram;
+    struct list_head *curr;
+
+    paging_lock(d);
+    d->arch.paging.mode &= ~PG_log_dirty;
+    paging_unlock(d);
+
+    p2m_lock(p2m_get_hostp2m(d));
+    paging_lock(d);
+
+    dirty_vram = d->arch.hvm_domain.dirty_vram;
+    if ( !dirty_vram )
+    {
+        rc = -EINVAL;
+        goto out;
+    }
+
+    /* set l1e entries of P2M table with normal mode */
+    list_for_each(curr, &dirty_vram->range_head)
+    {
+        dv_range_t *range = list_entry(curr, dv_range_t, range_link);
+        gdprintk(XENLOG_DEBUG, "[%05lx:%05lx] disable vram tracking\n",
+                 range->begin_pfn, range->end_pfn);
+        p2m_change_type_range(d, range->begin_pfn, range->end_pfn,
+                              p2m_ram_logdirty, p2m_ram_rw);
+    }
+    flush_tlb_mask(d->domain_dirty_cpumask);
+ out:
+    paging_unlock(d);
+    p2m_unlock(p2m_get_hostp2m(d));
+    if (rc)
+    {
+        paging_lock(d);
+        d->arch.paging.mode |= PG_log_dirty;
+        paging_unlock(d);
+    }
+    return rc;
+}
+
+/* hap_clean_vram_tracking_range()
+ * For all the pages in the range specified by [begin_pfn,nr),
+ * note in the dirty bitmap any page that has been marked as read-write,
+ * which signifies that the page has been dirtied, and reset the page
+ * to ram_logdirty.
+ */
+void hap_clean_vram_tracking_range(struct domain *d,
+                                   unsigned long begin_pfn,
+                                   unsigned long nr,
+                                   uint8_t *dirty_bitmap)
+{
+    int i;
+    unsigned long pfn;
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    dv_range_t *range;
+
+    ASSERT(p2m_locked_by_me(p2m_get_hostp2m(d)));
+    ASSERT(paging_locked_by_me(d));
+
+    if ( !dirty_vram )
+    {
+        gdprintk(XENLOG_DEBUG, "Should only be called while tracking dirty vram.\n");
+        return;
+    }
+
+    range = dirty_vram_range_find(d, begin_pfn, nr);
+    if (!range)
+        return;
+
+    /* set l1e entries of P2M table to be read-only. */
+    /* On first write, it page faults, its entry is changed to read-write,
+     * its bit in the dirty bitmap is set, and on retry the write succeeds. */
+    for (i = 0, pfn = range->begin_pfn; pfn < range->end_pfn; i++, pfn++)
+    {
+        p2m_type_t pt;
+        pt = p2m_change_type(d, pfn, p2m_ram_rw, p2m_ram_logdirty);
+        if (pt == p2m_ram_rw)
+            dirty_bitmap[i >> 3] |= (1 << (i & 7));
+    }
+    flush_tlb_mask(d->domain_dirty_cpumask);
+}
+
+static void hap_vram_tracking_init(struct domain *d)
+{
+    paging_log_dirty_init(d, hap_enable_vram_tracking,
+                          hap_disable_vram_tracking,
+                          NULL);
+}
+
+/* hap_track_dirty_vram()
+ * Create the domain's dv_dirty_vram struct on demand.
+ * Create a dirty vram range on demand when some [begin_pfn:begin_pfn+nr) is first encountered.
+ * Collect the guest_dirty bitmask, a bit mask of the dirtied vram pages, by
+ * calling paging_log_dirty_range().
+ */
+int hap_track_dirty_vram(struct domain *d,
+                         unsigned long begin_pfn,
+                         unsigned long nr,
+                         XEN_GUEST_HANDLE_64(uint8) guest_dirty_bitmap)
+{
+    long rc = 0;
+    dv_dirty_vram_t *dirty_vram;
+    int restart_log_dirty = 0;
+
+    paging_lock(d);
+    dirty_vram = d->arch.hvm_domain.dirty_vram;
+    if ( nr )
+    {
+        dv_range_t *range = NULL;
+        int size = (nr + BITS_PER_LONG - 1) / BITS_PER_LONG;
+        unsigned long dirty_bitmap[size];
+
+        /* Already tracking dirty vram? */
+        if ( paging_mode_log_dirty(d) && dirty_vram ) /* yes */
+        {
+            /* Handle the addition of another range */
+            range = dirty_vram_range_find(d, begin_pfn, nr);
+            if ( !range )
+            {
+                rc = -ENOMEM;
+                if ( !(range = dirty_vram_range_alloc(d, begin_pfn, nr)) )
+                    goto param_fail;
+                restart_log_dirty = 1;
+            }
+        }
+        /* Just starting to track dirty vram? */
+        else if ( !paging_mode_log_dirty(d) && !dirty_vram ) /* yes */
+        {
+            rc = -ENOMEM;
+            if ( !(dirty_vram = dirty_vram_alloc(d)) )
+                goto param_fail;
+
+            if ( !(range = dirty_vram_range_find_or_alloc(d, begin_pfn, nr)) )
+                goto param_fail;
+
+            restart_log_dirty = 1;
+            /* Initialize callbacks for vram tracking */
+            hap_vram_tracking_init(d);
+        }
+        else
+        {
+            /* Test for invalid combination */
+            if ( !paging_mode_log_dirty(d) && dirty_vram )
+                rc = -EINVAL;
+            else /* logging dirty of all memory, not tracking dirty vram */
+                rc = -ENODATA;
+            goto param_fail;
+        }
+
+        if (restart_log_dirty)
+        {
+            /* Restart log-dirty tracking (disable, then re-enable) so the
+             * newly added range is write-protected as well. */
+            paging_unlock(d);
+            if (paging_mode_log_dirty(d))
+                paging_log_dirty_disable(d);
+
+            rc = paging_log_dirty_enable(d);
+            paging_lock(d);
+            if (rc != 0)
+                goto param_fail;
+        }
+
+        paging_unlock(d);
+        memset(dirty_bitmap, 0x00, size * BYTES_PER_LONG);
+        paging_log_dirty_range(d, begin_pfn, nr, (uint8_t*)dirty_bitmap);
+        rc = -EFAULT;
+        if ( copy_to_guest(guest_dirty_bitmap,
+                           (uint8_t*)dirty_bitmap,
+                           size * BYTES_PER_LONG) == 0 )
+        {
+            rc = 0;
+        }
+    }
+    else
+    {
+        /* If zero pages specified while already tracking dirty vram
+         * then stop tracking */
+        if ( paging_mode_log_dirty(d) && dirty_vram ) {
+            paging_unlock(d);
+            rc = paging_log_dirty_disable(d);
+            paging_lock(d);
+            dirty_vram_free(d);
+        } else /* benign no-op */
+        {
+            rc = 0;
+        }
+        paging_unlock(d);
+    }
+
+    return rc;
+
+param_fail:
+    dirty_vram_free(d);
+    paging_unlock(d);
+    return rc;
+}
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index a5a1bcf..55553e4 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -57,6 +57,7 @@
 #include <asm/hvm/cacheattr.h>
 #include <asm/hvm/trace.h>
 #include <asm/hvm/nestedhvm.h>
+#include <asm/hvm/dirty_vram.h>
 #include <asm/mtrr.h>
 #include <asm/apic.h>
 #include <public/sched.h>
@@ -1433,8 +1434,7 @@ int hvm_hap_nested_page_fault(paddr_t gpa,
          */
         if ( access_w )
         {
-            paging_mark_dirty(v->domain, mfn_x(mfn));
-            p2m_change_type(v->domain, gfn, p2m_ram_logdirty, p2m_ram_rw);
+            paging_mark_dirty_hap(v->domain, gfn, mfn_x(mfn));
         }
         rc = 1;
         goto out_put_gfn;
diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c
index d2637d3..f31e4e5 100644
--- a/xen/arch/x86/mm/hap/hap.c
+++ b/xen/arch/x86/mm/hap/hap.c
@@ -41,6 +41,7 @@
 #include <asm/domain.h>
 #include <xen/numa.h>
 #include <asm/hvm/nestedhvm.h>
+#include <asm/hvm/dirty_vram.h>

 #include "private.h"

@@ -53,139 +54,6 @@
 #define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg))

 /************************************************/
-/*          HAP VRAM TRACKING SUPPORT           */
-/************************************************/
-
-static int hap_enable_vram_tracking(struct domain *d)
-{
-    struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
-
-    if ( !dirty_vram )
-        return -EINVAL;
-
-    /* turn on PG_log_dirty bit in paging mode */
-    paging_lock(d);
-    d->arch.paging.mode |= PG_log_dirty;
-    paging_unlock(d);
-
-    /* set l1e entries of P2M table to be read-only. */
-    p2m_change_type_range(d, dirty_vram->begin_pfn, dirty_vram->end_pfn,
-                          p2m_ram_rw, p2m_ram_logdirty);
-
-    flush_tlb_mask(d->domain_dirty_cpumask);
-    return 0;
-}
-
-static int hap_disable_vram_tracking(struct domain *d)
-{
-    struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
-
-    if ( !dirty_vram )
-        return -EINVAL;
-
-    paging_lock(d);
-    d->arch.paging.mode &= ~PG_log_dirty;
-    paging_unlock(d);
-
-    /* set l1e entries of P2M table with normal mode */
-    p2m_change_type_range(d, dirty_vram->begin_pfn, dirty_vram->end_pfn,
-                          p2m_ram_logdirty, p2m_ram_rw);
-
-    flush_tlb_mask(d->domain_dirty_cpumask);
-    return 0;
-}
-
-static void hap_clean_vram_tracking(struct domain *d)
-{
-    struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
-
-    if ( !dirty_vram )
-        return;
-
-    /* set l1e entries of P2M table to be read-only. */
-    p2m_change_type_range(d, dirty_vram->begin_pfn, dirty_vram->end_pfn,
-                          p2m_ram_rw, p2m_ram_logdirty);
-
-    flush_tlb_mask(d->domain_dirty_cpumask);
-}
-
-static void hap_vram_tracking_init(struct domain *d)
-{
-    paging_log_dirty_init(d, hap_enable_vram_tracking,
-                          hap_disable_vram_tracking,
-                          hap_clean_vram_tracking);
-}
-
-int hap_track_dirty_vram(struct domain *d,
-                         unsigned long begin_pfn,
-                         unsigned long nr,
-                         XEN_GUEST_HANDLE_64(uint8) dirty_bitmap)
-{
-    long rc = 0;
-    struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
-
-    if ( nr )
-    {
-        if ( paging_mode_log_dirty(d) && dirty_vram )
-        {
-            if ( begin_pfn != dirty_vram->begin_pfn ||
-                 begin_pfn + nr != dirty_vram->end_pfn )
-            {
-                paging_log_dirty_disable(d);
-                dirty_vram->begin_pfn = begin_pfn;
-                dirty_vram->end_pfn = begin_pfn + nr;
-                rc = paging_log_dirty_enable(d);
-                if (rc != 0)
-                    goto param_fail;
-            }
-        }
-        else if ( !paging_mode_log_dirty(d) && !dirty_vram )
-        {
-            rc = -ENOMEM;
-            if ( (dirty_vram = xmalloc(struct sh_dirty_vram)) == NULL )
-                goto param_fail;
-
-            dirty_vram->begin_pfn = begin_pfn;
-            dirty_vram->end_pfn = begin_pfn + nr;
-            d->arch.hvm_domain.dirty_vram = dirty_vram;
-            hap_vram_tracking_init(d);
-            rc = paging_log_dirty_enable(d);
-            if (rc != 0)
-                goto param_fail;
-        }
-        else
-        {
-            if ( !paging_mode_log_dirty(d) && dirty_vram )
-                rc = -EINVAL;
-            else
-                rc = -ENODATA;
-            goto param_fail;
-        }
-        /* get the bitmap */
-        rc = paging_log_dirty_range(d, begin_pfn, nr, dirty_bitmap);
-    }
-    else
-    {
-        if ( paging_mode_log_dirty(d) && dirty_vram ) {
-            rc = paging_log_dirty_disable(d);
-            xfree(dirty_vram);
-            dirty_vram = d->arch.hvm_domain.dirty_vram = NULL;
-        } else
-            rc = 0;
-    }
-
-    return rc;
-
-param_fail:
-    if ( dirty_vram )
-    {
-        xfree(dirty_vram);
-        dirty_vram = d->arch.hvm_domain.dirty_vram = NULL;
-    }
-    return rc;
-}
-
-/************************************************/
 /*            HAP LOG DIRTY SUPPORT             */
 /************************************************/

@@ -223,14 +91,12 @@ static void hap_clean_dirty_bitmap(struct domain *d)

 void hap_logdirty_init(struct domain *d)
 {
-    struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    struct dv_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
     if ( paging_mode_log_dirty(d) && dirty_vram )
     {
         paging_log_dirty_disable(d);
-        xfree(dirty_vram);
-        dirty_vram = d->arch.hvm_domain.dirty_vram = NULL;
+        dirty_vram_free(d);
     }
-
     /* Reinitialize logdirty mechanism */
     paging_log_dirty_init(d, hap_enable_log_dirty,
                           hap_disable_log_dirty,
diff --git a/xen/arch/x86/mm/paging.c b/xen/arch/x86/mm/paging.c
index ca879f9..7464b07 100644
--- a/xen/arch/x86/mm/paging.c
+++ b/xen/arch/x86/mm/paging.c
@@ -27,6 +27,7 @@
 #include <asm/p2m.h>
 #include <asm/hap.h>
 #include <asm/hvm/nestedhvm.h>
+#include <asm/hvm/dirty_vram.h>
 #include <xen/numa.h>
 #include <xsm/xsm.h>

@@ -278,6 +279,46 @@ out:
 }


+/* paging_mark_dirty_hap()
+ * Make a hap page writeable and mark it as dirty.
+ * This is done atomically under the p2m and paging locks to avoid leaving
+ * a window where the page might be modified without being marked as dirty.
+ */
+void paging_mark_dirty_hap(struct domain *d,
+                           unsigned long pfn,
+                           unsigned long guest_mfn)
+{
+    mfn_t gmfn;
+    p2m_type_t pt;
+    struct p2m_domain *p2m = p2m_get_hostp2m(d);
+
+    if ( !paging_mode_log_dirty(d) )
+        return;
+
+    gmfn = _mfn(guest_mfn);
+
+    ASSERT( mfn_valid(gmfn) &&
+            page_get_owner(mfn_to_page(gmfn)) == d );
+
+    p2m_lock(p2m);
+    pt = p2m_change_type(d, pfn, p2m_ram_logdirty, p2m_ram_rw);
+    paging_lock(d);
+    if ( pt == p2m_ram_logdirty )
+    {
+        dv_range_t *range;
+        PAGING_DEBUG(LOGDIRTY,
+                     "marked mfn %" PRI_mfn " (pfn=%lx), dom %d\n",
+                     mfn_x(gmfn), pfn, d->domain_id);
+        d->arch.paging.log_dirty.dirty_count++;
+        range = dirty_vram_range_find_gfn(d, pfn);
+        if (range)
+            range->dirty_count++;
+    }
+    paging_mark_dirty(d, guest_mfn);
+    paging_unlock(d);
+    p2m_unlock(p2m);
+}
+
 /* Is this guest page dirty? */
 int paging_mfn_is_dirty(struct domain *d, mfn_t gmfn)
 {
@@ -333,8 +374,11 @@ int paging_log_dirty_op(struct domain *d, struct xen_domctl_shadow_op *sc)
     mfn_t *l4, *l3, *l2;
     unsigned long *l1;
     int i4, i3, i2;
+    struct p2m_domain *p2m = p2m_get_hostp2m(d);

     domain_pause(d);
+    /* Locking hierarchy requires p2m lock to be taken first */
+    p2m_lock(p2m);
     paging_lock(d);

     clean = (sc->op == XEN_DOMCTL_SHADOW_OP_CLEAN);
@@ -345,6 +389,14 @@ int paging_log_dirty_op(struct domain *d, struct xen_domctl_shadow_op *sc)
                  d->arch.paging.log_dirty.fault_count,
                  d->arch.paging.log_dirty.dirty_count);

+    if (hap_enabled(d) && d->arch.hvm_domain.dirty_vram)
+    {
+        /* If we're cleaning/peeking all guest memory, we should not be tracking
+         * dirty vram. */
+        rv = -EINVAL;
+        goto out;
+    }
+
     sc->stats.fault_count = d->arch.paging.log_dirty.fault_count;
     sc->stats.dirty_count = d->arch.paging.log_dirty.dirty_count;

@@ -424,170 +476,60 @@ int paging_log_dirty_op(struct domain *d, struct xen_domctl_shadow_op *sc)

     if ( clean )
     {
-        /* We need to further call clean_dirty_bitmap() functions of specific
-         * paging modes (shadow or hap).  Safe because the domain is paused. */
-        d->arch.paging.log_dirty.clean_dirty_bitmap(d);
+        /* The clean_dirty_bitmap hook is NULL when tracking dirty vram */
+        if (d->arch.paging.log_dirty.clean_dirty_bitmap)
+        {
+            /* We need to further call clean_dirty_bitmap() functions of specific
+             * paging modes (shadow or hap).  Safe because the domain is paused. */
+            d->arch.paging.log_dirty.clean_dirty_bitmap(d);
+        }
     }
     domain_unpause(d);
     return rv;

  out:
     paging_unlock(d);
+    p2m_unlock(p2m);
     domain_unpause(d);
     return rv;
 }

-int paging_log_dirty_range(struct domain *d,
-                            unsigned long begin_pfn,
-                            unsigned long nr,
-                            XEN_GUEST_HANDLE_64(uint8) dirty_bitmap)
+void paging_log_dirty_range(struct domain *d,
+                           unsigned long begin_pfn,
+                           unsigned long nr,
+                           uint8_t *dirty_bitmap)
 {
-    int rv = 0;
-    unsigned long pages = 0;
-    mfn_t *l4, *l3, *l2;
-    unsigned long *l1;
-    int b1, b2, b3, b4;
-    int i2, i3, i4;
-
-    d->arch.paging.log_dirty.clean_dirty_bitmap(d);
+    struct p2m_domain *p2m = p2m_get_hostp2m(d);
+    dv_range_t *range;
+    unsigned int range_dirty_count = 0;
+
+    p2m_lock(p2m);
     paging_lock(d);

-    PAGING_DEBUG(LOGDIRTY, "log-dirty-range: dom %u faults=%u dirty=%u\n",
-                 d->domain_id,
-                 d->arch.paging.log_dirty.fault_count,
-                 d->arch.paging.log_dirty.dirty_count);
-
-    if ( unlikely(d->arch.paging.log_dirty.failed_allocs) ) {
-        printk("%s: %d failed page allocs while logging dirty pages\n",
-               __FUNCTION__, d->arch.paging.log_dirty.failed_allocs);
-        rv = -ENOMEM;
-        goto out;
-    }
-
-    if ( !d->arch.paging.log_dirty.fault_count &&
-         !d->arch.paging.log_dirty.dirty_count ) {
-        unsigned int size = BITS_TO_LONGS(nr);
-
-        if ( clear_guest(dirty_bitmap, size * BYTES_PER_LONG) != 0 )
-            rv = -EFAULT;
-        goto out;
-    }
-    d->arch.paging.log_dirty.fault_count = 0;
-    d->arch.paging.log_dirty.dirty_count = 0;
-
-    b1 = L1_LOGDIRTY_IDX(begin_pfn);
-    b2 = L2_LOGDIRTY_IDX(begin_pfn);
-    b3 = L3_LOGDIRTY_IDX(begin_pfn);
-    b4 = L4_LOGDIRTY_IDX(begin_pfn);
-    l4 = paging_map_log_dirty_bitmap(d);
-
-    for ( i4 = b4;
-          (pages < nr) && (i4 < LOGDIRTY_NODE_ENTRIES);
-          i4++ )
+    /* Only called when tracking dirty vram in HAP mode */
+    ASSERT(hap_enabled(d) && d->arch.hvm_domain.dirty_vram);
+
+    range = dirty_vram_range_find_gfn(d, begin_pfn);
+    if (range)
     {
-        l3 = (l4 && mfn_valid(l4[i4])) ? map_domain_page(mfn_x(l4[i4])) : NULL;
-        for ( i3 = b3;
-              (pages < nr) && (i3 < LOGDIRTY_NODE_ENTRIES);
-              i3++ )
-        {
-            l2 = ((l3 && mfn_valid(l3[i3])) ?
-                  map_domain_page(mfn_x(l3[i3])) : NULL);
-            for ( i2 = b2;
-                  (pages < nr) && (i2 < LOGDIRTY_NODE_ENTRIES);
-                  i2++ )
-            {
-                unsigned int bytes = PAGE_SIZE;
-                uint8_t *s;
-                l1 = ((l2 && mfn_valid(l2[i2])) ?
-                      map_domain_page(mfn_x(l2[i2])) : NULL);
-
-                s = ((uint8_t*)l1) + (b1 >> 3);
-                bytes -= b1 >> 3;
-
-                if ( likely(((nr - pages + 7) >> 3) < bytes) )
-                    bytes = (unsigned int)((nr - pages + 7) >> 3);
-
-                if ( !l1 )
-                {
-                    if ( clear_guest_offset(dirty_bitmap, pages >> 3,
-                                            bytes) != 0 )
-                    {
-                        rv = -EFAULT;
-                        goto out;
-                    }
-                }
-                /* begin_pfn is not 32K aligned, hence we have to bit
-                 * shift the bitmap */
-                else if ( b1 & 0x7 )
-                {
-                    int i, j;
-                    uint32_t *l = (uint32_t*) s;
-                    int bits = b1 & 0x7;
-                    int bitmask = (1 << bits) - 1;
-                    int size = (bytes + BYTES_PER_LONG - 1) / BYTES_PER_LONG;
-                    unsigned long bitmap[size];
-                    static unsigned long printed = 0;
-
-                    if ( printed != begin_pfn )
-                    {
-                        dprintk(XENLOG_DEBUG, "%s: begin_pfn %lx is not 32K aligned!\n",
-                                __FUNCTION__, begin_pfn);
-                        printed = begin_pfn;
-                    }
-
-                    for ( i = 0; i < size - 1; i++, l++ ) {
-                        bitmap[i] = ((*l) >> bits) |
-                            (((*((uint8_t*)(l + 1))) & bitmask) << (sizeof(*l) * 8 - bits));
-                    }
-                    s = (uint8_t*) l;
-                    size = BYTES_PER_LONG - ((b1 >> 3) & 0x3);
-                    bitmap[i] = 0;
-                    for ( j = 0; j < size; j++, s++ )
-                        bitmap[i] |= (*s) << (j * 8);
-                    bitmap[i] = (bitmap[i] >> bits) | (bitmask << (size * 8 - bits));
-                    if ( copy_to_guest_offset(dirty_bitmap, (pages >> 3),
-                                (uint8_t*) bitmap, bytes) != 0 )
-                    {
-                        rv = -EFAULT;
-                        goto out;
-                    }
-                }
-                else
-                {
-                    if ( copy_to_guest_offset(dirty_bitmap, pages >> 3,
-                                              s, bytes) != 0 )
-                    {
-                        rv = -EFAULT;
-                        goto out;
-                    }
-                }
-
-                pages += bytes << 3;
-                if ( l1 )
-                {
-                    clear_page(l1);
-                    unmap_domain_page(l1);
-                }
-                b1 = b1 & 0x7;
-            }
-            b2 = 0;
-            if ( l2 )
-                unmap_domain_page(l2);
-        }
-        b3 = 0;
-        if ( l3 )
-            unmap_domain_page(l3);
+        range_dirty_count = range->dirty_count;
+        range->dirty_count = 0;
     }
-    if ( l4 )
-        unmap_domain_page(l4);
-
-    paging_unlock(d);
+
+    if ( !range_dirty_count)
+        goto out;

-    return rv;
+    PAGING_DEBUG(LOGDIRTY, "log-dirty-range: dom %u [%05lx:%05lx] range_dirty=%u\n",
+                 d->domain_id,
+                 begin_pfn,
+                 range->end_pfn,
+                 range_dirty_count);

+    hap_clean_vram_tracking_range(d, begin_pfn, nr, dirty_bitmap);
  out:
     paging_unlock(d);
-    return rv;
+    p2m_unlock(p2m);
+    return;
 }

 /* Note that this function takes three function pointers. Callers must supply
diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c
index 3f8ad88..c9f3495 100644
--- a/xen/arch/x86/mm/shadow/common.c
+++ b/xen/arch/x86/mm/shadow/common.c
@@ -36,6 +36,7 @@
 #include <asm/current.h>
 #include <asm/flushtlb.h>
 #include <asm/shadow.h>
+#include <asm/hvm/dirty_vram.h>
 #include <xen/numa.h>
 #include "private.h"

@@ -3121,12 +3122,7 @@ void shadow_teardown(struct domain *d)
      * calls now that we've torn down the bitmap */
     d->arch.paging.mode &= ~PG_log_dirty;

-    if (d->arch.hvm_domain.dirty_vram) {
-        xfree(d->arch.hvm_domain.dirty_vram->sl1ma);
-        xfree(d->arch.hvm_domain.dirty_vram->dirty_bitmap);
-        xfree(d->arch.hvm_domain.dirty_vram);
-        d->arch.hvm_domain.dirty_vram = NULL;
-    }
+    dirty_vram_free(d);

     paging_unlock(d);

@@ -3463,179 +3459,212 @@ void shadow_clean_dirty_bitmap(struct domain *d)


 /**************************************************************************/
-/* VRAM dirty tracking support */
-int shadow_track_dirty_vram(struct domain *d,
-                            unsigned long begin_pfn,
-                            unsigned long nr,
-                            XEN_GUEST_HANDLE_64(uint8) dirty_bitmap)
-{
-    int rc;
-    unsigned long end_pfn = begin_pfn + nr;
-    unsigned long dirty_size = (nr + 7) / 8;
-    int flush_tlb = 0;
-    unsigned long i;
-    p2m_type_t t;
-    struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
-    struct p2m_domain *p2m = p2m_get_hostp2m(d);
-
-    if (end_pfn < begin_pfn
-            || begin_pfn > p2m->max_mapped_pfn
-            || end_pfn >= p2m->max_mapped_pfn)
-        return -EINVAL;
-
-    /* We perform p2m lookups, so lock the p2m upfront to avoid deadlock */
-    p2m_lock(p2m_get_hostp2m(d));
-    paging_lock(d);
+/* Support functions for shadow-based dirty VRAM code */

-    if ( dirty_vram && (!nr ||
-             ( begin_pfn != dirty_vram->begin_pfn
-            || end_pfn   != dirty_vram->end_pfn )) )
-    {
-        /* Different tracking, tear the previous down. */
-        gdprintk(XENLOG_INFO, "stopping tracking VRAM %lx - %lx\n", dirty_vram->begin_pfn, dirty_vram->end_pfn);
-        xfree(dirty_vram->sl1ma);
-        xfree(dirty_vram->dirty_bitmap);
-        xfree(dirty_vram);
-        dirty_vram = d->arch.hvm_domain.dirty_vram = NULL;
-    }
+#define DEBUG_unshadow_sl1ma                  0
+#define DEBUG_unshadow_sl1ma_detail           0
+#define DEBUG_count_initial_mappings          1

-    if ( !nr )
+/* smfn is no longer a shadow page.  Remove it from any
+ * dirty vram range mapping. */
+void
+dirty_vram_delete_shadow(struct vcpu *v,
+                         unsigned long gfn,
+                         unsigned int shadow_type,
+                         mfn_t smfn)
+{
+    static unsigned int l1_shadow_mask =
+          1 << SH_type_l1_32_shadow
+        | 1 << SH_type_fl1_32_shadow
+        | 1 << SH_type_l1_pae_shadow
+        | 1 << SH_type_fl1_pae_shadow
+        | 1 << SH_type_l1_64_shadow
+        | 1 << SH_type_fl1_64_shadow
+        ;
+    struct domain *d = v->domain;
+    dv_dirty_vram_t *dirty_vram;
+    struct list_head *curr, *next;
+
+    ASSERT(paging_locked_by_me(d));
+    /* Ignore all but level 1 shadows */
+
+    if ((l1_shadow_mask & (1 << shadow_type)) == 0)
     {
-        rc = 0;
         goto out;
     }

-    /* This should happen seldomly (Video mode change),
-     * no need to be careful. */
+    dirty_vram = d->arch.hvm_domain.dirty_vram;
     if ( !dirty_vram )
     {
-        /* Throw away all the shadows rather than walking through them
-         * up to nr times getting rid of mappings of each pfn */
-        shadow_blow_tables(d);
-
-        gdprintk(XENLOG_INFO, "tracking VRAM %lx - %lx\n", begin_pfn, end_pfn);
-
-        rc = -ENOMEM;
-        if ( (dirty_vram = xmalloc(struct sh_dirty_vram)) == NULL )
-            goto out;
-        dirty_vram->begin_pfn = begin_pfn;
-        dirty_vram->end_pfn = end_pfn;
-        d->arch.hvm_domain.dirty_vram = dirty_vram;
-
-        if ( (dirty_vram->sl1ma = xmalloc_array(paddr_t, nr)) == NULL )
-            goto out_dirty_vram;
-        memset(dirty_vram->sl1ma, ~0, sizeof(paddr_t) * nr);
-
-        if ( (dirty_vram->dirty_bitmap = xzalloc_array(uint8_t, dirty_size)) == NULL )
-            goto out_sl1ma;
-
-        dirty_vram->last_dirty = NOW();
-
-        /* Tell the caller that this time we could not track dirty bits. */
-        rc = -ENODATA;
-    }
-    else if (dirty_vram->last_dirty == -1)
-    {
-        /* still completely clean, just copy our empty bitmap */
-        rc = -EFAULT;
-        if ( copy_to_guest(dirty_bitmap, dirty_vram->dirty_bitmap, dirty_size) == 0 )
-            rc = 0;
+        goto out;
     }
-    else
+
+    list_for_each_safe(curr, next, &dirty_vram->range_head)
     {
-        /* Iterate over VRAM to track dirty bits. */
-        for ( i = 0; i < nr; i++ ) {
-            mfn_t mfn = get_gfn_query_unlocked(d, begin_pfn + i, &t);
-            struct page_info *page;
-            int dirty = 0;
-            paddr_t sl1ma = dirty_vram->sl1ma[i];
-
-            if (mfn_x(mfn) == INVALID_MFN)
-            {
-                dirty = 1;
-            }
-            else
+        dv_range_t *range = list_entry(curr, dv_range_t, range_link);
+        unsigned long i;
+        int max_mappings = 1, mappings = 0;
+        int unshadowed = 0;
+        for (i = 0; i != range->end_pfn - range->begin_pfn; i++)
+        {
+            dv_paddr_link_t *pl = &range->pl_tab[ i ];
+            dv_paddr_link_t **ppl = NULL;
+            mappings = 0;
+
+            while (pl != NULL)
             {
-                page = mfn_to_page(mfn);
-                switch (page->u.inuse.type_info & PGT_count_mask)
-                {
-                case 0:
-                    /* No guest reference, nothing to track. */
-                    break;
-                case 1:
-                    /* One guest reference. */
-                    if ( sl1ma == INVALID_PADDR )
-                    {
-                        /* We don't know which sl1e points to this, too bad. */
-                        dirty = 1;
-                        /* TODO: Heuristics for finding the single mapping of
-                         * this gmfn */
-                        flush_tlb |= sh_remove_all_mappings(d->vcpu[0], mfn);
-                    }
-                    else
-                    {
-                        /* Hopefully the most common case: only one mapping,
-                         * whose dirty bit we can use. */
-                        l1_pgentry_t *sl1e = maddr_to_virt(sl1ma);
-
-                        if ( l1e_get_flags(*sl1e) & _PAGE_DIRTY )
-                        {
-                            dirty = 1;
-                            /* Note: this is atomic, so we may clear a
-                             * _PAGE_ACCESSED set by another processor. */
-                            l1e_remove_flags(*sl1e, _PAGE_DIRTY);
-                            flush_tlb = 1;
-                        }
-                    }
-                    break;
-                default:
-                    /* More than one guest reference,
-                     * we don't afford tracking that. */
-                    dirty = 1;
+                paddr_t sl1ma = pl->sl1ma;
+                unsigned long sl1mn;
+
+                if (sl1ma == INVALID_PADDR )
                     break;
+
+                sl1mn = sl1ma >> PAGE_SHIFT;
+                if (sl1mn == mfn_x(smfn)) {
+#if DEBUG_unshadow_sl1ma_detail
+                    gdprintk(XENLOG_DEBUG,
+                             "[%lx] gfn[%lx] unshadow sl1ma:%lx\n",
+                             mfn_x(smfn),
+                             range->begin_pfn + i,
+                             sl1ma);
+#endif
+                    unshadowed++;
+                    pl = free_paddr_link(d, ppl, pl);
+                    --range->nr_mappings;
+                }
+                else
+                {
+                    ppl = &pl->pl_next;
+                    pl = *ppl;
+                    mappings++;
                 }
             }
-
-            if ( dirty )
+        }
+        if (mappings > max_mappings)
+            max_mappings = mappings;
+
+        if (unshadowed) {
+#if DEBUG_unshadow_sl1ma
+            gdprintk(XENLOG_DEBUG,
+                     "[%lx] gfn[%05lx:%05lx] unshadowed:%d mappings:0x%x max_mappings:%d\n",
+                     mfn_x(smfn),
+                     range->begin_pfn, range->end_pfn,
+                     unshadowed, range->nr_mappings, max_mappings);
+#endif
+            if ( range->nr_mappings == 0 )
             {
-                dirty_vram->dirty_bitmap[i / 8] |= 1 << (i % 8);
-                dirty_vram->last_dirty = NOW();
+                dirty_vram_range_free(d, range);
             }
         }
+    }
+ out:
+    return;
+}
+
+
+typedef int (*hash_pfn_callback_t)(struct vcpu *v,
+                                   mfn_t smfn,
+                                   unsigned long begin_pfn,
+                                   unsigned long end_pfn,
+                                   int *removed);
+
+static int hash_pfn_foreach(struct vcpu *v,
+                            unsigned int callback_mask,
+                            hash_pfn_callback_t callbacks[],
+                            unsigned long begin_pfn,
+                            unsigned long end_pfn)
+/* Walk the hash table looking at the types of the entries and
+ * calling the appropriate callback function for each entry.
+ * The mask determines which shadow types we call back for, and the array
+ * of callbacks tells us which function to call.
+ * Any callback may return non-zero to let us skip the rest of the scan.
+ *
+ * WARNING: Callbacks MUST NOT add or remove hash entries unless they
+ * then return non-zero to terminate the scan. */
+{
+    int i, done = 0, removed = 0;
+    struct domain *d = v->domain;
+    struct page_info *x;
+
+    /* Say we're here, to stop hash-lookups reordering the chains */
+    ASSERT(paging_locked_by_me(d));
+    ASSERT(d->arch.paging.shadow.hash_walking == 0);
+    d->arch.paging.shadow.hash_walking = 1;

-        rc = -EFAULT;
-        if ( copy_to_guest(dirty_bitmap, dirty_vram->dirty_bitmap, dirty_size) == 0 ) {
-            memset(dirty_vram->dirty_bitmap, 0, dirty_size);
-            if (dirty_vram->last_dirty + SECONDS(2) < NOW())
+    for ( i = 0; i < SHADOW_HASH_BUCKETS; i++ )
+    {
+        /* WARNING: This is not safe against changes to the hash table.
+         * The callback *must* return non-zero if it has inserted or
+         * deleted anything from the hash (lookups are OK, though). */
+        for ( x = d->arch.paging.shadow.hash_table[i]; x; x = next_shadow(x) )
+        {
+            if ( callback_mask & (1 << x->u.sh.type) )
             {
-                /* was clean for more than two seconds, try to disable guest
-                 * write access */
-                for ( i = begin_pfn; i < end_pfn; i++ ) {
-                    mfn_t mfn = get_gfn_query_unlocked(d, i, &t);
-                    if (mfn_x(mfn) != INVALID_MFN)
-                        flush_tlb |= sh_remove_write_access(d->vcpu[0], mfn, 1, 0);
-                }
-                dirty_vram->last_dirty = -1;
+                ASSERT(x->u.sh.type <= 15);
+                ASSERT(callbacks[x->u.sh.type] != NULL);
+                done = callbacks[x->u.sh.type](v, page_to_mfn(x),
+                                               begin_pfn, end_pfn,
+                                               &removed);
+                if ( done ) break;
             }
-            rc = 0;
         }
+        if ( done ) break;
     }
-    if ( flush_tlb )
-        flush_tlb_mask(d->domain_dirty_cpumask);
-    goto out;
+    d->arch.paging.shadow.hash_walking = 0;
+    return removed;
+}

-out_sl1ma:
-    xfree(dirty_vram->sl1ma);
-out_dirty_vram:
-    xfree(dirty_vram);
-    dirty_vram = d->arch.hvm_domain.dirty_vram = NULL;
+void sh_find_all_vram_mappings(struct vcpu *v,
+                               dv_range_t *range)
+{
+    /* Dispatch table for getting per-type functions */
+    static hash_pfn_callback_t callbacks[SH_type_unused] = {
+        NULL, /* none    */
+        SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, 2), /* l1_32   */
+        SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, 2), /* fl1_32  */
+        NULL, /* l2_32   */
+        SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, 3), /* l1_pae  */
+        SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, 3), /* fl1_pae */
+        NULL, /* l2_pae  */
+        NULL, /* l2h_pae */
+#if CONFIG_PAGING_LEVELS >= 4
+        SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, 4), /* l1_64   */
+        SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, 4), /* fl1_64  */
+#else
+        NULL, /* l1_64   */
+        NULL, /* fl1_64  */
+#endif
+        NULL, /* l2_64   */
+        NULL, /* l2h_64  */
+        NULL, /* l3_64   */
+        NULL, /* l4_64   */
+        NULL, /* p2m     */
+        NULL  /* unused  */
+    };

-out:
-    paging_unlock(d);
-    p2m_unlock(p2m_get_hostp2m(d));
-    return rc;
+    static unsigned int callback_mask =
+          1 << SH_type_l1_32_shadow
+        | 1 << SH_type_fl1_32_shadow
+        | 1 << SH_type_l1_pae_shadow
+        | 1 << SH_type_fl1_pae_shadow
+        | 1 << SH_type_l1_64_shadow
+        | 1 << SH_type_fl1_64_shadow
+        ;
+
+    perfc_incr(shadow_mappings);
+
+    hash_pfn_foreach(v, callback_mask, callbacks,
+                     range->begin_pfn,
+                     range->end_pfn);
+
+#if DEBUG_count_initial_mappings
+    gdprintk(XENLOG_DEBUG, "[%05lx:%05lx] count of initial mappings:%d\n",
+             range->begin_pfn, range->end_pfn,
+             range->nr_mappings);
+#endif
 }

+
 /**************************************************************************/
 /* Shadow-control XEN_DOMCTL dispatcher */

diff --git a/xen/arch/x86/mm/shadow/multi.c b/xen/arch/x86/mm/shadow/multi.c
index b0e6d72..f4d0603 100644
--- a/xen/arch/x86/mm/shadow/multi.c
+++ b/xen/arch/x86/mm/shadow/multi.c
@@ -35,6 +35,7 @@
 #include <asm/flushtlb.h>
 #include <asm/hvm/hvm.h>
 #include <asm/hvm/cacheattr.h>
+#include <asm/hvm/dirty_vram.h>
 #include <asm/mtrr.h>
 #include <asm/guest_pt.h>
 #include <public/sched.h>
@@ -149,6 +150,10 @@ delete_fl1_shadow_status(struct vcpu *v, gfn_t gfn, mfn_t smfn)
     SHADOW_PRINTK("gfn=%"SH_PRI_gfn", type=%08x, smfn=%05lx\n",
                    gfn_x(gfn), SH_type_fl1_shadow, mfn_x(smfn));
     ASSERT(mfn_to_page(smfn)->u.sh.head);
+
+    /* Removing any dv_paddr_links to the erstwhile shadow page */
+    dirty_vram_delete_shadow(v, gfn_x(gfn), SH_type_fl1_shadow, smfn);
+
     shadow_hash_delete(v, gfn_x(gfn), SH_type_fl1_shadow, smfn);
 }

@@ -160,6 +165,10 @@ delete_shadow_status(struct vcpu *v, mfn_t gmfn, u32 shadow_type, mfn_t smfn)
                    v->domain->domain_id, v->vcpu_id,
                    mfn_x(gmfn), shadow_type, mfn_x(smfn));
     ASSERT(mfn_to_page(smfn)->u.sh.head);
+
+    /* Removing any dv_paddr_links to the erstwhile shadow page */
+    dirty_vram_delete_shadow(v, mfn_x(gmfn), shadow_type, smfn);
+
     shadow_hash_delete(v, mfn_x(gmfn), shadow_type, smfn);
     /* 32-on-64 PV guests don't own their l4 pages; see set_shadow_status */
     if ( !is_pv_32on64_vcpu(v) || shadow_type != SH_type_l4_64_shadow )
@@ -516,7 +525,6 @@ _sh_propagate(struct vcpu *v,
     guest_l1e_t guest_entry = { guest_intpte };
     shadow_l1e_t *sp = shadow_entry_ptr;
     struct domain *d = v->domain;
-    struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
     gfn_t target_gfn = guest_l1e_get_gfn(guest_entry);
     u32 pass_thru_flags;
     u32 gflags, sflags;
@@ -663,17 +671,6 @@ _sh_propagate(struct vcpu *v,
         }
     }

-    if ( unlikely((level == 1) && dirty_vram
-            && dirty_vram->last_dirty == -1
-            && gfn_x(target_gfn) >= dirty_vram->begin_pfn
-            && gfn_x(target_gfn) < dirty_vram->end_pfn) )
-    {
-        if ( ft & FETCH_TYPE_WRITE )
-            dirty_vram->last_dirty = NOW();
-        else
-            sflags &= ~_PAGE_RW;
-    }
-
     /* Read-only memory */
     if ( p2m_is_readonly(p2mt) ||
          (p2mt == p2m_mmio_direct &&
@@ -1072,101 +1069,57 @@ static int shadow_set_l2e(struct vcpu *v,
     return flags;
 }

-static inline void shadow_vram_get_l1e(shadow_l1e_t new_sl1e,
+/* shadow_vram_fix_l1e()
+ * Tests L1 PTEs as they are modified, looking for when they start (or cease)
+ * to point to frame buffer pages.  If the old and new gfns differ, calls
+ * dirty_vram_range_update() to update the dirty_vram structures.
+ */
+static inline void shadow_vram_fix_l1e(shadow_l1e_t old_sl1e,
+                                       shadow_l1e_t new_sl1e,
                                        shadow_l1e_t *sl1e,
                                        mfn_t sl1mfn,
                                        struct domain *d)
 {
-    mfn_t mfn = shadow_l1e_get_mfn(new_sl1e);
-    int flags = shadow_l1e_get_flags(new_sl1e);
-    unsigned long gfn;
-    struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    mfn_t new_mfn, old_mfn;
+    unsigned long new_gfn = INVALID_M2P_ENTRY, old_gfn = INVALID_M2P_ENTRY;
+    paddr_t sl1ma;
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;

-    if ( !dirty_vram         /* tracking disabled? */
-         || !(flags & _PAGE_RW) /* read-only mapping? */
-         || !mfn_valid(mfn) )   /* mfn can be invalid in mmio_direct */
+    if ( !dirty_vram )
         return;

-    gfn = mfn_to_gfn(d, mfn);
-    /* Page sharing not supported on shadow PTs */
-    BUG_ON(SHARED_M2P(gfn));
+    sl1ma = pfn_to_paddr(mfn_x(sl1mfn)) | ((unsigned long)sl1e & ~PAGE_MASK);

-    if ( (gfn >= dirty_vram->begin_pfn) && (gfn < dirty_vram->end_pfn) )
+    old_mfn = shadow_l1e_get_mfn(old_sl1e);
+
+    if ( !sh_l1e_is_magic(old_sl1e) &&
+         (l1e_get_flags(old_sl1e) & _PAGE_PRESENT) &&
+         mfn_valid(old_mfn))
     {
-        unsigned long i = gfn - dirty_vram->begin_pfn;
-        struct page_info *page = mfn_to_page(mfn);
-
-        if ( (page->u.inuse.type_info & PGT_count_mask) == 1 )
-            /* Initial guest reference, record it */
-            dirty_vram->sl1ma[i] = pfn_to_paddr(mfn_x(sl1mfn))
-                | ((unsigned long)sl1e & ~PAGE_MASK);
+        old_gfn = mfn_to_gfn(d, old_mfn);
     }
-}
-
-static inline void shadow_vram_put_l1e(shadow_l1e_t old_sl1e,
-                                       shadow_l1e_t *sl1e,
-                                       mfn_t sl1mfn,
-                                       struct domain *d)
-{
-    mfn_t mfn = shadow_l1e_get_mfn(old_sl1e);
-    int flags = shadow_l1e_get_flags(old_sl1e);
-    unsigned long gfn;
-    struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
-
-    if ( !dirty_vram         /* tracking disabled? */
-         || !(flags & _PAGE_RW) /* read-only mapping? */
-         || !mfn_valid(mfn) )   /* mfn can be invalid in mmio_direct */
-        return;
-
-    gfn = mfn_to_gfn(d, mfn);
-    /* Page sharing not supported on shadow PTs */
-    BUG_ON(SHARED_M2P(gfn));
-
-    if ( (gfn >= dirty_vram->begin_pfn) && (gfn < dirty_vram->end_pfn) )
+
+    new_mfn = shadow_l1e_get_mfn(new_sl1e);
+    if ( !sh_l1e_is_magic(new_sl1e) &&
+         (l1e_get_flags(new_sl1e) & _PAGE_PRESENT) &&
+         mfn_valid(new_mfn))
     {
-        unsigned long i = gfn - dirty_vram->begin_pfn;
-        struct page_info *page = mfn_to_page(mfn);
-        int dirty = 0;
-        paddr_t sl1ma = pfn_to_paddr(mfn_x(sl1mfn))
-            | ((unsigned long)sl1e & ~PAGE_MASK);
+        new_gfn = mfn_to_gfn(d, new_mfn);
+    }

-        if ( (page->u.inuse.type_info & PGT_count_mask) == 1 )
-        {
-            /* Last reference */
-            if ( dirty_vram->sl1ma[i] == INVALID_PADDR ) {
-                /* We didn't know it was that one, let's say it is dirty */
-                dirty = 1;
-            }
-            else
-            {
-                ASSERT(dirty_vram->sl1ma[i] == sl1ma);
-                dirty_vram->sl1ma[i] = INVALID_PADDR;
-                if ( flags & _PAGE_DIRTY )
-                    dirty = 1;
-            }
-        }
-        else
+    if (old_gfn == new_gfn) return;
+
+    if (VALID_M2P(old_gfn))
+        if (dirty_vram_range_update(d, old_gfn, sl1ma, 0/*clear*/))
         {
-            /* We had more than one reference, just consider the page dirty. */
-            dirty = 1;
-            /* Check that it's not the one we recorded. */
-            if ( dirty_vram->sl1ma[i] == sl1ma )
-            {
-                /* Too bad, we remembered the wrong one... */
-                dirty_vram->sl1ma[i] = INVALID_PADDR;
-            }
-            else
-            {
-                /* Ok, our recorded sl1e is still pointing to this page, let's
-                 * just hope it will remain. */
-            }
+            SHADOW_PRINTK("gfn %lx (mfn %lx) cleared vram pte\n", old_gfn, mfn_x(old_mfn));
         }
-        if ( dirty )
+
+    if (VALID_M2P(new_gfn))
+        if (dirty_vram_range_update(d, new_gfn, sl1ma, 1/*set*/))
         {
-            dirty_vram->dirty_bitmap[i / 8] |= 1 << (i % 8);
-            dirty_vram->last_dirty = NOW();
+            SHADOW_PRINTK("gfn %lx (mfn %lx) set vram pte\n", new_gfn, mfn_x(new_mfn));
         }
-    }
 }

 static int shadow_set_l1e(struct vcpu *v,
@@ -1211,12 +1164,14 @@ static int shadow_set_l1e(struct vcpu *v,
                 shadow_l1e_remove_flags(new_sl1e, _PAGE_RW);
                 /* fall through */
             case 0:
-                shadow_vram_get_l1e(new_sl1e, sl1e, sl1mfn, d);
+                shadow_vram_fix_l1e(old_sl1e, new_sl1e, sl1e, sl1mfn, d);
                 break;
             }
         }
     }

+    shadow_vram_fix_l1e(old_sl1e, new_sl1e, sl1e, sl1mfn, d);
+
     /* Write the new entry */
     shadow_write_entries(sl1e, &new_sl1e, 1, sl1mfn);
     flags |= SHADOW_SET_CHANGED;
@@ -1231,7 +1186,6 @@ static int shadow_set_l1e(struct vcpu *v,
          * trigger a flush later. */
         if ( shadow_mode_refcounts(d) )
         {
-            shadow_vram_put_l1e(old_sl1e, sl1e, sl1mfn, d);
             shadow_put_page_from_l1e(old_sl1e, d);
             TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_SHADOW_L1_PUT_REF);
         }
@@ -2018,7 +1972,6 @@ void sh_destroy_l1_shadow(struct vcpu *v, mfn_t smfn)
         SHADOW_FOREACH_L1E(sl1mfn, sl1e, 0, 0, {
             if ( (shadow_l1e_get_flags(*sl1e) & _PAGE_PRESENT)
                  && !sh_l1e_is_magic(*sl1e) ) {
-                shadow_vram_put_l1e(*sl1e, sl1e, sl1mfn, d);
                 shadow_put_page_from_l1e(*sl1e, d);
             }
         });
@@ -4336,6 +4289,34 @@ int sh_rm_mappings_from_l1(struct vcpu *v, mfn_t sl1mfn, mfn_t target_mfn)
     return done;
 }

+
+int sh_find_vram_mappings_in_l1(struct vcpu *v,
+                                mfn_t sl1mfn,
+                                unsigned long begin_pfn,
+                                unsigned long end_pfn,
+                                int *removed)
+/* Find all VRAM mappings in this shadow l1 table */
+{
+    struct domain *d = v->domain;
+    shadow_l1e_t *sl1e;
+    int done = 0;
+
+    SHADOW_FOREACH_L1E(sl1mfn, sl1e, 0, done, /* only returns _PAGE_PRESENT entries */
+    {
+        unsigned long gfn;
+        mfn_t gmfn = shadow_l1e_get_mfn(*sl1e);
+        if (!mfn_valid(gmfn))
+            continue;
+        gfn = mfn_to_gfn(d, gmfn);
+        if (VALID_M2P(gfn) && (begin_pfn <= gfn) && (gfn < end_pfn))
+        {
+            paddr_t sl1ma = pfn_to_paddr(mfn_x(sl1mfn)) | ((unsigned long)sl1e & ~PAGE_MASK);
+            dirty_vram_range_update(v->domain, gfn, sl1ma, 1/*set*/);
+        }
+    });
+    return 0;
+}
+
 /**************************************************************************/
 /* Functions to excise all pointers to shadows from higher-level shadows. */

diff --git a/xen/arch/x86/mm/shadow/multi.h b/xen/arch/x86/mm/shadow/multi.h
index 835121e..436a4ac 100644
--- a/xen/arch/x86/mm/shadow/multi.h
+++ b/xen/arch/x86/mm/shadow/multi.h
@@ -66,7 +66,12 @@ SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1, GUEST_LEVELS)
 extern int
 SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1, GUEST_LEVELS)
     (struct vcpu *v, mfn_t sl1mfn, mfn_t target_mfn);
-
+extern int
+SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, GUEST_LEVELS)
+     (struct vcpu *v, mfn_t sl1mfn,
+      unsigned long begin_pfn,
+      unsigned long end_pfn,
+      int *removed);
 extern void
 SHADOW_INTERNAL_NAME(sh_clear_shadow_entry, GUEST_LEVELS)
     (struct vcpu *v, void *ep, mfn_t smfn);
diff --git a/xen/arch/x86/mm/shadow/types.h b/xen/arch/x86/mm/shadow/types.h
index 43ce1db..5b0f9f7 100644
--- a/xen/arch/x86/mm/shadow/types.h
+++ b/xen/arch/x86/mm/shadow/types.h
@@ -229,6 +229,7 @@ static inline shadow_l4e_t shadow_l4e_from_mfn(mfn_t mfn, u32 flags)
 #define sh_update_cr3              INTERNAL_NAME(sh_update_cr3)
 #define sh_rm_write_access_from_l1 INTERNAL_NAME(sh_rm_write_access_from_l1)
 #define sh_rm_mappings_from_l1     INTERNAL_NAME(sh_rm_mappings_from_l1)
+#define sh_find_vram_mappings_in_l1 INTERNAL_NAME(sh_find_vram_mappings_in_l1)
 #define sh_remove_l1_shadow        INTERNAL_NAME(sh_remove_l1_shadow)
 #define sh_remove_l2_shadow        INTERNAL_NAME(sh_remove_l2_shadow)
 #define sh_remove_l3_shadow        INTERNAL_NAME(sh_remove_l3_shadow)
diff --git a/xen/include/asm-x86/hap.h b/xen/include/asm-x86/hap.h
index a2532a4..82e20c7 100644
--- a/xen/include/asm-x86/hap.h
+++ b/xen/include/asm-x86/hap.h
@@ -57,10 +57,6 @@ void  hap_final_teardown(struct domain *d);
 void  hap_teardown(struct domain *d);
 void  hap_vcpu_init(struct vcpu *v);
 void  hap_logdirty_init(struct domain *d);
-int   hap_track_dirty_vram(struct domain *d,
-                           unsigned long begin_pfn,
-                           unsigned long nr,
-                           XEN_GUEST_HANDLE_64(uint8) dirty_bitmap);

 extern const struct paging_mode *hap_paging_get_mode(struct vcpu *);

diff --git a/xen/include/asm-x86/hvm/dirty_vram.h b/xen/include/asm-x86/hvm/dirty_vram.h
new file mode 100644
index 0000000..b8b92cc
--- /dev/null
+++ b/xen/include/asm-x86/hvm/dirty_vram.h
@@ -0,0 +1,157 @@
+/******************************************************************************
+ * include/asm-x86/hvm/dirty_vram.h
+ *
+ * Interface for tracking dirty VRAM pages
+ *
+ * Copyright (c) 2012 Citrix Systems, Inc. (Robert Phillips)
+ * Parts of this code are Copyright (c) 2006 by XenSource Inc.
+ * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
+ * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#ifndef _DIRTY_VRAM_H
+#define _DIRTY_VRAM_H
+
+/* In shadow mode we need to bookkeep all the L1 page table entries that
+ * map a frame buffer page.  Struct dv_paddr_link does this
+ * by recording the address of an L1 page table entry for some frame buffer page.
+ * It also has a link to additional pl entries if the frame buffer page
+ * has multiple mappings. */
+typedef struct dv_paddr_link {
+    paddr_t sl1ma;
+    struct dv_paddr_link *pl_next;
+} dv_paddr_link_t;
+
+/* This defines an extension page of pl entries for FB pages with multiple
+ * mappings. All such pages (of a domain) are linked together. */
+typedef struct dv_paddr_link_ext {
+    struct list_head ext_link;
+    dv_paddr_link_t entries[(PAGE_SIZE-sizeof(struct list_head))/sizeof(dv_paddr_link_t)];
+} dv_paddr_link_ext_t;
+
+/* This defines a single frame buffer range.  It bookkeeps all the level 1 PTEs
+ * that map guest pages within that range.
+ * All such ranges (of a domain) are linked together. */
+typedef struct dv_range {
+    struct list_head range_link; /* the several ranges form a linked list */
+    unsigned long begin_pfn;
+    unsigned long end_pfn;
+    dv_paddr_link_t *pl_tab; /* table has 1 pl entry per pfn in range */
+    int nr_mappings;  /* total number of mappings in this range */
+    int mappings_hwm; /* high water mark of max mapping count */
+    unsigned int dirty_count;
+} dv_range_t;
+
+/* This contains all the data structures required by a domain to
+ * bookkeep the dirty pages within its frame buffers. */
+typedef struct dv_dirty_vram {
+    struct list_head range_head; /* head of the linked list of ranges */
+    struct list_head ext_head; /* head of list of extension pages */
+    dv_paddr_link_t *pl_free; /* free list of pl's within extension pages */
+    int nr_ranges; /* bookkeeps number of ranges */
+    int ranges_hwm; /* high water mark of max number of ranges */
+} dv_dirty_vram_t;
+
+/* Allocates domain's dirty_vram structure */
+dv_dirty_vram_t *
+dirty_vram_alloc(struct domain *d);
+
+/* Returns domain's dirty_vram structure,
+ * allocating it if necessary */
+dv_dirty_vram_t *
+dirty_vram_find_or_alloc(struct domain *d);
+
+/* Frees domain's dirty_vram structure */
+void dirty_vram_free(struct domain *d);
+
+/* Returns dirty vram range containing gfn, NULL if none */
+struct dv_range *
+dirty_vram_range_find_gfn(struct domain *d,
+                          unsigned long gfn);
+
+/* Returns dirty vram range matching [ begin_pfn .. begin_pfn+nr ), NULL if none */
+dv_range_t *
+dirty_vram_range_find(struct domain *d,
+                      unsigned long begin_pfn,
+                      unsigned long nr);
+
+/* Allocate dirty vram range containing [ begin_pfn .. begin_pfn+nr ),
+ * freeing any existing range that overlaps the new range. */
+dv_range_t *
+dirty_vram_range_alloc(struct domain *d,
+                       unsigned long begin_pfn,
+                       unsigned long nr);
+
+/* Returns dirty vram range matching [ begin_pfn .. begin_pfn+nr ),
+ * creating a range if none already exists and
+ * freeing any existing range that overlaps the new range. */
+dv_range_t *
+dirty_vram_range_find_or_alloc(struct domain *d,
+                               unsigned long begin_pfn,
+                               unsigned long nr);
+
+void dirty_vram_range_free(struct domain *d,
+                           dv_range_t *range);
+
+/* Bookkeep PTE address of a frame buffer page */
+int dirty_vram_range_update(struct domain *d,
+                            unsigned long gfn,
+                            paddr_t sl1ma,
+                            int set);
+
+/* smfn is no longer a shadow page.  Remove it from any
+ * dirty vram range mapping. */
+void
+dirty_vram_delete_shadow(struct vcpu *v,
+                         unsigned long gfn,
+                         unsigned int shadow_type,
+                         mfn_t smfn);
+
+
+/* Scan all the L1 tables looking for VRAM mappings.
+ * Record them in the domain's dv_dirty_vram structure */
+void sh_find_all_vram_mappings(struct vcpu *v,
+                               dv_range_t *range);
+
+/* Free a paddr_link struct, given address of its
+ * predecessor in singly-linked list */
+dv_paddr_link_t *
+free_paddr_link(struct domain *d,
+                dv_paddr_link_t **ppl,
+                dv_paddr_link_t *pl);
+
+
+/* Enable VRAM dirty tracking. */
+int
+shadow_track_dirty_vram(struct domain *d,
+                       unsigned long first_pfn,
+                       unsigned long nr,
+                       XEN_GUEST_HANDLE_64(uint8) dirty_bitmap);
+
+int
+hap_track_dirty_vram(struct domain *d,
+                    unsigned long begin_pfn,
+                    unsigned long nr,
+                    XEN_GUEST_HANDLE_64(uint8) dirty_bitmap);
+
+void
+hap_clean_vram_tracking_range(struct domain *d,
+                             unsigned long begin_pfn,
+                             unsigned long nr,
+                             uint8_t *dirty_bitmap);
+
+#endif /* _DIRTY_VRAM_H */
diff --git a/xen/include/asm-x86/hvm/domain.h b/xen/include/asm-x86/hvm/domain.h
index 27b3de5..6146542 100644
--- a/xen/include/asm-x86/hvm/domain.h
+++ b/xen/include/asm-x86/hvm/domain.h
@@ -74,7 +74,7 @@ struct hvm_domain {
     struct list_head       pinned_cacheattr_ranges;

     /* VRAM dirty support. */
-    struct sh_dirty_vram *dirty_vram;
+    struct dv_dirty_vram * dirty_vram;

     /* If one of vcpus of this domain is in no_fill_mode or
      * mtrr/pat between vcpus is not the same, set is_in_uc_mode
diff --git a/xen/include/asm-x86/paging.h b/xen/include/asm-x86/paging.h
index d9b6950..fba06b0 100644
--- a/xen/include/asm-x86/paging.h
+++ b/xen/include/asm-x86/paging.h
@@ -137,10 +137,10 @@ struct paging_mode {
 void paging_free_log_dirty_bitmap(struct domain *d);

 /* get the dirty bitmap for a specific range of pfns */
-int paging_log_dirty_range(struct domain *d,
-                           unsigned long begin_pfn,
-                           unsigned long nr,
-                           XEN_GUEST_HANDLE_64(uint8) dirty_bitmap);
+void paging_log_dirty_range(struct domain *d,
+                            unsigned long begin_pfn,
+                            unsigned long nr,
+                            uint8_t *dirty_bitmap);

 /* enable log dirty */
 int paging_log_dirty_enable(struct domain *d);
@@ -161,6 +161,11 @@ void paging_mark_dirty(struct domain *d, unsigned long guest_mfn);
  * This is called from inside paging code, with the paging lock held. */
 int paging_mfn_is_dirty(struct domain *d, mfn_t gmfn);

+/* mark a page as dirty, from hap page fault handler */
+void paging_mark_dirty_hap(struct domain *d,
+                           unsigned long pfn,
+                           unsigned long guest_mfn);
+
 /*
  * Log-dirty radix tree indexing:
  *   All tree nodes are PAGE_SIZE bytes, mapped on-demand.
@@ -183,15 +188,6 @@ int paging_mfn_is_dirty(struct domain *d, mfn_t gmfn);
 #define L4_LOGDIRTY_IDX(pfn) 0
 #endif

-/* VRAM dirty tracking support */
-struct sh_dirty_vram {
-    unsigned long begin_pfn;
-    unsigned long end_pfn;
-    paddr_t *sl1ma;
-    uint8_t *dirty_bitmap;
-    s_time_t last_dirty;
-};
-
 /*****************************************************************************
  * Entry points into the paging-assistance code */

diff --git a/xen/include/asm-x86/shadow.h b/xen/include/asm-x86/shadow.h
index 88a8cd2..bdb8dcd 100644
--- a/xen/include/asm-x86/shadow.h
+++ b/xen/include/asm-x86/shadow.h
@@ -62,12 +62,6 @@ void shadow_vcpu_init(struct vcpu *v);
 /* Enable an arbitrary shadow mode.  Call once at domain creation. */
 int shadow_enable(struct domain *d, u32 mode);

-/* Enable VRAM dirty bit tracking. */
-int shadow_track_dirty_vram(struct domain *d,
-                            unsigned long first_pfn,
-                            unsigned long nr,
-                            XEN_GUEST_HANDLE_64(uint8) dirty_bitmap);
-
 /* Handler for shadow control ops: operations from user-space to enable
  * and disable ephemeral shadow modes (test mode and log-dirty mode) and
  * manipulate the log-dirty bitmap. */
--
1.7.9.5


[-- Attachment #2: multi-dirty-vram.pdf --]
[-- Type: application/pdf, Size: 525873 bytes --]



* Re: [PATCH] Provide support for multiple frame buffers in Xen.
  2012-10-16 18:15 [PATCH] Provide support for multiple frame buffers in Xen Robert Phillips
  2012-10-16 18:21 ` Robert Phillips
@ 2012-10-22 16:10 ` Tim Deegan
  2012-10-22 17:45   ` Robert Phillips
  2012-11-01 14:24 ` Tim Deegan
  2 siblings, 1 reply; 35+ messages in thread
From: Tim Deegan @ 2012-10-22 16:10 UTC (permalink / raw)
  To: Robert Phillips; +Cc: Robert Phillips, xen-devel

At 14:15 -0400 on 16 Oct (1350396902), Robert Phillips wrote:
> From: Robert Phillips <robert.phillips@virtualcomputer.com>
> 
> Support is provided for both shadow and hardware assisted paging (HAP) modes.
> This code bookkeeps the set of video frame buffers (vram),
> detects when the guest has modified any of those buffers and, upon request,
> returns a bitmap of the modified pages.
> 
> This lets other software components re-paint the portions of the monitor (or monitors) that have changed.
> Each monitor has a frame buffer of some size at some position in guest physical memory.
> The set of frame buffers being tracked can change over time as monitors are plugged and unplugged.
> 
> Signed-Off-By: Robert Phillips <robert.phillips@citrix.com>
> ---
>  xen/arch/x86/hvm/Makefile            |    3 +-
>  xen/arch/x86/hvm/dirty_vram.c        |  878 ++++++++++++++++++++++++++++++++++
>  xen/arch/x86/hvm/hvm.c               |    4 +-
>  xen/arch/x86/mm/hap/hap.c            |  140 +-----
>  xen/arch/x86/mm/paging.c             |  232 ++++-----
>  xen/arch/x86/mm/shadow/common.c      |  335 +++++++------
>  xen/arch/x86/mm/shadow/multi.c       |  169 +++----
>  xen/arch/x86/mm/shadow/multi.h       |    7 +-
>  xen/arch/x86/mm/shadow/types.h       |    1 +
>  xen/include/asm-x86/hap.h            |    4 -
>  xen/include/asm-x86/hvm/dirty_vram.h |  157 ++++++
>  xen/include/asm-x86/hvm/domain.h     |    2 +-
>  xen/include/asm-x86/paging.h         |   22 +-
>  xen/include/asm-x86/shadow.h         |    6 -
>  14 files changed, 1403 insertions(+), 557 deletions(-)

Wow.  That's a bunch of code! :)  Thanks for sending it, and for the
document too.

You don't say why it's useful to have multiple framebuffers -- I take it
this is useful for laptop environments?  Also, I'm a bit surprised not
to see some hypercall API changes to go with it.

Reading the PDF you sent, it looks like you've implemented log-dirty in
two new ways: by scanning the shadow PTEs and by scanning EPT entries.
But you also leave the old ways (trap-on-access and populate a bitmap) 
despite saying "The eliminated code was complex, buggy and unnecessary".
If the bitmap-tracking code is really buggy we need to know about it, as
that's what we use for live migration!  What's the problem with it?

I've had a brief skim of the code, and it looks rather over-complex for
what it does.  I'm not sure what the purpose of all these linked lists
of ranges is -- couldn't this just be done with a struct rangeset
describing which areas are being tracked?  Then since we already
maintain a sparse bitmap to hold the dirty log we don't need any other
metadata, or other ways of checking dirtiness.

Style nits: you're not consistently following the Xen coding style, in
particular the spaces around parentheses in 'if's and 'for's.  Also
you've got some code in new files that's clearly at least derived from
old code but just got your own name and copyright at the head of the
file.  Please carry the copyright over from old code when you move it.

Cheers,

Tim.


* Re: [PATCH] Provide support for multiple frame buffers in Xen.
  2012-10-22 16:10 ` Tim Deegan
@ 2012-10-22 17:45   ` Robert Phillips
  2012-11-01 11:03     ` Tim Deegan
  0 siblings, 1 reply; 35+ messages in thread
From: Robert Phillips @ 2012-10-22 17:45 UTC (permalink / raw)
  To: Tim (Xen.org); +Cc: xen-devel

Tim,

Thank you for taking the time (and I expect it was a considerable time) to review this patch.

And thanks for pointing out the coding style nits.  I'll re-submit the patch when the dust settles regarding its contents.


re " You don't say why it's useful to have multiple framebuffers ..." 
Yes, in a laptop environment there can be multiple monitors, each with its own frame buffer, and the monitors can be plugged/unplugged dynamically.
So the number of monitors and the location and size of their framebuffers are dynamic.


re: The " complex, buggy and unnecessary " code eliminated by this patch.
I was refering to the function paging_log_dirty_range().

I believe the bug was toward the end of the function, where it used to call clear_page(l1).
The function copies bits from l1 into a temporary bitmap, then copies them from there to the user-provided dirty_bitmap.
When it's done, it clears the page at l1.
But two framebuffers might cohabit that page, not overlapping but at distinct areas within it.
Reading the dirtiness for one frame buffer and then clearing the whole page wipes out information "owned" by the other frame buffer.
This bug would not show up if there is only one frame buffer, so your live migration code is OK.
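
To make the aliasing concrete, here is a minimal userspace sketch of the failure mode (illustrative C only, not Xen code; the leaf size, pfn values and bit layout are assumptions for the example):

/* Minimal model of the clear_page(l1) aliasing: two framebuffers whose
 * dirty bits live in the same log-dirty leaf page.  Reporting one range
 * and then wiping the whole page loses the other range's pending bits.
 * All sizes and pfn values below are made up for the example. */
#include <stdio.h>
#include <string.h>
#include <stdint.h>

#define LEAF_BYTES 4096                 /* one leaf page, 1 bit per pfn */
static uint8_t leaf[LEAF_BYTES];

static void mark_dirty(unsigned long pfn)
{
    leaf[pfn / 8] |= 1 << (pfn % 8);    /* pfns stay small in this toy */
}

/* Report dirty bits for [begin, begin+nr) and then, like the old code,
 * clear the whole leaf page. */
static unsigned report_range_buggy(unsigned long begin, unsigned long nr,
                                   uint8_t *out)
{
    unsigned i, dirty = 0;
    for ( i = 0; i < nr; i++ )
    {
        unsigned long pfn = begin + i;
        if ( leaf[pfn / 8] & (1 << (pfn % 8)) )
        {
            out[i / 8] |= 1 << (i % 8);
            dirty++;
        }
    }
    memset(leaf, 0, LEAF_BYTES);        /* the clear_page(l1) analogue */
    return dirty;
}

int main(void)
{
    uint8_t bitmap_a[64] = { 0 }, bitmap_b[64] = { 0 };

    mark_dirty(0x010);                  /* write hits framebuffer A */
    mark_dirty(0x210);                  /* write hits framebuffer B */

    printf("A sees %u dirty page(s)\n", report_range_buggy(0x000, 0x200, bitmap_a));
    printf("B sees %u dirty page(s)\n", report_range_buggy(0x200, 0x200, bitmap_b));
    return 0;
}

Run as-is, range A reports its one dirty page but range B then reports nothing, even though its page was written before A's query.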

The old code wasn't really necessary because there is an easier way of telling whether some page is dirty.
Yes, one can look at the dirty bit, as the old code did.
Or one can check whether the page's type has been switched to p2m_ram_rw, which is what the new function hap_clean_vram_tracking_range() does.

In terms of complexity, the new hap_clean_vram_tracking_range() is much simpler than the old paging_log_dirty_range().
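
If it helps, here is a tiny userspace model of that type-based approach (illustrative only; the names echo the p2m types but this is not the Xen implementation). A guest write faults the page from the write-protected logdirty type to rw; the clean pass reports every page that is now rw as dirty and re-protects it:

#include <stdio.h>
#include <stdint.h>

enum ptype { P2M_LOGDIRTY, P2M_RAM_RW };

#define NR_PFNS 8
static enum ptype type[NR_PFNS];        /* one p2m type per pfn in the range */

static void guest_write(unsigned pfn)
{
    /* What the write fault does: lift the write protection. */
    if ( type[pfn] == P2M_LOGDIRTY )
        type[pfn] = P2M_RAM_RW;
}

static void clean_range(uint8_t *bitmap)
{
    unsigned pfn;
    for ( pfn = 0; pfn < NR_PFNS; pfn++ )
        if ( type[pfn] == P2M_RAM_RW )
        {
            bitmap[pfn / 8] |= 1 << (pfn % 8);  /* type flipped => dirty */
            type[pfn] = P2M_LOGDIRTY;           /* re-arm write protection */
        }
}

int main(void)
{
    uint8_t bitmap = 0;
    unsigned pfn;

    for ( pfn = 0; pfn < NR_PFNS; pfn++ )
        type[pfn] = P2M_LOGDIRTY;

    guest_write(3);
    guest_write(5);
    clean_range(&bitmap);
    printf("dirty bitmap: 0x%02x\n", bitmap);   /* prints 0x28 */
    return 0;
}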


re: " implemented log-dirty in two new ways"

The HAP code path scans EPT entries.  Obviously the shadow code does not.
The shadow code does substantial bookkeeping, which I'll get to in a minute, which the HAP path does not.
Although HAP does bookkeep the set of ranges, all the linked-list stuff isn't used or allocated.

In shadow mode the new code makes a clean separation between determining the interesting set of PTEs and examining those PTEs looking for dirty bits.

When a new range is detected, the code *does* scan all PTEs looking for ones that map into the new range but (unlike the old code) that is a one-time event.
From then on the range's pages are tracked by "dead reckoning", i.e. by hooking all changes to PTEs to see if they pertain to the range.
Unfortunately when a process terminates it doesn't tear down its page tables;  it just abandons them.
So a range can get left pointing to a PTE that is no longer in a page table page.
But, in due time, the shadow code recognizes this and removes them.  The new code hooks that too, and updates its ranges accordingly.
When a no-longer-in-use range's pages have all been recycled, the range deletes itself.

So the overhead of  bookkeeping a range's set of interesting PTEs is low.

And when it's time to look for dirty bits, we know precisely which PTEs to look at.
The old code used to scan all page tables periodically and we would see a performance hit with precisely that periodicity.
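
As a sketch of that query step (again just a toy: a fake PTE array stands in for the shadow L1 tables, and only the PAGE_DIRTY value is the real x86 PTE dirty bit), walk only the recorded PTE locations, test the dirty bit, clear it, and note the page as dirty:

#include <stdio.h>
#include <stdint.h>

#define PAGE_DIRTY (1ULL << 6)          /* x86 PTE dirty bit */

static uint64_t fake_l1[16];            /* stands in for shadow L1 entries */

/* The range remembers which PTE slot maps each of its pfns (-1: unmapped). */
static int recorded_slot[4] = { 2, 5, -1, 9 };

static void query_range(uint8_t *bitmap)
{
    unsigned i;
    for ( i = 0; i < 4; i++ )
    {
        int slot = recorded_slot[i];
        if ( slot < 0 )
            continue;
        if ( fake_l1[slot] & PAGE_DIRTY )
        {
            bitmap[i / 8] |= 1 << (i % 8);
            fake_l1[slot] &= ~PAGE_DIRTY;   /* clear for the next round */
        }
    }
}

int main(void)
{
    uint8_t bitmap = 0;

    fake_l1[5] |= PAGE_DIRTY;           /* guest wrote the page behind pfn 1 */
    query_range(&bitmap);
    printf("dirty bitmap: 0x%02x\n", bitmap);   /* prints 0x02 */
    return 0;
}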

One unfortunate bit of complexity relates to the fact that several PTEs can map to the same guest physical page.
We have to bookkeep them all, so each PTE that maps to a guest physical page must be represented by its own dv_paddr_link,
and, for the set that relates to the same guest page, they are all linked together.
The head of the linked list is the entry in the range's pl_tab array that corresponds to that guest physical page.
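For reference, the shape of that structure is roughly as follows (field names as used in the patch; details abbreviated):

    typedef struct dv_paddr_link {
        paddr_t sl1ma;                  /* machine address of one shadow L1 PTE */
        struct dv_paddr_link *pl_next;  /* next mapping of the same guest page */
    } dv_paddr_link_t;

    /* range->pl_tab[ gfn - range->begin_pfn ] is the head dv_paddr_link_t for
     * that guest page; any additional mappings are chained via pl_next. */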


re: " since we already maintain a sparse bitmap to hold the dirty log"
I don't believe the dirty log is maintained except when the domain's paging mode has PG_log_dirty set.
That mode exists for live migration, and the discipline for entering/leaving it is quite different from the finer granularity needed for dirty vram.

Thanks
-- rsp
Robert Phillips 
Principal Software Engineer,  XenClient - Citrix - Westford


-----Original Message-----
From: Tim Deegan [mailto:tim@xen.org] 
Sent: Monday, October 22, 2012 12:10 PM
To: Robert Phillips
Cc: xen-devel@lists.xen.org; Robert Phillips
Subject: Re: [Xen-devel] [PATCH] Provide support for multiple frame buffers in Xen.

At 14:15 -0400 on 16 Oct (1350396902), Robert Phillips wrote:
> From: Robert Phillips <robert.phillips@virtualcomputer.com>
> 
> Support is provided for both shadow and hardware assisted paging (HAP) modes.
> This code bookkeeps the set of video frame buffers (vram),
> detects when the guest has modified any of those buffers and, upon request,
> returns a bitmap of the modified pages.
> 
> This lets other software components re-paint the portions of the monitor (or monitors) that have changed.
> Each monitor has a frame buffer of some size at some position in guest physical memory.
> The set of frame buffers being tracked can change over time as monitors are plugged and unplugged.
> 
> Signed-Off-By: Robert Phillips <robert.phillips@citrix.com>
> ---
>  xen/arch/x86/hvm/Makefile            |    3 +-
>  xen/arch/x86/hvm/dirty_vram.c        |  878 ++++++++++++++++++++++++++++++++++
>  xen/arch/x86/hvm/hvm.c               |    4 +-
>  xen/arch/x86/mm/hap/hap.c            |  140 +-----
>  xen/arch/x86/mm/paging.c             |  232 ++++-----
>  xen/arch/x86/mm/shadow/common.c      |  335 +++++++------
>  xen/arch/x86/mm/shadow/multi.c       |  169 +++----
>  xen/arch/x86/mm/shadow/multi.h       |    7 +-
>  xen/arch/x86/mm/shadow/types.h       |    1 +
>  xen/include/asm-x86/hap.h            |    4 -
>  xen/include/asm-x86/hvm/dirty_vram.h |  157 ++++++
>  xen/include/asm-x86/hvm/domain.h     |    2 +-
>  xen/include/asm-x86/paging.h         |   22 +-
>  xen/include/asm-x86/shadow.h         |    6 -
>  14 files changed, 1403 insertions(+), 557 deletions(-)

Wow.  That's a bunch of code! :)  Thanks for sending it, and for the
document too.

You don't say why it's useful to have multiple framebuffers -- I take it
this is useful for laptop environments?  Also, I'm a bit surprised not
to see some hypercall API changes to go with it.

Reading the PDF you sent, it looks like you've implemented log-dirty in
two new ways: by scanning the shadow PTEs and by scanning EPT entries.
But you also leave the old ways (trap-on-access and populate a bitmap) 
despite saying "The eliminated code was complex, buggy and unnecessary".
If the bitmap-tracking code is really buggy we need to know about it, as
that's what we use for live migration!  What's the problem with it?

I've had a brief skim of the code, and it looks rather over-complex for
what it does.  I'm not sure what the purpose of all these linked lists
of ranges is -- couldn't this just be done with a struct rangeset
describing which areas are being tracked?  Then since we already
maintain a sparse bitmap to hold the dirty log we don't need any other
metadata, or other ways of checking dirtiness.

Style nits: you're not consistently following the Xen coding style, in
particular the spaces around parentheses in 'if's and 'for's.  Also
you've got some code in new files that's clearly at least derived from
old code but just got your own name and copyright at the head of the
file.  Please carry the copyright over from old code when you move it.

Cheers,

Tim.

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH] Provide support for multiple frame buffers in Xen.
  2012-10-22 17:45   ` Robert Phillips
@ 2012-11-01 11:03     ` Tim Deegan
  2012-11-01 11:07       ` Robert Phillips
  0 siblings, 1 reply; 35+ messages in thread
From: Tim Deegan @ 2012-11-01 11:03 UTC (permalink / raw)
  To: Robert Phillips; +Cc: xen-devel

Hi Robert,

I've spent a bit more time digging around and I think I have a better
idea of why you've done things the way you did.  The 'simple' version I
was thinking of doesn't work as well as I thought. :|

At 13:45 -0400 on 22 Oct (1350913547), Robert Phillips wrote:
> I believe the bug was toward the end of the function where it used to
> call clear_page(l1) The function copies bits from l1 into a temporary
> bitmap, then copies them from there to the user-provided dirty_bitmap.
> When it's done, it clears the page at l1.  But two framebuffers might
> cohabit that page, not overlapping but at distinct areas within it.
> Reading the dirtiness for one frame buffer and then clearing the whole
> page wipes out information "owned" by the other frame buffer.  This
> bug would not show up if there is only one frame buffer so your live
> migration code is ok.

Yep, understood.

> And when it's time to look for dirty bits, we know precisely which
> PTEs to look at.  The old code used to scan all page tables
> periodically and we would see a performance hit with precisely that
> periodicity.

Was this caused by the 2-second timeout where it would try to unmap the
vram if it hadn't been dirtied in that time?  Were you finding that it
would unmap and then immediately try to map it again?

> One unfortunate bit of complexity relates to the fact that several
> PTEs can map to the same guest physical page.  We have to bookkeep
> them all

The old code basically relied on this not happening (assuming that
framebuffers would be mapped only once).  Is that assumption just
wrong?  Is it broken by things like DirectX?

> so each PTE that maps to a guest physical page must be
> represented by its own dv_paddr_link, and, for the set that relate to
> the same guest page, they are all linked together.  The head of the
> linked list is the entry in the range's pl_tab array that corresponds
> to that guest physical page.

Right; you've built a complete reverse mapping from pfn to ptes.

> re: " since we already maintain a sparse bitmap to hold the dirty log"
> I don't believe the dirty log is maintained except when page_mode is set for PG_log_dirty.
> That mode exists for live migrate and the discipline for entering/leaving it is quite different than the finer granularity needed for dirty vram.

Sorry, I had got confused there.  I was thinking that we could move over
more to PG_log_dirty-style operation, where we'd trap on writes and
update the bitmap (since we now keep that bitmap as a trie the
sparseness would be OK).  But on closer inspection the cost of clearing
all the mappings when the bitmap is cleared would be either too much
overhead (throw away _all_ shadows every time) or about as complex as
the mechanism needed to scan the _PAGE_DIRTY bits.

I think I have a better idea of the intention of the patch now; I'll go
over the code in detail today.

Cheers,

Tim.

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH] Provide support for multiple frame buffers in Xen.
  2012-11-01 11:03     ` Tim Deegan
@ 2012-11-01 11:07       ` Robert Phillips
  2012-11-01 11:43         ` Ian Campbell
  0 siblings, 1 reply; 35+ messages in thread
From: Robert Phillips @ 2012-11-01 11:07 UTC (permalink / raw)
  To: Tim (Xen.org); +Cc: xen-devel

Thanks, Tim, for your review.

I still owe xen-devel a revised version of the patch that adheres better to the Xen coding style conventions.
(Excuse my ignorance but) are those documented somewhere?

-- rsp

-----Original Message-----
From: Tim Deegan [mailto:tim@xen.org] 
Sent: Thursday, November 01, 2012 7:03 AM
To: Robert Phillips
Cc: xen-devel@lists.xen.org
Subject: Re: [Xen-devel] [PATCH] Provide support for multiple frame buffers in Xen.

Hi Robert,

I've spent a bit more time digging around and I think I have a better idea of why you've done things the way you did.  The 'simple' version I was thinking of doesn't work as well as I thought. :|

At 13:45 -0400 on 22 Oct (1350913547), Robert Phillips wrote:
> I believe the bug was toward the end of the function where it used to 
> call clear_page(l1) The function copies bits from l1 into a temporary 
> bitmap, then copies them from there to the user-provided dirty_bitmap.
> When it's done, it clears the page at l1.  But two framebuffers might 
> cohabit that page, not overlapping but at distinct areas within it.
> Reading the dirtiness for one frame buffer and then clearing the whole 
> page wipes out information "owned" by the other frame buffer.  This 
> bug would not show up if there is only one frame buffer so your live 
> migration code is ok.

Yep, understood.

> And when it's time to look for dirty bits, we know precisely which 
> PTEs to look at.  The old code used to scan all page tables 
> periodically and we would see a performance hit with precisely that 
> periodicity.

Was this caused by the 2-second timeout where it would try to unmap the vram if it hadn't been dirtied in that time?  Were you finding that it would unmap and then immediately try to map it again?

> One unfortunate bit of complexity relates to the fact that several 
> PTEs can map to the same guest physical page.  We have to bookkeep 
> them all

The old code basically relied on this not happening (assuming that framebuffers would be mapped only once).  Is that assumption just wrong?  Is it broken by things like DirectX?

> so each PTE that maps to a guest physical page must be represented by 
> its own dv_paddr_link, and, for the set that relate to the same guest 
> page, they are all linked together.  The head of the linked list is 
> the entry in the range's pl_tab array that corresponds to that guest 
> physical page.

Right; you've built a complete reverse mapping from pfn to ptes.

> re: " since we already maintain a sparse bitmap to hold the dirty log"
> I don't believe the dirty log is maintained except when page_mode is set for PG_log_dirty.
> That mode exists for live migrate and the discipline for entering/leaving it is quite different than the finer granularity needed for dirty vram.

Sorry, I had got confused there.  I was thinking that we could move over more to PG_log_dirty-style operation, where we'd trap on writes and update the bitmap (since we now keep that bitmap as a trie the sparseness would be OK).  But on closer inspection the cost of clearing all the mappings when the bitmap is cleared would be either too much overhead (throw away _all_ shadows every time) or about as complex as the mechanism needed to scan the _PAGE_DIRTY bits.

I think I have a better idea of the intention of the patch now; I'll go over the code in detail today.

Cheers,

Tim.

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH] Provide support for multiple frame buffers in Xen.
  2012-11-01 11:07       ` Robert Phillips
@ 2012-11-01 11:43         ` Ian Campbell
  0 siblings, 0 replies; 35+ messages in thread
From: Ian Campbell @ 2012-11-01 11:43 UTC (permalink / raw)
  To: Robert Phillips; +Cc: Tim (Xen.org), xen-devel

Please can you avoid top posting; it makes conversations harder to
follow.

On Thu, 2012-11-01 at 11:07 +0000, Robert Phillips wrote:
> Thanks, Tim, for your review.
> 
> I still owe xen-devel a revised version of the patch that adheres better to the Xen coding style conventions.
> (Excuse my ignorance but) are those documented somewhere?

It's in the file "CODING_STYLE" at the top-level of the source tree.

Ian.

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH] Provide support for multiple frame buffers in Xen.
  2012-10-16 18:15 [PATCH] Provide support for multiple frame buffers in Xen Robert Phillips
  2012-10-16 18:21 ` Robert Phillips
  2012-10-22 16:10 ` Tim Deegan
@ 2012-11-01 14:24 ` Tim Deegan
  2012-11-07 20:36   ` Robert Phillips
  2 siblings, 1 reply; 35+ messages in thread
From: Tim Deegan @ 2012-11-01 14:24 UTC (permalink / raw)
  To: Robert Phillips; +Cc: Robert Phillips, xen-devel

Hi, 

At 14:15 -0400 on 16 Oct (1350396902), Robert Phillips wrote:
> From: Robert Phillips <robert.phillips@virtualcomputer.com>
> 
> Support is provided for both shadow and hardware assisted paging (HAP) modes.
> This code bookkeeps the set of video frame buffers (vram),
> detects when the guest has modified any of those buffers and, upon request,
> returns a bitmap of the modified pages.
> 
> This lets other software components re-paint the portions of the monitor (or monitors) that have changed.
> Each monitor has a frame buffer of some size at some position in guest physical memory.
> The set of frame buffers being tracked can change over time as monitors are plugged and unplugged.

Having read through this in detail, it's looking very plausible. :)
A few style nits:
 - please use the Xen spacings around 'if ( foo )' and 'for ( x; y; z )';
 - there's a bit of trailing whitespace in the new file, and a few
   places where indentation seems to have gone a bit wrong;
 - please make sure the whole thing is linewrapped to <80 characters; and
 - there's no need for braces around single-line blocks.

More substantive comments:
 - I think the dirty_vram.c and dirty_vram.h files belong under mm/
   rather than under hvm/.  The ``#include "../mm/mm-locks.h"'' is 
   an indicator that this is really MM code. 
 - Please use xzalloc() rather than xmalloc() + memset(0).  It avoids
   the sizes of alloc and memset getting out of sync.
 - The i386 build is dead, so you can drop some #ifdef __i386__ sections.
 - There really ought to be some limit on how many PTEs you're willing
   to track.  Otherwise a large guest can consume lots and lots of Xen's
   memory by making lots of PTEs that point to framebuffers.  That
   might also lead to performance problems, e.g. in the unshadow
   function that walks over all those linked lists. 
   Also, I think that the memory for the paddr_links ought to come from
   the shadow pool (i.e. using domain->arch.paging.alloc_page())
   rather than soaking up otherwise free memory.

A few other detailed comments below...

> +/* Free a paddr_link struct, given address of its predecessor in linked list */
> +dv_paddr_link_t *
> +free_paddr_link(struct domain *d,
> +                dv_paddr_link_t **ppl,
> +                dv_paddr_link_t *pl)
> +{
> +    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
> +    dv_paddr_link_t *npl; /* next pl */
> +
> +    ASSERT( paging_locked_by_me(d) );
> +    /* extension mapping? */
> +    if (ppl) /* yes. free it */
> +    {
> +        pl = (*ppl);

This assignment seems like it should always be a noop.  Would it be
correct to replace it with ASSERT(pl == *ppl)?

> +        (*ppl) = npl = pl->pl_next;
> +    }
> +    else  /* main table */
> +    {
> +        /* move 2nd mapping to main table.
> +         * and free 2nd mapping */
> +        dv_paddr_link_t * spl;
> +        spl = pl->pl_next;
> +        if (spl == NULL)
> +        {
> +            pl->sl1ma = INVALID_PADDR;
> +            return pl;
> +        }
> +        pl->sl1ma = spl->sl1ma;
> +        pl->pl_next = spl->pl_next;
> +        npl = pl; /* reprocess main table entry again */
> +        pl = spl;

OK, that took a lot of staring at to be sure it's right. :)  I'd be
inclined to just put all paddr_links in the linked list (and have an
array of pointers rather than an array of paddr_link_ts).  Is it worth
having the extra complexity here, and at the callers, to avoid a single
memory read?

> +    }
> +    pl->sl1ma = INVALID_PADDR;
> +    pl->pl_next = dirty_vram->pl_free;
> +    dirty_vram->pl_free = pl;
> +    return npl;
> +}
> +
> +
> +/* dirty_vram_range_update()
> + * This is called whenever a level 1 page table entry is modified.
> + * If the L1PTE is being cleared, the function removes any paddr_links
> + * that refer to it.
> + * If the L1PTE is being set to a frame buffer page, a paddr_link is
> + * created for that page's entry in pl_tab.
> + * Returns 1 iff entry found and set or cleared.
> + */
> +int dirty_vram_range_update(struct domain *d,
> +                            unsigned long gfn,
> +                            paddr_t sl1ma,
> +                            int set)
> +{
> +    int effective = 0;
> +    dv_range_t *range;
> +
> +    ASSERT(paging_locked_by_me(d));
> +    range = dirty_vram_range_find_gfn(d, gfn);
> +    if ( range )
> +    {

I think this would be more readable as 'if ( !range ) return 0' here 
rather than indenting most of the function. 

> +        unsigned long i = gfn - range->begin_pfn;
> +        dv_paddr_link_t *pl = &range->pl_tab[ i ];
> +        dv_paddr_link_t **ppl = NULL;
> +        int len = 0;
> +
> +        /* find matching entry (pl), if any, and its predecessor
> +         * in linked list (ppl) */
> +        while (pl != NULL)
> +        {
> +            if (pl->sl1ma == sl1ma || pl->sl1ma == INVALID_PADDR )
> +                break;
> +            ppl = &pl->pl_next;
> +            pl = *ppl;
> +            len++;
> +        }
> +            
> +        if (set)
> +        {
> +            /* Did we find sl1ma in either the main table or the linked list? */
> +            if (pl == NULL) /* no, so we'll need to alloc a link */
> +            {
> +                ASSERT(ppl != NULL);
> +                /* alloc link and append it to list */
> +                (*ppl) = pl = alloc_paddr_link(d);
> +                if (pl == NULL)
> +                    goto out;

This needs to signal some sort of error.  Otherwise, if we can't add
this sl1e to the list we'll just silently fail to track it.

> +            }
> +            if ( pl->sl1ma != sl1ma )
> +            {

ASSERT(pl->sl1ma == INVALID_PADDR) ? 

> +                pl->sl1ma = sl1ma;
> +                range->nr_mappings++;
> +            }
> +            effective = 1;
> +            if (len > range->mappings_hwm)
> +            {
> +                range->mappings_hwm = len;
> +#if DEBUG_update_vram_mapping
> +                gdprintk(XENLOG_DEBUG,
> +                         "[%lx] set      sl1ma:%lx hwm:%d mappings:%d freepages:%d\n",
> +                         gfn, sl1ma,
> +                         range->mappings_hwm,
> +                         range->nr_mappings,
> +                         d->arch.paging.shadow.free_pages);
> +#endif
> +            }
> +        }
> +        else /* clear */
> +        {
> +            if (pl && pl->sl1ma == sl1ma )
> +            {
> +#if DEBUG_update_vram_mapping
> +                gdprintk(XENLOG_DEBUG,
> +                         "[%lx] clear    sl1ma:%lx mappings:%d\n",
> +                         gfn, sl1ma,
> +                         range->nr_mappings-1);
> +#endif
> +                free_paddr_link(d, ppl, pl);
> +                if ( --range->nr_mappings == 0 )
> +                {
> +                    dirty_vram_range_free(d, range);

What's this for?  If the guest unmaps the framebuffer and remaps it (or
if the shadow PTs of the mappings are temporarily discarded) this will
stop us from tracking the new mappings until the toolstack asks for the
bitmap (and then it will be expensive to go and find the mappings).

> +                }
> +                effective = 1;
> +            }
> +        }
> +    }
> + out:
> +    return effective;
> +}

> +/* shadow_track_dirty_vram()
> + * This is the API called by the guest to determine which pages in the range
> + * from [begin_pfn:begin_pfn+nr) have been dirtied since the last call.
> + * It creates the domain's dv_dirty_vram on demand. 
> + * It creates ranges on demand when some [begin_pfn:nr) is first encountered.
> + * To collect the dirty bitmask it calls shadow_scan_dirty_flags().
> + * It copies the dirty bitmask into guest storage.
> + */
> +int shadow_track_dirty_vram(struct domain *d,
> +                            unsigned long begin_pfn,
> +                            unsigned long nr,
> +                            XEN_GUEST_HANDLE_64(uint8) guest_dirty_bitmap)
> +{
> +    int rc = 0;
> +    unsigned long end_pfn = begin_pfn + nr;
> +    int flush_tlb = 0;
> +    dv_range_t *range;
> +    struct p2m_domain *p2m = p2m_get_hostp2m(d);
> +
> +    if (end_pfn < begin_pfn
> +            || begin_pfn > p2m->max_mapped_pfn
> +            || end_pfn >= p2m->max_mapped_pfn)

I know you just copied this from the old definition but the limits seem
wrong here -- I think it should be:

    if ( end_pfn < begin_pfn || end_pfn > p2m->max_mapped_pfn + 1 )


> +/* hap_clean_vram_tracking_range()
> + * For all the pages in the range specified by [begin_pfn,nr),
> + * note in the dirty bitmap any page that has been marked as read-write,
> + * which signifies that the page has been dirtied, and reset the page
> + * to ram_logdirty. 
> + */
> +void hap_clean_vram_tracking_range(struct domain *d,
> +                                   unsigned long begin_pfn,
> +                                   unsigned long nr,
> +                                   uint8_t *dirty_bitmap)
> +{
> +    int i;
> +    unsigned long pfn;
> +    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
> +    dv_range_t *range;
> +
> +    ASSERT(p2m_locked_by_me(p2m_get_hostp2m(d)));
> +    ASSERT(paging_locked_by_me(d));
> +    
> +    if ( !dirty_vram )
> +    {
> +        gdprintk(XENLOG_DEBUG, "Should only be called while tracking dirty vram.\n");
> +        return;
> +    }
> +
> +    range = dirty_vram_range_find(d, begin_pfn, nr);
> +    if (!range)
> +        return;

Oughtn't we to return all 1s in the bitmap here?  If the range isn't
currently being tracked we should conservatively assume it's all dirty,
right?

> +
> +    /* set l1e entries of P2M table to be read-only. */
> +    /* On first write, it page faults, its entry is changed to read-write,
> +     * its bit in the dirty bitmap is set, and on retry the write succeeds. */
> +    for (i = 0, pfn = range->begin_pfn; pfn < range->end_pfn; i++, pfn++)
> +    {
> +        p2m_type_t pt;
> +        pt = p2m_change_type(d, pfn, p2m_ram_rw, p2m_ram_logdirty);
> +        if (pt == p2m_ram_rw)
> +            dirty_bitmap[i >> 3] |= (1 << (i & 7));
> +    }
> +    flush_tlb_mask(d->domain_dirty_cpumask);
> +}
> +
> +static void hap_vram_tracking_init(struct domain *d)
> +{
> +    paging_log_dirty_init(d, hap_enable_vram_tracking,
> +                          hap_disable_vram_tracking,
> +                          NULL);
> +}
> +
> +/* hap_track_dirty_vram()
> + * Create the domain's dv_dirty_vram struct on demand.
> + * Create a dirty vram range on demand when some [begin_pfn:begin_pfn+nr] is first encountered.
> + * Collect the guest_dirty bitmask, a bit mask of the dirties vram pages, by
> + * calling paging_log_dirty_range().
> + */
> +int hap_track_dirty_vram(struct domain *d,
> +                         unsigned long begin_pfn,
> +                         unsigned long nr,
> +                         XEN_GUEST_HANDLE_64(uint8) guest_dirty_bitmap)
> +{
> +    long rc = 0;
> +    dv_dirty_vram_t *dirty_vram;
> +    int restart_log_dirty = 0;
> +
> +    paging_lock(d);
> +    dirty_vram = d->arch.hvm_domain.dirty_vram;
> +    if ( nr )
> +    {
> +        dv_range_t *range = NULL;
> +        int size = (nr + BITS_PER_LONG - 1) / BITS_PER_LONG;
> +        unsigned long dirty_bitmap[size];

All the users of this array cast to (uint8_t *) -- just declare it as
uint8_t * instead?

> +
> +        /* Already tracking dirty vram? */
> +        if ( paging_mode_log_dirty(d) && dirty_vram ) /* yes */
> +        {
> +            /* Handle the addition of another range */
> +            range = dirty_vram_range_find(d, begin_pfn, nr);
> +            if ( !range )
> +            {
> +                rc = -ENOMEM;
> +                if ( !(range = dirty_vram_range_alloc(d, begin_pfn, nr)) )
> +                    goto param_fail;
> +                restart_log_dirty = 1;
> +            }
> +        }
> +        /* Just starting to track dirty vram? */
> +        else if ( !paging_mode_log_dirty(d) && !dirty_vram ) /* yes */
> +        {
> +            rc = -ENOMEM;
> +            if ( !(dirty_vram = dirty_vram_alloc(d)) )
> +                goto param_fail;
> +            
> +            if ( !(range = dirty_vram_range_find_or_alloc(d, begin_pfn, nr)) )
> +                goto param_fail;
> +
> +            restart_log_dirty = 1;
> +            /* Initialize callbacks for vram tracking */
> +            hap_vram_tracking_init(d);
> +        }
> +        else
> +        {
> +            /* Test for invalid combination */
> +            if ( !paging_mode_log_dirty(d) && dirty_vram )
> +                rc = -EINVAL;
> +            else /* logging dirty of all memory, not tracking dirty vram */
> +                rc = -ENODATA;
> +            goto param_fail;
> +        }
> +        
> +        if (restart_log_dirty) 
> +        {
> +            /* disable then enable log dirty */

Why disable and re-enable?  The call to paging_log_dirty_range() below
will reset the p2m entries of the range you care about, so I think all
you need to do is enable it in the 'just starting' case above.

And, since you know you're in HAP mode, not in log-dirty mode, and
already have the paging lock, you can just set 
d->arch.paging.mode |= PG_log_dirty there rather than jumping through
the paging_log_dirty_enable() path and messing with locks.

> +            paging_unlock(d);
> +            if (paging_mode_log_dirty(d))
> +                paging_log_dirty_disable(d);
> +          
> +            rc = paging_log_dirty_enable(d);
> +            paging_lock(d);
> +            if (rc != 0)
> +                goto param_fail;
> +        }
> +        
> +        paging_unlock(d);
> +        memset(dirty_bitmap, 0x00, size * BYTES_PER_LONG);
> +	paging_log_dirty_range(d, begin_pfn, nr, (uint8_t*)dirty_bitmap);
> +        rc = -EFAULT;
> +        if ( copy_to_guest(guest_dirty_bitmap,
> +                           (uint8_t*)dirty_bitmap,
> +                           size * BYTES_PER_LONG) == 0 )
> +        {
> +            rc = 0;
> +        }
> +    }
> +    else
> +    {
> +        /* If zero pages specified while already tracking dirty vram
> +         * then stop tracking */
> +        if ( paging_mode_log_dirty(d) && dirty_vram ) {
> +            paging_unlock(d);
> +            rc = paging_log_dirty_disable(d);
> +            paging_lock(d);
> +            dirty_vram_free(d);

This is different from the shadow case -- there, IIUC, you just ignore
requests where nr == 0; here, you tear down all vram tracking.
Can you choose one of those, and document it in the public header?

> +        } else /* benign no-op */
> +        {
> +            rc = 0;
> +        }
> +        paging_unlock(d);
> +    }
> +
> +    return rc;


> +/* paging_mark_dirty_hap()
> + * Make a hap page writeable and mark it as dirty.
> + * This done atomically under the p2m and paging locks to avoid leaving
> + * a window where the page might be modified without being marked as dirty.
> + */

I'm perplexed by this -- AFAICT it's either not necessary (because all
log-dirty read/clean ops are done with the domain paused) or not sufficient
(because although the bitmap and the PTE are updated under the p2m lock,
the actual dirtying of the page happens at some other time).  Can you
spell out for me exactly what this is protecting against?

> +typedef int (*hash_pfn_callback_t)(struct vcpu *v,
> +                                   mfn_t smfn,
> +                                   unsigned long begin_pfn,
> +                                   unsigned long end_pfn,
> +                                   int *removed);
> +
> +static int hash_pfn_foreach(struct vcpu *v, 
> +                            unsigned int callback_mask, 
> +                            hash_pfn_callback_t callbacks[], 
> +                            unsigned long begin_pfn,
> +                            unsigned long end_pfn)
> +/* Walk the hash table looking at the types of the entries and 
> + * calling the appropriate callback function for each entry. 
> + * The mask determines which shadow types we call back for, and the array
> + * of callbacks tells us which function to call.
> + * Any callback may return non-zero to let us skip the rest of the scan. 
> + *
> + * WARNING: Callbacks MUST NOT add or remove hash entries unless they 
> + * then return non-zero to terminate the scan. */

This code duplication is a bit much.  I think you should recast the
existing hash_foreach() function to take a pointer as its fourth
argument instead of an MFN, and then make the existing callers just cast
their MFN argument as a pointer.  Then you can use the same function,
passing a pointer to a struct { begin, end, removed }.
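
Roughly the shape I have in mind (sketch only; the names here are illustrative):

    struct sh_range_walk {
        unsigned long begin_pfn, end_pfn;
        int removed;
    };

    /* Existing callers cast their MFN argument to the pointer; the
     * range-based callers pass a pointer to a struct sh_range_walk and
     * have their callbacks cast it back. */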

Please make the changes to hash_foreach() in a separate patch from the
dirty_vram stuff. 

> @@ -1211,12 +1164,14 @@ static int shadow_set_l1e(struct vcpu *v,
>                  shadow_l1e_remove_flags(new_sl1e, _PAGE_RW);
>                  /* fall through */
>              case 0:
> -                shadow_vram_get_l1e(new_sl1e, sl1e, sl1mfn, d);
> +                shadow_vram_fix_l1e(old_sl1e, new_sl1e, sl1e, sl1mfn, d);
>                  break;
>              }
>          }
>      } 
>  
> +    shadow_vram_fix_l1e(old_sl1e, new_sl1e, sl1e, sl1mfn, d);

Why is this being called twice here?

Cheers,

Tim.

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH] Provide support for multiple frame buffers in Xen.
  2012-11-01 14:24 ` Tim Deegan
@ 2012-11-07 20:36   ` Robert Phillips
  2012-11-08 13:25     ` Tim Deegan
  0 siblings, 1 reply; 35+ messages in thread
From: Robert Phillips @ 2012-11-07 20:36 UTC (permalink / raw)
  To: Tim (Xen.org); +Cc: xen-devel

Hi Tim,

Thank you for your in-depth review.
Concurrent with this email I will submit a revised patch (version 2) containing the changes I mention below.

-- rsp

> -----Original Message-----
> From: Tim Deegan [mailto:tim@xen.org]
> Sent: Thursday, November 01, 2012 10:25 AM
> To: Robert Phillips
> Cc: xen-devel@lists.xen.org; Robert Phillips
> Subject: Re: [Xen-devel] [PATCH] Provide support for multiple frame buffers
> in Xen.
>
> Hi,
>
> At 14:15 -0400 on 16 Oct (1350396902), Robert Phillips wrote:
> > From: Robert Phillips <robert.phillips@virtualcomputer.com>
> >
> > Support is provided for both shadow and hardware assisted paging (HAP)
> modes.
> > This code bookkeeps the set of video frame buffers (vram),
> > detects when the guest has modified any of those buffers and, upon
> request,
> > returns a bitmap of the modified pages.
> >
> > This lets other software components re-paint the portions of the monitor
> (or monitors) that have changed.
> > Each monitor has a frame buffer of some size at some position in guest
> physical memory.
> > The set of frame buffers being tracked can change over time as monitors
> are plugged and unplugged.
>
> Having read through this in detail, it's looking very plausible. :)
> A few style nits:

[ Done

>  - please use the Xen spacings around 'if ( foo )' and 'for ( x; y; z )';
>  - there's a bit of trailing whitespace in the new file, and a few
>    places where indentation seems to have gone a bit wrong;
>  - please make sure the whole thing is linewrapped to <80 characters; and
>  - there's no need for braces around single-line blocks.

] Done

>
> More substantive comments:
>  - I think the dirty_vram.c and dirty_vram.h files belong under mm/
>    rather than under hvm/.  The ``#include "../mm/mm-locks.h"'' is
>    an indicator that this is really MM code.

I have moved dirty_vram.c under mm/
I have moved dirty_vram.h under include/asm-x86.  In that location it is available to modules like hvm.c.

>  - Please use xzalloc() rather than xmalloc() + memset(0).  It avoids
>    the sizes of alloc and memset getting out of sync.

Done

>  - The i386 build is dead, so you can drop some #ifdef __i386__ sections.

Done

>  - There really ought to be some limit on how many PTEs you're willing
>    to track.  Otherwise a large guest can consume lots and lots of Xen's
>    memory by making lots of PTEs that point to framebuffers.  That
>    might also lead to performance problems, e.g. in the unshadow
>    function that walks over all those linked lists.

Done but see note below in the comment starting "This needs to signal some sort of error".

>    Also, I think that the memory for the paddr_links ought to come from
>    the shadow pool (i.e. using domain->arch.paging.alloc_page())
>    rather than soaking up otherwise free memory.

Done

>
> A few other detailed comments below...
>
> > +/* Free a paddr_link struct, given address of its predecessor in linked list
> */
> > +dv_paddr_link_t *
> > +free_paddr_link(struct domain *d,
> > +                dv_paddr_link_t **ppl,
> > +                dv_paddr_link_t *pl)
> > +{
> > +    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
> > +    dv_paddr_link_t *npl; /* next pl */
> > +
> > +    ASSERT( paging_locked_by_me(d) );
> > +    /* extension mapping? */
> > +    if (ppl) /* yes. free it */
> > +    {
> > +        pl = (*ppl);
>
> This assignment seems like it should always be a noop.  Would it be
> correct to replace it with ASSERT(pl == *ppl)?

Yes,  Done.

>
> > +        (*ppl) = npl = pl->pl_next;
> > +    }
> > +    else  /* main table */
> > +    {
> > +        /* move 2nd mapping to main table.
> > +         * and free 2nd mapping */
> > +        dv_paddr_link_t * spl;
> > +        spl = pl->pl_next;
> > +        if (spl == NULL)
> > +        {
> > +            pl->sl1ma = INVALID_PADDR;
> > +            return pl;
> > +        }
> > +        pl->sl1ma = spl->sl1ma;
> > +        pl->pl_next = spl->pl_next;
> > +        npl = pl; /* reprocess main table entry again */
> > +        pl = spl;
>
> OK, that took a lot of staring at to be sure it's right. :)

I added some comments.  Reading code shouldn't be so taxing.

> I'd be
> inclined to just put all paddr_links in the linked list (and have an
> array of pointers rather than an array of paddr_link_ts).  Is it worth
> having the extra complexity here, and at the callers, to avoid a single
> memory read?

Almost all frame buffer pages have a single mapping so the algorithm's common case is met by constructing pl_tab as an array of paddr_links.
That is, the table's paddr_links will rarely point to a chain of extension paddr_links.
The code encapsulates the complexity in a single function.  Sorry it required so much staring.
Even if all paddr_links were stored in a linked list, the caller would have to walk that linked list, so would be just as complex.

>
> > +    }
> > +    pl->sl1ma = INVALID_PADDR;
> > +    pl->pl_next = dirty_vram->pl_free;
> > +    dirty_vram->pl_free = pl;
> > +    return npl;
> > +}
> > +
> > +
> > +/* dirty_vram_range_update()
> > + * This is called whenever a level 1 page table entry is modified.
> > + * If the L1PTE is being cleared, the function removes any paddr_links
> > + * that refer to it.
> > + * If the L1PTE is being set to a frame buffer page, a paddr_link is
> > + * created for that page's entry in pl_tab.
> > + * Returns 1 iff entry found and set or cleared.
> > + */
> > +int dirty_vram_range_update(struct domain *d,
> > +                            unsigned long gfn,
> > +                            paddr_t sl1ma,
> > +                            int set)
> > +{
> > +    int effective = 0;
> > +    dv_range_t *range;
> > +
> > +    ASSERT(paging_locked_by_me(d));
> > +    range = dirty_vram_range_find_gfn(d, gfn);
> > +    if ( range )
> > +    {
>
> I think this would be more readable as 'if ( !range ) return 0' here
> rather than indenting most of the function.

Done.

>
> > +        unsigned long i = gfn - range->begin_pfn;
> > +        dv_paddr_link_t *pl = &range->pl_tab[ i ];
> > +        dv_paddr_link_t **ppl = NULL;
> > +        int len = 0;
> > +
> > +        /* find matching entry (pl), if any, and its predecessor
> > +         * in linked list (ppl) */
> > +        while (pl != NULL)
> > +        {
> > +            if (pl->sl1ma == sl1ma || pl->sl1ma == INVALID_PADDR )
> > +                break;
> > +            ppl = &pl->pl_next;
> > +            pl = *ppl;
> > +            len++;
> > +        }
> > +
> > +        if (set)
> > +        {
> > +            /* Did we find sl1ma in either the main table or the linked list? */
> > +            if (pl == NULL) /* no, so we'll need to alloc a link */
> > +            {
> > +                ASSERT(ppl != NULL);
> > +                /* alloc link and append it to list */
> > +                (*ppl) = pl = alloc_paddr_link(d);
> > +                if (pl == NULL)
> > +                    goto out;
>
> This needs to signal some sort of error.  Otherwise, if we can't add
> this sl1e to the list we'll just silently fail to track it.

I don't know what sort of error we can signal.
The immediate symptom would be that areas of the monitor would not be refreshed.
But, since we're running out of memory, that might be the least of the guest's woes.

The updated patch actually makes the symptom more likely (though still very unlikely) by
putting an arbitrary bound on the length of paddr_link chains.
It handles a rogue process, one that has an arbitrarily large number of mappings for
frame buffer pages, by simply not recording the excessive mappings.
If that results in unrefreshed blocks on the monitor, so be it.

>
> > +            }
> > +            if ( pl->sl1ma != sl1ma )
> > +            {
>
> ASSERT(pl->sl1ma == INVALID_PADDR) ?

Yes, Done.

>
> > +                pl->sl1ma = sl1ma;
> > +                range->nr_mappings++;
> > +            }
> > +            effective = 1;
> > +            if (len > range->mappings_hwm)
> > +            {
> > +                range->mappings_hwm = len;
> > +#if DEBUG_update_vram_mapping
> > +                gdprintk(XENLOG_DEBUG,
> > +                         "[%lx] set      sl1ma:%lx hwm:%d mappings:%d
> freepages:%d\n",
> > +                         gfn, sl1ma,
> > +                         range->mappings_hwm,
> > +                         range->nr_mappings,
> > +                         d->arch.paging.shadow.free_pages);
> > +#endif
> > +            }
> > +        }
> > +        else /* clear */
> > +        {
> > +            if (pl && pl->sl1ma == sl1ma )
> > +            {
> > +#if DEBUG_update_vram_mapping
> > +                gdprintk(XENLOG_DEBUG,
> > +                         "[%lx] clear    sl1ma:%lx mappings:%d\n",
> > +                         gfn, sl1ma,
> > +                         range->nr_mappings-1);
> > +#endif
> > +                free_paddr_link(d, ppl, pl);
> > +                if ( --range->nr_mappings == 0 )
> > +                {
> > +                    dirty_vram_range_free(d, range);
>
> What's this for?  If the guest unmaps the framebuffer and remaps it (or
> if the shadow PTs of the mappings are temporarily discarded) this will
> stop us from tracking the new mappings until the toolstack asks for the
> bitmap (and then it will be expensive to go and find the mappings).
>

I don't see this happening.  If the guest unmaps the framebuffer, the shadow
code lazily recovers the shadow pages, so tracking will continue until it decides a page
is no longer a shadow page.  That is when this code is invoked.

It tears down the mappings to that page and if some range ends up with no mappings then the range is useless.
Vram ranges are generated willy-nilly as needed.   This is the only mechanism for cleaning them up.

> > +                }
> > +                effective = 1;
> > +            }
> > +        }
> > +    }
> > + out:
> > +    return effective;
> > +}
>
> > +/* shadow_track_dirty_vram()
> > + * This is the API called by the guest to determine which pages in the
> range
> > + * from [begin_pfn:begin_pfn+nr) have been dirtied since the last call.
> > + * It creates the domain's dv_dirty_vram on demand.
> > + * It creates ranges on demand when some [begin_pfn:nr) is first
> encountered.
> > + * To collect the dirty bitmask it calls shadow_scan_dirty_flags().
> > + * It copies the dirty bitmask into guest storage.
> > + */
> > +int shadow_track_dirty_vram(struct domain *d,
> > +                            unsigned long begin_pfn,
> > +                            unsigned long nr,
> > +                            XEN_GUEST_HANDLE_64(uint8) guest_dirty_bitmap)
> > +{
> > +    int rc = 0;
> > +    unsigned long end_pfn = begin_pfn + nr;
> > +    int flush_tlb = 0;
> > +    dv_range_t *range;
> > +    struct p2m_domain *p2m = p2m_get_hostp2m(d);
> > +
> > +    if (end_pfn < begin_pfn
> > +            || begin_pfn > p2m->max_mapped_pfn
> > +            || end_pfn >= p2m->max_mapped_pfn)
>
> I know you just copied this from the old definition but the limits seem
> wrong here -- I think it should be:
>
>     if ( end_pfn < begin_pfn || end_pfn > p2m->max_mapped_pfn + 1 )

You're right.  Done.  And commented because it's pretty obscure.

>
>
> > +/* hap_clean_vram_tracking_range()
> > + * For all the pages in the range specified by [begin_pfn,nr),
> > + * note in the dirty bitmap any page that has been marked as read-write,
> > + * which signifies that the page has been dirtied, and reset the page
> > + * to ram_logdirty.
> > + */
> > +void hap_clean_vram_tracking_range(struct domain *d,
> > +                                   unsigned long begin_pfn,
> > +                                   unsigned long nr,
> > +                                   uint8_t *dirty_bitmap)
> > +{
> > +    int i;
> > +    unsigned long pfn;
> > +    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
> > +    dv_range_t *range;
> > +
> > +    ASSERT(p2m_locked_by_me(p2m_get_hostp2m(d)));
> > +    ASSERT(paging_locked_by_me(d));
> > +
> > +    if ( !dirty_vram )
> > +    {
> > +        gdprintk(XENLOG_DEBUG, "Should only be called while tracking dirty
> vram.\n");
> > +        return;
> > +    }
> > +
> > +    range = dirty_vram_range_find(d, begin_pfn, nr);
> > +    if (!range)
> > +        return;
>
> Oughtn't we to return all 1s in the bitmap here?  If the range isn't
> currently being tracked we should conservatively assume it's all dirty,
> right?

The callers should ensure that the range exists.  This is just a conservative test.

Anyway, it begs the question of whether a new range should be considered all
dirty or all clean.  Intuitively "all dirty" seems the right answer but in practice
it works fine as written since the guest is busy updating the
whole frame buffer.

>
> > +
> > +    /* set l1e entries of P2M table to be read-only. */
> > +    /* On first write, it page faults, its entry is changed to read-write,
> > +     * its bit in the dirty bitmap is set, and on retry the write succeeds. */
> > +    for (i = 0, pfn = range->begin_pfn; pfn < range->end_pfn; i++, pfn++)
> > +    {
> > +        p2m_type_t pt;
> > +        pt = p2m_change_type(d, pfn, p2m_ram_rw, p2m_ram_logdirty);
> > +        if (pt == p2m_ram_rw)
> > +            dirty_bitmap[i >> 3] |= (1 << (i & 7));
> > +    }
> > +    flush_tlb_mask(d->domain_dirty_cpumask);
> > +}
> > +
> > +static void hap_vram_tracking_init(struct domain *d)
> > +{
> > +    paging_log_dirty_init(d, hap_enable_vram_tracking,
> > +                          hap_disable_vram_tracking,
> > +                          NULL);
> > +}
> > +
> > +/* hap_track_dirty_vram()
> > + * Create the domain's dv_dirty_vram struct on demand.
> > + * Create a dirty vram range on demand when some
> [begin_pfn:begin_pfn+nr] is first encountered.
> > + * Collect the guest_dirty bitmask, a bit mask of the dirties vram pages, by
> > + * calling paging_log_dirty_range().
> > + */
> > +int hap_track_dirty_vram(struct domain *d,
> > +                         unsigned long begin_pfn,
> > +                         unsigned long nr,
> > +                         XEN_GUEST_HANDLE_64(uint8) guest_dirty_bitmap)
> > +{
> > +    long rc = 0;
> > +    dv_dirty_vram_t *dirty_vram;
> > +    int restart_log_dirty = 0;
> > +
> > +    paging_lock(d);
> > +    dirty_vram = d->arch.hvm_domain.dirty_vram;
> > +    if ( nr )
> > +    {
> > +        dv_range_t *range = NULL;
> > +        int size = (nr + BITS_PER_LONG - 1) / BITS_PER_LONG;
> > +        unsigned long dirty_bitmap[size];
>
> All the users of this array cast to (uint8_t *) -- just declare it as
> uint8_t * instead?

Yes, done.  The original code seemed overly fond of using LONGs ...

>
> > +
> > +        /* Already tracking dirty vram? */
> > +        if ( paging_mode_log_dirty(d) && dirty_vram ) /* yes */
> > +        {
> > +            /* Handle the addition of another range */
> > +            range = dirty_vram_range_find(d, begin_pfn, nr);
> > +            if ( !range )
> > +            {
> > +                rc = -ENOMEM;
> > +                if ( !(range = dirty_vram_range_alloc(d, begin_pfn, nr)) )
> > +                    goto param_fail;
> > +                restart_log_dirty = 1;
> > +            }
> > +        }
> > +        /* Just starting to track dirty vram? */
> > +        else if ( !paging_mode_log_dirty(d) && !dirty_vram ) /* yes */
> > +        {
> > +            rc = -ENOMEM;
> > +            if ( !(dirty_vram = dirty_vram_alloc(d)) )
> > +                goto param_fail;
> > +
> > +            if ( !(range = dirty_vram_range_find_or_alloc(d, begin_pfn, nr)) )
> > +                goto param_fail;
> > +
> > +            restart_log_dirty = 1;
> > +            /* Initialize callbacks for vram tracking */
> > +            hap_vram_tracking_init(d);
> > +        }
> > +        else
> > +        {
> > +            /* Test for invalid combination */
> > +            if ( !paging_mode_log_dirty(d) && dirty_vram )
> > +                rc = -EINVAL;
> > +            else /* logging dirty of all memory, not tracking dirty vram */
> > +                rc = -ENODATA;
> > +            goto param_fail;
> > +        }
> > +
> > +        if (restart_log_dirty)
> > +        {
> > +            /* disable then enable log dirty */
>
> Why disable and re-enable?  The call to paging_log_dirty_range() below
> will reset the p2m entries of the range you care about, so I think all
> you need to do is enable it in the 'just starting' case above.

Done.

>
> And, since you know you're in HAP mode, not in log-dirty mode, and
> already have the paging lock, you can just set
> d->arch.paging.mode |= PG_log_dirty there rather than jumping through
> the paging_log_dirty_enable() path and messing with locks.

No, paging_log_dirty_enable() goes through several layers of functions and
ends up calling hap_enable_vram_tracking(), which does quite a bit of stuff.

>
> > +            paging_unlock(d);
> > +            if (paging_mode_log_dirty(d))
> > +                paging_log_dirty_disable(d);
> > +
> > +            rc = paging_log_dirty_enable(d);
> > +            paging_lock(d);
> > +            if (rc != 0)
> > +                goto param_fail;
> > +        }
> > +
> > +        paging_unlock(d);
> > +        memset(dirty_bitmap, 0x00, size * BYTES_PER_LONG);
> > +   paging_log_dirty_range(d, begin_pfn, nr, (uint8_t*)dirty_bitmap);
> > +        rc = -EFAULT;
> > +        if ( copy_to_guest(guest_dirty_bitmap,
> > +                           (uint8_t*)dirty_bitmap,
> > +                           size * BYTES_PER_LONG) == 0 )
> > +        {
> > +            rc = 0;
> > +        }
> > +    }
> > +    else
> > +    {
> > +        /* If zero pages specified while already tracking dirty vram
> > +         * then stop tracking */
> > +        if ( paging_mode_log_dirty(d) && dirty_vram ) {
> > +            paging_unlock(d);
> > +            rc = paging_log_dirty_disable(d);
> > +            paging_lock(d);
> > +            dirty_vram_free(d);
>
> This is different from the shadow case -- there, IIUC, you just ignore
> requests where nr == 0; here, you tear down all vram tracking.
> Can you choose one of those, and document it in the public header?

Done.  I changed the shadow code to tear down if nr == 0.
(The HAP code seemed to expect that behavior and the shadow code didn't
seem to care.)
And updated the public header.

>
> > +        } else /* benign no-op */
> > +        {
> > +            rc = 0;
> > +        }
> > +        paging_unlock(d);
> > +    }
> > +
> > +    return rc;
>
>
> > +/* paging_mark_dirty_hap()
> > + * Make a hap page writeable and mark it as dirty.
> > + * This done atomically under the p2m and paging locks to avoid leaving
> > + * a window where the page might be modified without being marked as
> dirty.
> > + */
>
> I'm perplexed by this -- AFAICT it's either not necessary (because all
> log-dirty read/clean ops are done with the domain paused) or not sufficient
> (because although the bitmap and the PTE are updated under the p2m lock,
> the actual dirtying of the page happens at some other time).  Can you
> spell out for me exactly what this is protecting against?

The comment over-stated the problem so I've toned it down
from "without being marked" to "without being counted".

paging_mark_dirty_hap() is in the page fault path and has two steps:

(1) It calls p2m_change_type() to re-mark some pfn as writeable (i.e. p2m_ram_rw),
which is a no-op if the pfn is already writeable.
This must be done under the p2m_lock.

(2) If the pfn was previously read-only (i.e. p2m_ram_logdirty) then it bumps two
dirty counts.  And it marks the page as dirty.
This must be done under the paging lock.

As an invariant, the dirty counts should be precisely the number of pages
made writeable.

With this patch, step (2) is also done under the p2m_lock.  This avoids having
a window between steps (1) and (2), in which hap_track_dirty_vram() might
get in and would break the invariant by clearing the range's dirty count.
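
In rough pseudocode (illustrative only, not the exact patch code):

    p2m_lock(p2m);
    paging_lock(d);
    /* (1) make the pfn writeable; a no-op if it already is */
    pt = p2m_change_type(d, gfn, p2m_ram_logdirty, p2m_ram_rw);
    if ( pt == p2m_ram_logdirty )
    {
        /* (2) the page really went read-only -> read-write, so bump the
         * dirty counts and mark the page dirty */
    }
    paging_unlock(d);
    p2m_unlock(p2m);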

It may be that this invariant is not particularly useful, that things will just work out.
But I do wonder if, without it, we'll have situations where the dirty counts will indicate
dirty pages when there are none, or no dirty pages when there are some.

>
> > +typedef int (*hash_pfn_callback_t)(struct vcpu *v,
> > +                                   mfn_t smfn,
> > +                                   unsigned long begin_pfn,
> > +                                   unsigned long end_pfn,
> > +                                   int *removed);
> > +
> > +static int hash_pfn_foreach(struct vcpu *v,
> > +                            unsigned int callback_mask,
> > +                            hash_pfn_callback_t callbacks[],
> > +                            unsigned long begin_pfn,
> > +                            unsigned long end_pfn)
> > +/* Walk the hash table looking at the types of the entries and
> > + * calling the appropriate callback function for each entry.
> > + * The mask determines which shadow types we call back for, and the
> array
> > + * of callbacks tells us which function to call.
> > + * Any callback may return non-zero to let us skip the rest of the scan.
> > + *
> > + * WARNING: Callbacks MUST NOT add or remove hash entries unless
> they
> > + * then return non-zero to terminate the scan. */
>
> This code duplication is a bit much.  I think you should recast the
> existing hash_foreach() function to take a pointer as its fourth
> argument instead of an MFN, and then make the existing callers just cast
> their MFN argument as a pointer.  Then you can use the same function,
> passing a pointer to a struct { begin, end, removed }.
>
> Please make the changes to hash_foreach() in a separate patch from the
> dirty_vram stuff.

Yes, I'd be happy to submit a patch as you suggest after we're done with this one.

>
> > @@ -1211,12 +1164,14 @@ static int shadow_set_l1e(struct vcpu *v,
> >                  shadow_l1e_remove_flags(new_sl1e, _PAGE_RW);
> >                  /* fall through */
> >              case 0:
> > -                shadow_vram_get_l1e(new_sl1e, sl1e, sl1mfn, d);
> > +                shadow_vram_fix_l1e(old_sl1e, new_sl1e, sl1e, sl1mfn, d);
> >                  break;
> >              }
> >          }
> >      }
> >
> > +    shadow_vram_fix_l1e(old_sl1e, new_sl1e, sl1e, sl1mfn, d);
>
> Why is this being called twice here?

Stupidity.  I've eliminated the first of the two.

>
> Cheers,
>
> Tim.

Thanks again.
-- rsp

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH] Provide support for multiple frame buffers in Xen.
  2012-11-07 20:36   ` Robert Phillips
@ 2012-11-08 13:25     ` Tim Deegan
  2012-11-12 21:31       ` Robert Phillips
  0 siblings, 1 reply; 35+ messages in thread
From: Tim Deegan @ 2012-11-08 13:25 UTC (permalink / raw)
  To: Robert Phillips; +Cc: xen-devel

Hi, 

Thanks for the update patch.

At 15:36 -0500 on 07 Nov (1352302577), Robert Phillips wrote:
> > More substantive comments:
> >  - I think the dirty_vram.c and dirty_vram.h files belong under mm/
> >    rather than under hvm/.  The ``#include "../mm/mm-locks.h"'' is
> >    an indicator that this is really MM code.
> 
> I have moved dirty_vram.c under mm/
> I have moved dirty_vram.h under include/asm-x86 .  In that location it
> is available to modules like hvm.c

Sure, that seems good.

> > I'd be
> > inclined to just put all paddr_links in the linked list (and have an
> > array of pointers rather than an array of paddr_link_ts).  Is it worth
> > having the extra complexity here, and at the callers, to avoid a single
> > memory read?
> 
> Almost all frame buffer pages have a single mapping so the algorithm's
> common case is met by constructing pl_tab as an array of paddr_links.
> That is, the table's paddr_links will rarely point to a chain of
> extension paddr_links.  The code encapsulates the complexity in a
> single function.  Sorry it required so much staring.  Even if all
> paddr_links were stored in a linked list, the caller would have to
> walk that linked list, so would be just as complex.

OK.

> > > +        if (set)
> > > +        {
> > > +            /* Did we find sl1ma in either the main table or the linked list? */
> > > +            if (pl == NULL) /* no, so we'll need to alloc a link */
> > > +            {
> > > +                ASSERT(ppl != NULL);
> > > +                /* alloc link and append it to list */
> > > +                (*ppl) = pl = alloc_paddr_link(d);
> > > +                if (pl == NULL)
> > > +                    goto out;
> >
> > This needs to signal some sort of error.  Otherwise, if we can't add
> > this sl1e to the list we'll just silently fail to track it.
> 
> I don't know what sort of error we can signal.
> 
> The immediate symptom would be that areas of the monitor would not be
> refreshed.  But, since we're running out of memory, that might be the
> least of the guest's woes.  The updated patch actually makes the
> symptom more likely (though still very unlikely) by putting an
> arbitrary bound on the length of paddr_link chains.  It handles a
> rogue process, one that has an arbitrarily large number of mappings
> for frame buffer pages, by simply not recording the excessive
> mappings.  If that results in unrefreshed blocks on the monitor, so be
> it.

I think the correct behaviour would be to report these pages as dirty.
That way anything that relies on seeing all changes will behave
correctly, though less efficiently.

> > > +#endif
> > > +                free_paddr_link(d, ppl, pl);
> > > +                if ( --range->nr_mappings == 0 )
> > > +                {
> > > +                    dirty_vram_range_free(d, range);
> >
> > What's this for?  If the guest unmaps the framebuffer and remaps it (or
> > if the shadow PTs of the mappings are temporarily discarded) this will
> > stop us from tracking the new mappings until the toolstack asks for the
> > bitmap (and then it will be expensive to go and find the mappings).
> >
> 
> I don't see this happening.  If the guest unmaps the framebuffer, the
> shadow code lazily recovers the shadow pages, so tracking will
> continue until it decides a page is no longer a shadow page.  That is
> when this code is invoked.

If the guest unmaps the buffer by clearing the PTEs then when the PTE
changes are propagated into the shadows this will cause nr_mappings to
fall to zero here.  If we _don't_ free the range, then when the FB is
mapped again we'll spot the new PTEs as they're shadowed and DTRT.  If we
_do_ free the range we end up walking all shadows looking for the
mappings.

> It tears down the mappings to that page and if some range ends up
> with no mappings then the range is useless.  Vram ranges are generated
> willy-nilly as needed.  This is the only mechanism for cleaning them up.

Shouldn't there be some way for the tools to indicate that they're done
with a range?  I guess they can tear down the whole lot and then start
again with whatever ranges are still in use. 

> > Oughtn't we to return all 1s in the bitmap here?  If the range isn't
> > currently being tracked we should conservatively assume it's all dirty,
> > right?
> 
> The callers should ensure that the range exists.  This is just a
> conservative test.

OK.

> Anyway, it begs the question of whether a new range should be
> considered all dirty or all clean.  Intuitively "all dirty" seems the
> right answer but in practice it works fine as written since the guest
> is busy updating the whole frame buffer.

What if the guest's not writing to the framebuffer at the moment, or
only to a part of it?  Shouldn't the tools see the existing content?

> > And, since you know you're in HAP mode, not in log-dirty mode, and
> > already have the paging lock, you can just set
> > d->arch.paging.mode |= PG_log_dirty there rather than jumping through
> > the paging_log_dirty_enable() path and messing with locks.
> 
> No, paging_log_dirty_enable() goes through several layers of functions and
> ends up calling hap_enable_vram_tracking(), which does quite a bit of stuff.

OK.

> > > +/* paging_mark_dirty_hap()
> > > + * Make a hap page writeable and mark it as dirty.
> > > + * This done atomically under the p2m and paging locks to avoid leaving
> > > + * a window where the page might be modified without being marked as
> > dirty.
> > > + */
> >
> > I'm perplexed by this -- AFAICT it's either not necessary (because all
> > log-dirty read/clean ops are done with the domain paused) or not sufficient
> > (because although the bitmap and the PTE are updated under the p2m lock,
> > the actual dirtying of the page happens at some other time).  Can you
> > spell out for me exactly what this is protecting against?
> 
> The comment over-stated the problem so I've toned it down
> from "without being marked" to "without being counted".
> 
> paging_mark_dirty_hap() is in the page fault path and has two steps:
> 
> (1) It calls p2m_change_type() to re-mark some pfn as writeable
>     (i.e. p2m_ram_rw), which is a no-op if the pfn is already writeable.
>     This must be done under the p2m_lock.
> 
> (2) If the pfn was previously read-only (i.e. p2m_ram_logdirty) then
>     it bumps two dirty counts.  And it marks the page as dirty.  This must
>     be done under the paging lock.
> 
> As an invariant, the dirty counts should be precisely the number of pages
> made writeable.

That invariant is already doomed -- any other agent (including the guest
itself) can change the p2m mappings for one of the pfns, or mark the pfn
dirty, without going through the NPF fault handler.  AFAICS the counts
could just as well be implemented as flags, to say 'something in this
range was dirtied'; the only interesting case is when they're 0.

> It may be that this invariant is not particularly useful, that things
> will just work out.  But I do wonder if, without it, we'll have
> situations where the dirty counts will indicate dirty pages when there
> are none, or no dirty pages when there are some.

The second of those cases would be the bad one; I think that can be
avoided by just switching the order of paging_mark_dirty() and
p2m_change_type() in hvm_hap_nested_page_fault(), and maybe putting a
wmb() between them. 

Cheers,

Tim.

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH] Provide support for multiple frame buffers in Xen.
  2012-11-08 13:25     ` Tim Deegan
@ 2012-11-12 21:31       ` Robert Phillips
  0 siblings, 0 replies; 35+ messages in thread
From: Robert Phillips @ 2012-11-12 21:31 UTC (permalink / raw)
  To: Tim (Xen.org); +Cc: xen-devel

Hi,

I'm sending version 3 of the patch shortly.  I think it addresses all your concerns.

> -----Original Message-----
> From: Tim Deegan [mailto:tim@xen.org]
> Sent: Thursday, November 08, 2012 8:25 AM
> To: Robert Phillips
> Cc: xen-devel@lists.xen.org
> Subject: Re: [Xen-devel] [PATCH] Provide support for multiple frame buffers
> in Xen.
> 
> Hi,
> 
> Thanks for the update patch.
> 
> At 15:36 -0500 on 07 Nov (1352302577), Robert Phillips wrote:
> > > More substantive comments:
> > >  - I think the dirty_vram.c and dirty_vram.h files belong under mm/
> > >    rather than under hvm/.  The ``#include "../mm/mm-locks.h"'' is
> > >    an indicator that this is really MM code.
> >
> > I have moved dirty_vram.c under mm/
> > I have moved dirty_vram.h under include/asm-x86 .  In that location it
> > is available to modules like hvm.c
> 
> Sure, that seems good.
> 
> > > I'd be
> > > inclined to just put all paddr_links in the linked list (and have an
> > > array of pointers rather than an array of paddr_link_ts).  Is it worth
> > > having the extra complexity here, and at the callers, to avoid a single
> > > memory read?
> >
> > Almost all frame buffer pages have a single mapping so the algorithm's
> > common case is met by constructing pl_tab as an array of paddr_links.
> > That is, the table's paddr_links will rarely point to a chain of
> > extension paddr_links.  The code encapsulates the complexity in a
> > single function.  Sorry it required so much staring.  Even if all
> > paddr_links were stored in a linked list, the caller would have to
> > walk that linked list, so would be just as complex.
> 
> OK.
> 
> > > > +        if (set)
> > > > +        {
> > > > +            /* Did we find sl1ma in either the main table or the linked list? */
> > > > +            if (pl == NULL) /* no, so we'll need to alloc a link */
> > > > +            {
> > > > +                ASSERT(ppl != NULL);
> > > > +                /* alloc link and append it to list */
> > > > +                (*ppl) = pl = alloc_paddr_link(d);
> > > > +                if (pl == NULL)
> > > > +                    goto out;
> > >
> > > This needs to signal some sort of error.  Otherwise, if we can't add
> > > this sl1e to the list we'll just silently fail to track it.
> >
> > I don't know what sort of error we can signal.
> >
> > The immediate symptom would be that areas of the monitor would not be
> > refreshed.  But, since we're running out of memory, that might be the
> > least of the guest's woes.  The updated patch actually makes the
> > symptom more likely (though still very unlikely) by putting an
> > arbitrary bound on the length of paddr_link chains.  It handles a
> > rogue process, one that has an arbitrarily large number of mappings
> > for frame buffer pages, by simply not recording the excessive
> > mappings.  If that results in unrefreshed blocks on the monitor, so be
> > it.
> 
> I think the correct behaviour would be to report these pages as dirty.
> That way anything that relies on seeing all changes will behave
> correctly, though less efficiently.

Ok, I've associated a boolean called "stuck_dirty" with each frame buffer page.
If for any reason we're unable to generate a complete set of mappings,
the bit gets set and that frame buffer page is considered dirty forever more,
or until the range gets torn down.

I was unable to make a failure happen (not surprisingly) so I added some
fault injection code for testing.  It's currently disabled/compiled-out.
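
In outline, the query path now treats such a page like this (a sketch
with stand-in types, not the patch code itself; the real walk over the
shadow L1 mappings is shadow_scan_dirty_flags() in the patch):

    #include <stdbool.h>

    typedef struct {
        bool stuck_dirty;   /* set when a mapping could not be recorded */
        /* ... the real entry also carries the list of sl1 mappings ... */
    } fb_page_t;

    /* Stand-in for the walk that tests and clears _PAGE_DIRTY in each
     * shadow L1 entry mapping the page. */
    static bool scan_mappings(const fb_page_t *pg) { (void)pg; return false; }

    static bool page_is_dirty(const fb_page_t *pg)
    {
        if ( pg->stuck_dirty )  /* bookkeeping failed at some point ...  */
            return true;        /* ... so report the page dirty forever. */
        return scan_mappings(pg);
    }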

> 
> > > > +#endif
> > > > +                free_paddr_link(d, ppl, pl);
> > > > +                if ( --range->nr_mappings == 0 )
> > > > +                {
> > > > +                    dirty_vram_range_free(d, range);
> > >
> > > What's this for?  If the guest unmaps the framebuffer and remaps it (or
> > > if the shadow PTs of the mappings are temporarily discarded) this will
> > > stop us from tracking the new mappings until the toolstack asks for the
> > > bitmap (and then it will be expensive to go and find the mappings).
> > >
> >
> > I don't see this happening.  If the guest unmaps the framebuffer, the
> > shadow code lazily recovers the shadow pages, so tracking will
> > continue until it decides a page is no longer a shadow page.  That is
> > when this code is invoked.
> 
> If the guest unmaps the buffer by clearing the PTEs then when the PTE
> changes are propagated into the shadows this will cause nr_mappings to
> fall to zero here.  If we _don't_ free the range, then when the FB is
> mapped again we'll spot the new PTEs as they're shadowed and DTRT.  If we
> _do_ free the range we end up walking all shadows looking for the
> mappings.
> 

Ok, I've removed the teardown code.  Ranges are no longer torn down
just because they have no mappings.  But they can still be torn down
if a new range is created that overlaps them, or if dirty vram
bookkeeping is stopped altogether, which happens as a side-effect
of requesting a range with zero pages.
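
From the toolstack side that looks roughly like this (illustrative
only: the first_pfn/nr/bitmap tail of the libxc prototype is assumed
from the unmodified upstream declaration, and error handling is
omitted):

    #include <stdint.h>
    #include <xenctrl.h>

    static void refresh_fb(xc_interface *xch, domid_t dom,
                           uint64_t fb_pfn, uint64_t fb_pages,
                           unsigned long *bitmap)
    {
        /* Starts (or continues) tracking [fb_pfn, fb_pfn + fb_pages) and
         * returns the pages dirtied since the previous call.  Creating a
         * range that overlaps an existing one discards the old range. */
        xc_hvm_track_dirty_vram(xch, dom, fb_pfn, fb_pages, bitmap);
    }

    static void stop_tracking(xc_interface *xch, domid_t dom,
                              unsigned long *bitmap)
    {
        /* nr == 0 discards every range and disables dirty vram tracking. */
        xc_hvm_track_dirty_vram(xch, dom, 0, 0, bitmap);
    }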

> > It tears down the mappings to that page and if some range ends up
> > with no mappings then the range is useless.  Vram ranges are generated
> > willy-nilly as needed.  This is the only mechanism for cleaning them up.
> 
> Shouldn't there be some way for the tools to indicate that they're done
> with a range?  I guess they can tear down the whole lot and then start
> again with whatever ranges are still in use.

Right.  Like little children, they do not clean up after themselves.

> 
> > > Oughtn't we to return all 1s in the bitmap here?  If the range isn't
> > > currently being tracked we should conservatively assume it's all dirty,
> > > right?
> >
> > The callers should ensure that the range exists.  This is just a
> > conservative test.
> 
> OK.
> 
> > Anyway, it begs the question of whether a new range should be
> > considered all dirty or all clean.  Intuitively "all dirty" seems the
> > right answer but in practice it works fine as written since the guest
> > is busy updating the whole frame buffer.
> 
> What if the guest's not writing to the framebuffer at the moment, or
> only to a part of it?  Shouldn't the tools see the existing content?

Ok, in HAP a new range is considered to be "all dirty".
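
That is, the first query of a freshly created range reports every page,
along these lines (a sketch, not the exact patch code):

    #include <stdint.h>
    #include <string.h>

    /* A newly created HAP range has no history to consult, so every page
     * in it is reported dirty on the first query. */
    static void report_new_range_all_dirty(uint8_t *dirty_bitmap,
                                           unsigned long nr_pages)
    {
        memset(dirty_bitmap, 0xff, (nr_pages + 7) / 8);
    }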

> 
> > > And, since you know you're in HAP mode, not in log-dirty mode, and
> > > already have the paging lock, you can just set
> > > d->arch.paging.mode |= PG_log_dirty there rather than jumping through
> > > the paging_log_dirty_enable() path and messing with locks.
> >
> > No, paging_log_dirty_enable() goes through several layers of functions and
> > ends up calling hap_enable_vram_tracking(), which does quite a bit of
> stuff.
> 
> OK.
> 
> > > > +/* paging_mark_dirty_hap()
> > > > + * Make a hap page writeable and mark it as dirty.
> > > > + * This done atomically under the p2m and paging locks to avoid
> leaving
> > > > + * a window where the page might be modified without being marked
> as
> > > dirty.
> > > > + */
> > >
> > > I'm perplexed by this -- AFAICT it's either not necessary (because all
> > > log-dirty read/clean ops are done with the domain paused) or not
> sufficient
> > > (because although the bitmap and the PTE are updated under the p2m
> lock,
> > > the actual dirtying of the page happens at some other time).  Can you
> > > spell out for me exactly what this is protecting against?
> >
> > The comment over-stated the problem so I've toned it down
> > from "without being marked" to "without being counted".
> >
> > paging_mark_dirty_hap() is in the page fault path and has two steps:
> >
> > (1) It calls p2m_change_type() to re-mark some pfn as writeable
> >     (i.e. p2m_ram_rw), which is a no-op if the pfn is already writeable.
> >     This must be done under the p2m_lock.
> >
> > (2) If the pfn was previously read-only (i.e. p2m_ram_logdirty) then
> >     it bumps two dirty counts.  And it marks the page as dirty.  This must
> >     be done under the paging lock.
> >
> > As an invariant, the dirty counts should be precisely the number of pages
> > made writeable.
> 
> That invariant is already doomed -- any other agent (including the guest
> itself) can change the p2m mappings for one of the pfns, or mark the pfn
> dirty, without going through the NPF fault handler.  AFAICS the counts
> could just as well be implemented as flags, to say 'something in this
> range was dirtied'; the only interesting case is when they're 0.
> 
> > It may be that this invariant is not particularly useful, that things
> > will just work out.  But I do wonder if, without it, we'll have
> > situations where the dirty counts will indicate dirty pages when there
> > are none, or no dirty pages when there are some.
> 
> The second of those cases would be the bad one; I think that can be
> avoided by just switching the order of paging_mark_dirty() and
> p2m_change_type() in hvm_hap_nested_page_fault(), and maybe putting a
> wmb() between them.

Ok, I've done as you suggested.  I don't think the wmb() is needed
but I could be convinced.
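
For reference, the ordering suggested above would look roughly like
this in hvm_hap_nested_page_fault() (a fragment sketching the
suggestion, not necessarily the code as it lands in v3):

    if ( access_w )
    {
        /* Record the page as dirty (and bump the counts) before the p2m
         * entry is made writable, so a query never sees a writable page
         * that has not been counted.  Whether the wmb() is needed is the
         * open question above. */
        paging_mark_dirty(v->domain, mfn_x(mfn));
        wmb();
        p2m_change_type(v->domain, gfn, p2m_ram_logdirty, p2m_ram_rw);
    }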

> 
> Cheers,
> 
> Tim.

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH] Provide support for multiple frame buffers in Xen
  2013-10-10 14:58   ` Ben Guthro
@ 2013-10-10 15:07     ` Wei Liu
  0 siblings, 0 replies; 35+ messages in thread
From: Wei Liu @ 2013-10-10 15:07 UTC (permalink / raw)
  To: Ben Guthro; +Cc: Robert Phillips, Wei Liu, xen-devel

On Thu, Oct 10, 2013 at 10:58:12AM -0400, Ben Guthro wrote:
> On Thu, Oct 10, 2013 at 10:48 AM, Wei Liu <wei.liu2@citrix.com> wrote:
> 
> > On Fri, Mar 01, 2013 at 08:48:57PM +0000, Robert Phillips wrote:
> > > Support is provided for both shadow and hardware assisted paging (HAP)
> > > modes. This code bookkeeps the set of video frame buffers (vram),
> > > detects when the guest has modified any of those buffers and, upon
> > request,
> > > returns a bitmap of the modified pages.
> > > This lets other software components re-paint the portions of the monitor
> > > (or monitors) that have changed.
> > > Each monitor has a frame buffer of some size at some position
> > > in guest physical memory.
> > > The set of frame buffers being tracked can change over time as monitors
> > > are plugged and unplugged.
> > >
> > > This is the 9th version of this patch.
> >
> > Hi Robert, any news for v10 of this patch?
> >
> 
> Robert has been asked to stop work on this, after so many submissions.
> He has since been focusing on other projects - I don't think there has been
> any additional effort put forth to get this upstream.
> 

OK, thanks for the update.

> We (XenClient) maintain a patch queue, and will continue to carry this
> patch, for now.
> 

So any new changes to this patch? Or do you just carry it as-is? I suppose
there should be some changes to fix Tim's test cases.

Wei.
> Ben

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH] Provide support for multiple frame buffers in Xen
  2013-10-10 14:48 ` Wei Liu
@ 2013-10-10 14:58   ` Ben Guthro
  2013-10-10 15:07     ` Wei Liu
  0 siblings, 1 reply; 35+ messages in thread
From: Ben Guthro @ 2013-10-10 14:58 UTC (permalink / raw)
  To: Wei Liu; +Cc: Robert Phillips, xen-devel


[-- Attachment #1.1: Type: text/plain, Size: 1118 bytes --]

On Thu, Oct 10, 2013 at 10:48 AM, Wei Liu <wei.liu2@citrix.com> wrote:

> On Fri, Mar 01, 2013 at 08:48:57PM +0000, Robert Phillips wrote:
> > Support is provided for both shadow and hardware assisted paging (HAP)
> > modes. This code bookkeeps the set of video frame buffers (vram),
> > detects when the guest has modified any of those buffers and, upon
> request,
> > returns a bitmap of the modified pages.
> > This lets other software components re-paint the portions of the monitor
> > (or monitors) that have changed.
> > Each monitor has a frame buffer of some size at some position
> > in guest physical memory.
> > The set of frame buffers being tracked can change over time as monitors
> > are plugged and unplugged.
> >
> > This is the 9th version of this patch.
>
> Hi Robert, any news for v10 of this patch?
>

Robert has been asked to stop work on this, after so many submissions.
He has since been focusing on other projects - I don't think there has been
any additional effort put forth to get this upstream.

We (XenClient) maintain a patch queue, and will continue to carry this
patch, for now.

Ben

[-- Attachment #1.2: Type: text/html, Size: 1610 bytes --]


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH] Provide support for multiple frame buffers in Xen
  2013-03-01 20:48 Robert Phillips
  2013-03-02 11:20 ` Pasi Kärkkäinen
  2013-03-07 12:05 ` Tim Deegan
@ 2013-10-10 14:48 ` Wei Liu
  2013-10-10 14:58   ` Ben Guthro
  2 siblings, 1 reply; 35+ messages in thread
From: Wei Liu @ 2013-10-10 14:48 UTC (permalink / raw)
  To: Robert Phillips; +Cc: wei.liu2, xen-devel

On Fri, Mar 01, 2013 at 08:48:57PM +0000, Robert Phillips wrote:
> Support is provided for both shadow and hardware assisted paging (HAP)
> modes. This code bookkeeps the set of video frame buffers (vram),
> detects when the guest has modified any of those buffers and, upon request,
> returns a bitmap of the modified pages.
> This lets other software components re-paint the portions of the monitor
> (or monitors) that have changed.
> Each monitor has a frame buffer of some size at some position
> in guest physical memory.
> The set of frame buffers being tracked can change over time as monitors
> are plugged and unplugged.
> 
> This is the 9th version of this patch.

Hi Robert, any news for v10 of this patch?

Wei.

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH] Provide support for multiple frame buffers in Xen
  2013-03-01 20:48 Robert Phillips
  2013-03-02 11:20 ` Pasi Kärkkäinen
@ 2013-03-07 12:05 ` Tim Deegan
  2013-10-10 14:48 ` Wei Liu
  2 siblings, 0 replies; 35+ messages in thread
From: Tim Deegan @ 2013-03-07 12:05 UTC (permalink / raw)
  To: Robert Phillips; +Cc: xen-devel

Hi,

At 15:48 -0500 on 01 Mar (1362152937), Robert Phillips wrote:
> Support is provided for both shadow and hardware assisted paging (HAP)
> modes. This code bookkeeps the set of video frame buffers (vram),
> detects when the guest has modified any of those buffers and, upon request,
> returns a bitmap of the modified pages.
> This lets other software components re-paint the portions of the monitor
> (or monitors) that have changed.
> Each monitor has a frame buffer of some size at some position
> in guest physical memory.
> The set of frame buffers being tracked can change over time as monitors
> are plugged and unplugged.
> 
> This is the 9th version of this patch.

I'm afraid we'll need to go to a tenth.  During VM shutdown of a
shadow-pagetables winXP VM, on AMD:

(XEN) Xen BUG at domain_page.c:143
(XEN) ----[ Xen-4.3-unstable  x86_64  debug=y  Tainted:    C ]----
(XEN) CPU:    2
(XEN) RIP:    e008:[<ffff82c4c015efd2>] map_domain_page+0x423/0x4eb
(XEN) RFLAGS: 0000000000010046   CONTEXT: hypervisor
(XEN) rax: 0000000000000060   rbx: ffff83021797d000   rcx: 0000000000000000
(XEN) rdx: 0000000000000060   rsi: 0000000000000060   rdi: 0000000000000040
(XEN) rbp: ffff83021d60fa68   rsp: ffff83021d60fa28   r8:  00000000ffffffff
(XEN) r9:  ffff820060006008   r10: 0000000000000000   r11: 000000000016a515
(XEN) r12: ffff8300df51d000   r13: 000000000021f9df   r14: 0000000000000060
(XEN) r15: 0000000000000007   cr0: 0000000080050033   cr4: 00000000000006f0
(XEN) cr3: 00000002166c7000   cr2: ffff880002756830
(XEN) ds: 0000   es: 0000   fs: 0000   gs: 0000   ss: e010   cs: e008
(XEN) Xen stack trace from rsp=ffff83021d60fa28:
(XEN)    ffff83021797d2d8 0000000000000286 ffff83021d60fa78 ffff83021f9df1f8
(XEN)    0000000000000000 000000000021fbb6 ffff83021f9df0e0 00000000000000d8
(XEN)    ffff83021d60fa88 ffff82c4c01e96c7 0000000000000006 ffff83021f9df1e0
(XEN)    ffff83021d60fae8 ffff82c4c01f90a3 ffff83021f9df0a0 ffff83021f9df0a0
(XEN)    ffff83021f991000 0000000000000009 00000000002166e7 0000000000000004
(XEN)    000000000016a515 ffff83021f991000 ffff82e0043f76c0 ffff8300df688000
(XEN)    ffff83021d60fb38 ffff82c4c02077c8 0000000000000093 000000000021fbb6
(XEN)    ffff82c4c0125159 0000000000000000 0000000000000601 000000000021f65a
(XEN)    ffff82e0043f76c0 ffff82e000000000 ffff83021d60fb68 ffff82c4c01f2acc
(XEN)    0000000000162aab 000000000021f65a 000000000021f65a ffff820040046008
(XEN)    ffff83021d60fbe8 ffff82c4c0207680 ffff83021d608000 ffff820040046000
(XEN)    ffff8300df688000 000000000021f65a 000000021f65a000 000000000021fbb6
(XEN)    0000000200000000 001040001d60fbc8 ffff82c4c0125159 0000000000000000
(XEN)    ffff82e0043ecb40 ffff82e000000000 ffff83021f991000 ffff82e0043ecb20
(XEN)    ffff83021d60fc18 ffff82c4c01f2ad7 000000fc00000000 0000000000000000
(XEN)    0000000000000000 000000000021f65a ffff83021d60fc78 ffff82c4c01f3c0c
(XEN)    0000000100000000 ffff8300df688000 0000000000000286 ffff83021d60fc58
(XEN)    ffff82c4c0125159 ffff83021f991000 0000000000000000 0000000000000000
(XEN)    00007d2000000000 ffff83021d608000 ffff83021d60fcc8 ffff82c4c01f3f8b
(XEN)    ffff83021d60fcc8 ffff82c4c012d34c ffff82c4c0125416 0000000000000003
(XEN) Xen call trace:
(XEN)    [<ffff82c4c015efd2>] map_domain_page+0x423/0x4eb
(XEN)    [<ffff82c4c01e96c7>] remap_vaddr+0x2b/0x35
(XEN)    [<ffff82c4c01f90a3>] dirty_vram_delete_shadow+0xac/0x18d
(XEN)    [<ffff82c4c02077c8>] sh_destroy_l1_shadow__guest_3+0x10a/0x25d
(XEN)    [<ffff82c4c01f2acc>] sh_destroy_shadow+0x113/0x194
(XEN)    [<ffff82c4c0207680>] sh_destroy_l2_shadow__guest_3+0x38a/0x3c8
(XEN)    [<ffff82c4c01f2ad7>] sh_destroy_shadow+0x11e/0x194
(XEN)    [<ffff82c4c01f3c0c>] _shadow_prealloc+0x45a/0x678
(XEN)    [<ffff82c4c01f3f8b>] sh_set_allocation+0x161/0x2a0
(XEN)    [<ffff82c4c01f893d>] shadow_teardown+0x25e/0x3b4
(XEN)    [<ffff82c4c01dce35>] paging_teardown+0x29/0x40
(XEN)    [<ffff82c4c015e768>] domain_relinquish_resources+0xae/0x2e6
(XEN)    [<ffff82c4c01055b4>] domain_kill+0x87/0xe1
(XEN)    [<ffff82c4c0103a5b>] do_domctl+0xaa3/0x11e8
(XEN)    [<ffff82c4c022601b>] syscall_enter+0xeb/0x145
(XEN)    
(XEN) 
(XEN) ****************************************
(XEN) Panic on CPU 2:
(XEN) Xen BUG at domain_page.c:143
(XEN) ****************************************

( config: 
 name = "winxp"
 builder = "hvm"
 memory = 4096
 vcpus = 2
 hap = 0
 vif = [ 'type=ioemu, bridge=xenbr0' ]
 disk = [ 'phy:/dev/loop0,hda,w', 'file:/root/winxpsp3.iso,hdc:cdrom,r' ]
 usb = 1
 usbdevice = "tablet"
)

And in normal operation of a 64-bit Win7 VM, on Intel:

(XEN) Xen BUG at domain_page.c:143
(XEN) ----[ Xen-4.3-unstable  x86_64  debug=y  Not tainted ]----
(XEN) CPU:    5
(XEN) RIP:    e008:[<ffff82c4c015efd2>] map_domain_page+0x423/0x4eb
(XEN) RFLAGS: 0000000000010046   CONTEXT: hypervisor
(XEN) rax: 0000000000000000   rbx: ffff83023d508000   rcx: 00000000ffffffff
(XEN) rdx: 0000000000000000   rsi: 0000000000000000   rdi: ffff8300bf2fb338
(XEN) rbp: ffff83023d527c28   rsp: ffff83023d527be8   r8:  ffffffffffffffff
(XEN) r9:  ffff820060006008   r10: 0000000000000000   r11: 0180000000000000
(XEN) r12: ffff8300bf2fb000   r13: 00000000001cd200   r14: 0000000000000080
(XEN) r15: 0000000000000000   cr0: 0000000080050033   cr4: 00000000000026f0
(XEN) cr3: 000000023f3d6000   cr2: ffff88002da162b0
(XEN) ds: 0000   es: 0000   fs: 0000   gs: 0000   ss: e010   cs: e008
(XEN) Xen stack trace from rsp=ffff83023d527be8:
(XEN)    ffff83023d5082d8 0000000000000286 0000000000000069 ffff8301cd2006a8
(XEN)    0000000000000007 00000000000f0047 000000020f0be038 ffff83023f213d90
(XEN)    ffff83023d527c48 ffff82c4c01e96c7 00000000001d6a00 ffff82004006e038
(XEN)    ffff83023d527cc8 ffff82c4c01e9c18 ffff8300bf2fb000 000000000020f0be
(XEN)    000000000000006e 00000000000006a8 ffff830200000001 00000000000f0047
(XEN)    ffff83023fca0000 0000000000000000 00000000001d6a00 ffff82004006e038
(XEN)    0000000000000007 00000000000f0047 000ffffffffff000 ffff83023fca0000
(XEN)    ffff83023d527d38 ffff82c4c0212bf9 ffff82004000c000 ffff82004006e000
(XEN)    000000020f0be000 ffff8300bf2ee000 00000000000f0160 00000000000f0000
(XEN)    ffff83023f213d50 ffff82e0041e17c0 0000000000000001 00007d2000000000
(XEN)    ffff8300bf2ee000 00000000000f0000 ffff83023d527da8 ffff82c4c01f92ca
(XEN)    ffff83023f213d50 ffff83023fca0000 0000000000000390 00000000000f0160
(XEN)    ffff83023f213d90 00000000000f0160 ffff82c4c0125416 0000000000000000
(XEN)    0000000000000160 00000000000f0000 00007f3489409004 00000000000f0000
(XEN)    ffff83023d527e48 ffff82c4c01ea132 0000000100000001 ffff83023d508000
(XEN)    ffff82e0046b1b40 00000000002358da 00007f3486a84004 ffff83023f213d90
(XEN)    ffff83023fca0ad0 ffff82e0046b1b40 0000000000000000 ffff82c4c017589c
(XEN)    ffff83023fca0000 000000000000000f 0000000000237487 ffff83023d508000
(XEN)    ffffffffffffffea 0000000000000006 00007f3489409004 00000000000f0000
(XEN)    ffff83023d527ef8 ffff82c4c01b4315 ffff83023d527e68 ffff82c4c016d072
(XEN) Xen call trace:
(XEN)    [<ffff82c4c015efd2>] map_domain_page+0x423/0x4eb
(XEN)    [<ffff82c4c01e96c7>] remap_vaddr+0x2b/0x35
(XEN)    [<ffff82c4c01e9c18>] dirty_vram_range_update+0x96/0x3bc
(XEN)    [<ffff82c4c0212bf9>] sh_find_vram_mappings_in_l1__guest_4+0x107/0x135
(XEN)    [<ffff82c4c01f92ca>] sh_find_all_vram_mappings+0x146/0x19e
(XEN)    [<ffff82c4c01ea132>] shadow_track_dirty_vram+0x1f4/0x46e
(XEN)    [<ffff82c4c01b4315>] do_hvm_op+0xf39/0x1f43
(XEN)    [<ffff82c4c022601b>] syscall_enter+0xeb/0x145
(XEN)    
(XEN) 
(XEN) ****************************************
(XEN) Panic on CPU 5:
(XEN) Xen BUG at domain_page.c:143
(XEN) ****************************************

( config: 
 name = "win7"
 builder = "hvm"
 memory = 6000
 vcpus = 2
 vif = [ 'type=ioemu, bridge=xenbr0' ]
 disk = [ 'file:/root/win7.img,hda,w', 'file:/root/win7sp1-x64.iso,hdc:cdrom,r' ]
 usb = 1
 usbdevice = "tablet"
 hap = 0
) 

Those were the first two VMs I tried, so I don't know if anything else
works.

Since we're going to v10 anyway, can you please make the new
remap_[vm]addr functions static?  They're not used outside
dirty_vram.c.

Also, please drop the mm/p2m.c changes, which are just whitespace
adjustments now, and not related to the rest of the patch. 

Cheers,

Tim. 

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH] Provide support for multiple frame buffers in Xen
  2013-03-01 20:48 Robert Phillips
@ 2013-03-02 11:20 ` Pasi Kärkkäinen
  2013-03-07 12:05 ` Tim Deegan
  2013-10-10 14:48 ` Wei Liu
  2 siblings, 0 replies; 35+ messages in thread
From: Pasi Kärkkäinen @ 2013-03-02 11:20 UTC (permalink / raw)
  To: Robert Phillips; +Cc: xen-devel

On Fri, Mar 01, 2013 at 03:48:57PM -0500, Robert Phillips wrote:
> Support is provided for both shadow and hardware assisted paging (HAP)
> modes. This code bookkeeps the set of video frame buffers (vram),
> detects when the guest has modified any of those buffers and, upon request,
> returns a bitmap of the modified pages.
> This lets other software components re-paint the portions of the monitor
> (or monitors) that have changed.
> Each monitor has a frame buffer of some size at some position
> in guest physical memory.
> The set of frame buffers being tracked can change over time as monitors
> are plugged and unplugged.
> 
> This is the 9th version of this patch.
>

It'd be nice to have it in the subject of the email as well.

-- Pasi

> Signed-Off-By: Robert Phillips <robert.phillips@citrix.com>
> ---
>  tools/libxc/xenctrl.h            |   20 +-
>  xen/arch/x86/hvm/hvm.c           |    8 +-
>  xen/arch/x86/mm/Makefile         |    1 +
>  xen/arch/x86/mm/dirty_vram.c     |  951 ++++++++++++++++++++++++++++++++++++++
>  xen/arch/x86/mm/hap/hap.c        |  111 -----
>  xen/arch/x86/mm/p2m.c            |   11 +-
>  xen/arch/x86/mm/paging.c         |   57 ++-
>  xen/arch/x86/mm/shadow/common.c  |  353 +++++++-------
>  xen/arch/x86/mm/shadow/multi.c   |  174 ++++---
>  xen/arch/x86/mm/shadow/multi.h   |    7 +-
>  xen/arch/x86/mm/shadow/types.h   |    1 +
>  xen/include/asm-x86/dirty_vram.h |  227 +++++++++
>  xen/include/asm-x86/hap.h        |    4 -
>  xen/include/asm-x86/hvm/domain.h |    2 +-
>  xen/include/asm-x86/paging.h     |   15 +-
>  xen/include/asm-x86/shadow.h     |    6 -
>  16 files changed, 1535 insertions(+), 413 deletions(-)
>  create mode 100644 xen/arch/x86/mm/dirty_vram.c
>  create mode 100644 xen/include/asm-x86/dirty_vram.h
> 
> diff --git a/tools/libxc/xenctrl.h b/tools/libxc/xenctrl.h
> index 32122fd..cd4e1ef 100644
> --- a/tools/libxc/xenctrl.h
> +++ b/tools/libxc/xenctrl.h
> @@ -1563,15 +1563,23 @@ int xc_hvm_inject_msi(
>      xc_interface *xch, domid_t dom, uint64_t addr, uint32_t data);
>  
>  /*
> - * Track dirty bit changes in the VRAM area
> + * Track dirty bit changes in a VRAM region defined by
> + * [ first_pfn : first_pfn + nr - 1 ]
>   *
>   * All of this is done atomically:
> - * - get the dirty bitmap since the last call
> - * - set up dirty tracking area for period up to the next call
> - * - clear the dirty tracking area.
> + * - gets the dirty bitmap since the last call, all zeroes for
> + *   the first call with some new region
> + * - sets up a dirty tracking region for period up to the next call
> + * - clears the specified dirty tracking region.
>   *
> - * Returns -ENODATA and does not fill bitmap if the area has changed since the
> - * last call.
> + * Creating a new region causes any existing regions that it overlaps
> + * to be discarded.
> + *
> + * Specifying nr == 0 causes all regions to be discarded and
> + * disables dirty bit tracking.
> + *
> + * If nr is not a multiple of 64, only the first nr bits of bitmap
> + * are well defined.
>   */
>  int xc_hvm_track_dirty_vram(
>      xc_interface *xch, domid_t dom,
> diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
> index fcea52c..5a97ad3 100644
> --- a/xen/arch/x86/hvm/hvm.c
> +++ b/xen/arch/x86/hvm/hvm.c
> @@ -57,6 +57,7 @@
>  #include <asm/hvm/cacheattr.h>
>  #include <asm/hvm/trace.h>
>  #include <asm/hvm/nestedhvm.h>
> +#include <asm/dirty_vram.h>
>  #include <asm/mtrr.h>
>  #include <asm/apic.h>
>  #include <public/sched.h>
> @@ -1447,8 +1448,11 @@ int hvm_hap_nested_page_fault(paddr_t gpa,
>           */
>          if ( access_w )
>          {
> -            paging_mark_dirty(v->domain, mfn_x(mfn));
> -            p2m_change_type(v->domain, gfn, p2m_ram_logdirty, p2m_ram_rw);
> +            if ( p2m_change_type(v->domain, gfn, p2m_ram_logdirty,
> +                                 p2m_ram_rw) == p2m_ram_logdirty )
> +            {
> +                paging_mark_dirty_gpfn(v->domain, gfn);
> +            }
>          }
>          rc = 1;
>          goto out_put_gfn;
> diff --git a/xen/arch/x86/mm/Makefile b/xen/arch/x86/mm/Makefile
> index 73dcdf4..becd0c9 100644
> --- a/xen/arch/x86/mm/Makefile
> +++ b/xen/arch/x86/mm/Makefile
> @@ -5,6 +5,7 @@ obj-y += paging.o
>  obj-y += p2m.o p2m-pt.o p2m-ept.o p2m-pod.o
>  obj-y += guest_walk_2.o
>  obj-y += guest_walk_3.o
> +obj-y += dirty_vram.o
>  obj-$(x86_64) += guest_walk_4.o
>  obj-$(x86_64) += mem_event.o
>  obj-$(x86_64) += mem_paging.o
> diff --git a/xen/arch/x86/mm/dirty_vram.c b/xen/arch/x86/mm/dirty_vram.c
> new file mode 100644
> index 0000000..4f599ed
> --- /dev/null
> +++ b/xen/arch/x86/mm/dirty_vram.c
> @@ -0,0 +1,951 @@
> +/*
> + * arch/x86/mm/dirty_vram.c: Bookkeep/query dirty VRAM pages
> + * with support for multiple frame buffers.
> + *
> + * Copyright (c) 2012, Citrix Systems, Inc. (Robert Phillips)
> + * Parts of this code are Copyright (c) 2007 Advanced Micro Devices (Wei Huang)
> + * Parts of this code are Copyright (c) 2007 XenSource Inc.
> + * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
> + * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> + * more details.
> + *
> + * You should have received a copy of the GNU General Public License along with
> + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
> + * Place - Suite 330, Boston, MA 02111-1307 USA.
> + */
> +
> +
> +#include <xen/types.h>
> +#include <xen/sched.h>
> +#include <xen/guest_access.h>
> +#include <asm/shadow.h>
> +#include <asm/dirty_vram.h>
> +#include <asm/hap.h>
> +#include <asm/config.h>
> +#include "mm-locks.h"
> +
> +#define DEBUG_stop_tracking_all_vram          0
> +#define DEBUG_allocating_dirty_vram_range     0
> +#define DEBUG_high_water_mark_for_vram_ranges 0
> +#define DEBUG_freeing_dirty_vram_range        0
> +#define DEBUG_allocate_paddr_links_page       0
> +#define DEBUG_update_vram_mapping             0
> +#define DEBUG_alloc_paddr_inject_fault        0
> +#define DEBUG_link_limit_exceeded             0
> +
> +
> +/* Allocates domain's dirty_vram structure */
> +dv_dirty_vram_t *
> +dirty_vram_alloc(struct domain *d)
> +{
> +    dv_dirty_vram_t *dirty_vram;
> +    ASSERT( paging_locked_by_me(d) );
> +    dirty_vram = d->arch.hvm_domain.dirty_vram = xzalloc(dv_dirty_vram_t);
> +    if ( dirty_vram )
> +    {
> +        INIT_LIST_HEAD(&dirty_vram->range_head);
> +        dirty_vram->ext_head = INVALID_PADDR;
> +        dirty_vram->pl_free = INVALID_PADDR;
> +    }
> +    return dirty_vram;
> +}
> +
> +/*
> + * Returns domain's dirty_vram structure,
> + * allocating it if necessary
> + */
> +dv_dirty_vram_t *
> +dirty_vram_find_or_alloc(struct domain *d)
> +{
> +    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
> +    ASSERT( paging_locked_by_me(d) );
> +    if ( !dirty_vram )
> +        dirty_vram = dirty_vram_alloc(d);
> +    return dirty_vram;
> +}
> +
> +
> +/* Free domain's dirty_vram structure */
> +void dirty_vram_free(struct domain *d)
> +{
> +    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
> +    ASSERT( paging_locked_by_me(d) );
> +    if ( dirty_vram )
> +    {
> +        struct list_head *curr, *next;
> +        dv_paddr_link_ext_t *ext;
> +        
> +        /* Free all the ranges */
> +        list_for_each_safe(curr, next, &dirty_vram->range_head)
> +        {
> +            dv_range_t *range = list_entry(curr, dv_range_t, range_link);
> +#if DEBUG_stop_tracking_all_vram
> +            gdprintk(XENLOG_DEBUG, "[%05lx:%05lx] stop tracking all vram\n",
> +                     range->begin_pfn, range->end_pfn);
> +#endif
> +            xfree(range->pl_tab);
> +            xfree(range);
> +        }
> +        /* Free all the extension pages */
> +        
> +        ext = remap_maddr(NULL, dirty_vram->ext_head);
> +        while ( ext ) {
> +            struct page_info *pg = virt_to_page(ext);
> +            ext = remap_maddr(ext, ext->ext_link);
> +            d->arch.paging.free_page(d, pg);
> +        }
> +
> +        xfree(dirty_vram);
> +        d->arch.hvm_domain.dirty_vram = NULL;
> +    }
> +}
> +
> +/* Returns dirty vram range containing gfn, NULL if none */
> +struct dv_range *
> +dirty_vram_range_find_gfn(struct domain *d,
> +                          unsigned long gfn)
> +{
> +    struct dv_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
> +    ASSERT( paging_locked_by_me(d) );
> +    if ( dirty_vram )
> +    {
> +        struct list_head *curr;
> +        list_for_each(curr, &dirty_vram->range_head)
> +        {
> +            dv_range_t *range = list_entry(curr, dv_range_t, range_link);
> +            if ( gfn >= range->begin_pfn &&
> +                 gfn <  range->end_pfn )
> +                return range;
> +        }
> +    }
> +    return NULL;
> +}
> +
> +/*
> + * Returns pointer to dirty vram range matching [begin_pfn .. end_pfn ),
> + * NULL if none.
> + */
> +dv_range_t *
> +dirty_vram_range_find(struct domain *d,
> +                      unsigned long begin_pfn,
> +                      unsigned long nr)
> +{
> +    unsigned long end_pfn = begin_pfn + nr;
> +    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
> +    ASSERT( paging_locked_by_me(d) );
> +    if ( dirty_vram )
> +    {
> +        struct list_head *curr;
> +        list_for_each(curr, &dirty_vram->range_head)
> +        {
> +            dv_range_t *range = list_entry(curr, dv_range_t, range_link);
> +            if ( begin_pfn == range->begin_pfn &&
> +                 end_pfn   == range->end_pfn )
> +                return range;
> +        }
> +    }
> +    return NULL;
> +}
> +
> +/* Allocate specified dirty_vram range */
> +static dv_range_t *
> +_dirty_vram_range_alloc(struct domain *d,
> +                        unsigned long begin_pfn,
> +                        unsigned long nr)
> +{
> +    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
> +    dv_range_t *range = NULL;
> +    unsigned long end_pfn = begin_pfn + nr;
> +    dv_pl_entry_t *pl_tab = NULL;
> +    int i;
> +
> +    ASSERT( paging_locked_by_me(d) );
> +    ASSERT( dirty_vram != NULL );
> +
> +#if DEBUG_allocating_dirty_vram_range
> +    gdprintk(XENLOG_DEBUG,
> +             "[%05lx:%05lx] Allocating dirty vram range hap:%d\n",
> +             begin_pfn, end_pfn,
> +             d->arch.hvm_domain.hap_enabled);
> +#endif
> +
> +    range = xzalloc(dv_range_t);
> +    if ( range == NULL )
> +        goto err_out;
> +
> +    INIT_LIST_HEAD(&range->range_link);
> +
> +    range->begin_pfn = begin_pfn;
> +    range->end_pfn = end_pfn;
> +
> +    if ( !hap_enabled(d) )
> +    {
> +        if ( (pl_tab = xzalloc_array(dv_pl_entry_t, nr)) == NULL )
> +            goto err_out;
> +
> +        for ( i = 0; i != nr; i++ )
> +        {
> +            pl_tab[i].mapping.sl1ma = INVALID_PADDR;
> +            pl_tab[i].mapping.pl_next = INVALID_PADDR;
> +        }
> +    }
> +
> +    range->pl_tab = pl_tab;
> +    range->mappings_hwm = 1;
> +
> +    list_add(&range->range_link, &dirty_vram->range_head);
> +    if ( ++dirty_vram->nr_ranges > dirty_vram->ranges_hwm )
> +    {
> +        dirty_vram->ranges_hwm = dirty_vram->nr_ranges;
> +#if DEBUG_high_water_mark_for_vram_ranges
> +        gdprintk(XENLOG_DEBUG,
> +                 "High water mark for number of vram ranges is now:%d\n",
> +                 dirty_vram->ranges_hwm);
> +#endif
> +    }
> +    return range;
> +
> + err_out:
> +    xfree(pl_tab);
> +    xfree(range);
> +    return NULL;
> +}
> +
> +/* Frees specified dirty_vram range */
> +void dirty_vram_range_free(struct domain *d,
> +                           dv_range_t *range)
> +{
> +    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
> +    ASSERT( paging_locked_by_me(d) );
> +    if ( dirty_vram )
> +    {
> +        int i, nr = range->end_pfn - range->begin_pfn;
> +
> +#if DEBUG_freeing_dirty_vram_range
> +        gdprintk(XENLOG_DEBUG,
> +                 "[%05lx:%05lx] Freeing dirty vram range\n",
> +                 range->begin_pfn, range->end_pfn);
> +#endif
> +
> +        if ( range->pl_tab )
> +        {
> +            for ( i = 0; i != nr; i++ )
> +            {
> +                dv_paddr_link_t *plx;
> +                
> +                plx = remap_maddr(NULL, range->pl_tab[i].mapping.pl_next);
> +
> +                /* Does current FB page have multiple mappings? */
> +                if ( plx ) /* Yes */
> +                {
> +                    /* Find the last element in singly-linked list */
> +                    while ( plx->pl_next != INVALID_PADDR ) {
> +                        plx = remap_maddr(plx, plx->pl_next);
> +                    }
> +                    
> +                    /* Prepend whole list to the free list */
> +                    plx->pl_next = dirty_vram->pl_free;
> +                    dirty_vram->pl_free = range->pl_tab[i].mapping.pl_next;
> +                }
> +                /* Unmap plx. */
> +                remap_vaddr(plx, NULL);
> +            }
> +            xfree(range->pl_tab);
> +            range->pl_tab = NULL;
> +        }
> +        /* Remove range from the linked list, free it, and adjust count*/
> +        list_del(&range->range_link);
> +        xfree(range);
> +        dirty_vram->nr_ranges--;
> +    }
> +}
> +
> +/*
> + * dirty_vram_range_alloc()
> + * This function ensures that the new range does not overlap any existing
> + * ranges -- deleting them if necessary -- and then calls
> + * _dirty_vram_range_alloc to actually allocate the new range.
> + */
> +dv_range_t *
> +dirty_vram_range_alloc(struct domain *d,
> +                        unsigned long begin_pfn,
> +                        unsigned long nr)
> +{
> +    unsigned long end_pfn = begin_pfn + nr;
> +    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
> +    dv_range_t *range;
> +    struct list_head *curr, *next;
> +
> +    ASSERT( paging_locked_by_me(d) );
> +    ASSERT( dirty_vram != NULL );
> +
> +    /*
> +     * Ranges cannot overlap so
> +     * free any range that overlaps [ begin_pfn .. end_pfn ).
> +     */
> +    list_for_each_safe(curr, next, &dirty_vram->range_head)
> +    {
> +        dv_range_t *rng = list_entry(curr, dv_range_t, range_link);
> +        if ( ( ( rng->begin_pfn <= begin_pfn ) &&
> +               ( begin_pfn <  rng->end_pfn   )
> +                 ) ||
> +             ( ( begin_pfn <= rng->begin_pfn ) &&
> +               ( rng->begin_pfn < end_pfn    )
> +                 ) )
> +        {
> +            /* Different tracking, tear the previous down. */
> +            dirty_vram_range_free(d, rng);
> +        }
> +    }
> +
> +    range = _dirty_vram_range_alloc(d, begin_pfn, nr);
> +    if ( !range )
> +        goto out;
> +
> + out:
> +    return range;
> +}
> +
> +/*
> + * dirty_vram_range_find_or_alloc()
> + * Find the range for [begin_pfn:begin_pfn+nr).
> + * If it doesn't exists, create it.
> + */
> +dv_range_t *
> +dirty_vram_range_find_or_alloc(struct domain *d,
> +                                unsigned long begin_pfn,
> +                                unsigned long nr)
> +{
> +    dv_range_t *range;
> +    ASSERT( paging_locked_by_me(d) );
> +    range = dirty_vram_range_find(d, begin_pfn, nr);
> +    if ( !range )
> +        range = dirty_vram_range_alloc(d, begin_pfn, nr);
> +    
> +    return range;
> +}
> +
> +
> +static paddr_t
> +domain_page_map_to_maddr(void *va)
> +{
> +    if ( va )
> +        return (domain_page_map_to_mfn(va) << PAGE_SHIFT) +
> +            (((unsigned long)va) & ~PAGE_MASK);
> +    else
> +        return INVALID_PADDR;
> +}
> +
> +void *
> +remap_maddr(void *old_va, paddr_t new_ma)
> +{
> +    if ( old_va )
> +        unmap_domain_page(old_va);
> +    
> +    if ( new_ma != INVALID_PADDR )
> +        return map_domain_page(new_ma >> PAGE_SHIFT) + (new_ma & ~PAGE_MASK);
> +    else
> +        return NULL;
> +}
> +
> +void *
> +remap_vaddr(void *old_va, void *new_va)
> +{
> +    if ( old_va )
> +        unmap_domain_page(old_va);
> +    
> +    if ( new_va )
> +        /* Map page now in anticipation of future matching unmap */
> +        map_domain_page(domain_page_map_to_mfn(new_va));
> +
> +    return new_va;
> +}
> +
> +
> +/* Allocate a dv_paddr_link struct */
> +static dv_paddr_link_t *
> +alloc_paddr_link(struct domain *d)
> +{
> +    dv_paddr_link_t *pl = NULL;
> +    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
> +    dv_paddr_link_ext_t *ext = NULL;
> +    
> +
> +    ASSERT( paging_locked_by_me(d) );
> +    BUILD_BUG_ON(sizeof(dv_paddr_link_ext_t) > PAGE_SIZE);
> +    /* Is the list of free pl's empty? */
> +    if ( dirty_vram->pl_free == INVALID_PADDR ) /* Yes. */
> +    {
> +        /*
> +         * Allocate another page of pl's.
> +         * Link them all together and point the free list head at them
> +         */
> +        int i;
> +        struct page_info *pg = d->arch.paging.alloc_page(d);
> +        if ( pg == NULL )
> +            goto out;
> +
> +        ext = __map_domain_page(pg);
> +        if ( ext == NULL )
> +        {
> +            d->arch.paging.free_page(d, pg);
> +            goto out;
> +        }
> +
> +#if DEBUG_allocate_paddr_links_page
> +        gdprintk(XENLOG_DEBUG, "Allocated another page of paddr_links\n");
> +#endif
> +        /* Prepend new extension page to dirty_vram's list of same. */
> +        ext->ext_link = dirty_vram->ext_head;
> +        dirty_vram->ext_head = domain_page_map_to_maddr(ext);
> +
> +        /* Initialize and link together the new pl entries. */
> +        for ( i = 0; i != ARRAY_SIZE(ext->entries); i++ )
> +        {
> +            ext->entries[i].sl1ma = INVALID_PADDR;
> +            ext->entries[i].pl_next =
> +                domain_page_map_to_maddr(&ext->entries[i+1]);
> +        }
> +        /* Mark the last entry's next pointer as "null". */
> +        ext->entries[ARRAY_SIZE(ext->entries) - 1].pl_next = INVALID_PADDR;
> +
> +        /* Set the dirty_vram's free list pointer to the first entry. */
> +        dirty_vram->pl_free = domain_page_map_to_maddr(&ext->entries[0]);
> +    }
> +    pl = remap_maddr(NULL, dirty_vram->pl_free);
> +    dirty_vram->pl_free = pl->pl_next;
> +
> +    pl->sl1ma = INVALID_PADDR;
> +    pl->pl_next = INVALID_PADDR;
> + out:
> +    if ( ext )
> +        unmap_domain_page(ext);
> +    return pl;
> +}
> +
> +
> +/*
> + * Free a paddr_link struct.
> + *
> + * The caller has walked the singly-linked list of elements
> + * that have, as their head, an element in a pl_tab cell.
> + * The list walk has reached the element to be freed.
> + * (Each element is a dv_paddr_link_t struct.)
> + *
> + * @pl points to the element to be freed.
> + * @ppl points to its predecessor's next member.
> + *
> + * After linking the predecessor to the element's successor,
> + * we can free @pl by prepending it to the list of free
> + * elements.
> + *
> + * As a boundary case (which happens to be the common case),
> + * @pl points to a cell in the pl_tab rather than to some
> + * extension element dangling from that cell.
> + * We recognize this case because @ppl is NULL.
> + * In that case we promote the first extension element by
> + * copying it into the pl_tab cell and freeing it.
> + *
> + * Returns a pointer to @pl's successor.  This is where
> + * any iterative processing will resume.
> + */
> +
> +dv_paddr_link_t *
> +free_paddr_link(struct domain *d,
> +                paddr_t *ppl,
> +                dv_paddr_link_t *pl)
> +{
> +    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
> +    dv_paddr_link_t *npl; /* Next pl. */
> +
> +    ASSERT( paging_locked_by_me(d) );
> +    /* Extension mapping? */
> +    if ( ppl ) /* Yes, relink around it. */
> +    {
> +        /* When done, resume processing at pl's successor. */
> +        npl = remap_maddr(NULL, pl->pl_next);
> +        (*ppl) = domain_page_map_to_maddr(npl);
> +    }
> +    else  /* pl points to entry in the main table. */
> +    {
> +        /*
> +         * Move 2nd mapping to main table
> +         * and free 2nd mapping.
> +         */
> +        dv_paddr_link_t *spl; /* 2nd mapping */
> +        spl = remap_maddr(NULL, pl->pl_next);
> +        /* Is there a 2nd mapping? */
> +        if ( spl == NULL ) /* No. */
> +        {
> +            /* Invalidate the main table entry. */
> +            pl->sl1ma = INVALID_PADDR;
> +            return pl;
> +        }
> +        /* Copy 2nd mapping into main table. */
> +        pl->sl1ma = spl->sl1ma;
> +        pl->pl_next = spl->pl_next;
> +        npl = pl; /* Reprocess main table entry again. */
> +        pl = spl; /* Prepare to free 2nd mapping. */
> +    }
> +
> +    /* Prepare entry for prepending to the free list */
> +    pl->sl1ma = INVALID_PADDR;
> +    pl->pl_next = dirty_vram->pl_free;
> +    dirty_vram->pl_free = domain_page_map_to_maddr(pl);
> +    remap_vaddr(pl, NULL); /* Unmap pl. */
> +
> +    return npl;
> +}
> +
> +
> +/*
> + * dirty_vram_range_update()
> + *
> + * This is called whenever a level 1 page table entry is modified.
> + * If the L1PTE is being cleared, the function removes any paddr_links
> + * that refer to it.
> + * If the L1PTE is being set to a frame buffer page, a paddr_link is
> + * created for that page's entry in pl_tab.
> + * Returns 1 iff entry found and set or cleared.
> + */
> +int dirty_vram_range_update(struct domain *d,
> +                            unsigned long gfn,
> +                            paddr_t sl1ma,
> +                            int set)
> +{
> +    int effective = 0;
> +    dv_range_t *range;
> +    unsigned long i;
> +    dv_paddr_link_t *pl;
> +    paddr_t *ppl;
> +    int len = 0;
> +
> +    ASSERT(paging_locked_by_me(d));
> +    range = dirty_vram_range_find_gfn(d, gfn);
> +    if ( !range )
> +        return effective;
> +
> +    
> +    i = gfn - range->begin_pfn;
> +    pl = remap_vaddr(NULL, &range->pl_tab[ i ].mapping);
> +    ppl = NULL;
> +    /*
> +     * Find matching entry (pl), if any, and its predecessor (ppl)
> +     * in linked list.
> +     */
> +    while ( pl != NULL )
> +    {
> +        if ( pl->sl1ma == sl1ma || pl->sl1ma == INVALID_PADDR )
> +            break;
> +
> +        ppl = remap_vaddr(ppl, &pl->pl_next);
> +        pl =  remap_maddr(pl, (*ppl));
> +        len++;
> +    }
> +    
> +    if ( set )
> +    {
> +        /* Did we find sl1ma in either the main table or the linked list? */
> +        if ( pl == NULL ) /* No, so we'll need to alloc a link. */
> +        {
> +            ASSERT(ppl != NULL); /* ppl points to predecessor's next link. */
> +            
> +#if DEBUG_alloc_paddr_inject_fault
> +            {
> +                static int counter;
> +                
> +                /* Test stuck_dirty logic for some cases */
> +                if ( (++counter) % 4 == 0 )
> +                {
> +                    /* Simply mark the frame buffer page as always dirty */
> +                    range->pl_tab[ i ].stuck_dirty = 1;
> +                    gdprintk(XENLOG_DEBUG,
> +                             "[%lx] inject stuck dirty fault\n",
> +                             gfn );
> +                    goto out;
> +                }
> +            }
> +#endif
> +            /*
> +             * Have we reached the limit of mappings we're willing
> +             * to bookkeep?
> +             */
> +            if ( len > DV_ADDR_LINK_LIST_LIMIT ) /* Yes. */
> +            {
> +#if DEBUG_link_limit_exceeded
> +                if ( !range->pl_tab[ i ].stuck_dirty )
> +                    gdprintk(XENLOG_DEBUG,
> +                             "[%lx] link limit exceeded\n",
> +                             gfn );
> +#endif            
> +                /* Simply mark the frame buffer page as always dirty */
> +                range->pl_tab[ i ].stuck_dirty = 1;
> +                goto out;
> +            }
> +
> +            /* Alloc new link */
> +            pl = alloc_paddr_link(d);
> +            /* Were we able to allocate a link? */
> +            if ( pl == NULL ) /* No. */
> +            {
> +                /* Simply mark the frame buffer page as always dirty */
> +                range->pl_tab[ i ].stuck_dirty = 1;
> +                
> +                gdprintk(XENLOG_DEBUG,
> +                         "[%lx] alloc failure\n",
> +                         gfn );
> +                
> +                goto out;
> +            }
> +            /* Append new link to the list */
> +            (*ppl) = domain_page_map_to_maddr(pl);
> +        }
> +
> +        /* Did we find an entry for sl1ma? */
> +        if ( pl->sl1ma != sl1ma ) /* No. */
> +        {
> +            /* pl must point to either a previously unused entry in the
> +             * main table, or to a newly allocated link.
> +             * In either case, the link's sl1ma should be 'null'.
> +             */
> +            ASSERT(pl->sl1ma == INVALID_PADDR);
> +            pl->sl1ma = sl1ma;
> +        }
> +        effective = 1; /* Take note that we're going to set an entry. */
> +        if ( len > range->mappings_hwm )
> +        {
> +            /* Bookkeep the high water mark. */
> +            range->mappings_hwm = len;
> +#if DEBUG_update_vram_mapping
> +            gdprintk(XENLOG_DEBUG,
> +                     "[%lx] set      sl1ma:%lx hwm:%d "
> +                     "free_pages:%d\n",
> +                     gfn, sl1ma,
> +                     range->mappings_hwm,
> +                     d->arch.paging.shadow.free_pages);
> +#endif
> +        }
> +    }
> +    else /* Clear */
> +    {
> +        if ( pl && pl->sl1ma == sl1ma )
> +        {
> +#if DEBUG_update_vram_mapping
> +            gdprintk(XENLOG_DEBUG,
> +                     "[%lx] clear    sl1ma:%lx\n",
> +                     gfn, sl1ma);
> +#endif
> +            pl = free_paddr_link(d, ppl, pl);
> +            effective = 1; /* Take note that we're clearing an entry. */
> +        }
> +    }
> + out:
> +    remap_vaddr(pl,  NULL);
> +    remap_vaddr(ppl, NULL);
> +    return effective;
> +}
> +
> +
> +/*
> + * shadow_scan_dirty_flags()
> + * This produces a dirty bitmap for the range by examining every
> + * L1PTE referenced by some dv_paddr_link in the range's pl_tab table.
> + * It tests and clears each such L1PTE's dirty flag.
> + */
> +static int 
> +shadow_scan_dirty_flags(struct domain *d,
> +                        dv_range_t *range,
> +                        uint8_t *dirty_bitmap)
> +{
> +    int flush_tlb = 0;
> +    unsigned long i;
> +    unsigned long nr = range->end_pfn - range->begin_pfn;
> +    l1_pgentry_t *sl1e = NULL;
> +
> +    ASSERT( paging_locked_by_me(d) );
> +    /* Iterate over VRAM to track dirty bits. */
> +    for ( i = 0; i < nr; i++ )
> +    {
> +        int dirty = 0;
> +        
> +        /* Does the frame buffer have an incomplete set of mappings? */
> +        if ( unlikely(range->pl_tab[i].stuck_dirty) ) /* Yes. */
> +            dirty = 1; /* Then always assume the page is dirty. */
> +        else { /* The frame buffer's set of mappings is complete.  Scan it. */
> +            paddr_t next = range->pl_tab[i].mapping.pl_next;
> +            paddr_t sl1ma = range->pl_tab[i].mapping.sl1ma;
> +            dv_paddr_link_t *pl = NULL;
> +            for (;;)
> +            {
> +                if ( sl1ma == INVALID_PADDR )
> +                    /* End of list or frame buffer page is unmapped. */
> +                    break;
> +
> +                if ( sl1e ) /* Cleanup from previous iteration. */
> +                    unmap_domain_page(sl1e);
> +
> +                sl1e = map_domain_page(sl1ma >> PAGE_SHIFT) +
> +                    (sl1ma & ~PAGE_MASK);
> +                if ( l1e_get_flags(*sl1e) & _PAGE_DIRTY )
> +                {
> +                    dirty = 1;
> +                    /* Clear dirty so we can detect if page gets re-dirtied.
> +                     * Note: this is atomic, so we may clear a
> +                     * _PAGE_ACCESSED set by another processor.
> +                     */
> +                    l1e_remove_flags(*sl1e, _PAGE_DIRTY);
> +                    flush_tlb = 1;
> +                    /* Even though we now know the frame buffer page is dirty,
> +                     * keep iterating to clear the dirty flag in all other
> +                     * mappings. 
> +                     */
> +                }
> +                
> +                pl = remap_maddr(pl, next);
> +                if ( !pl )
> +                    break;
> +                
> +                next = pl->pl_next;
> +                sl1ma = pl->sl1ma;
> +            } /* for */
> +
> +            /* Unmap pl. */
> +            remap_vaddr(NULL, pl);
> +        }
> +        if ( dirty )
> +            /* Set the i'th bit in the output dirty_bitmap */
> +            dirty_bitmap[i >> 3] |= (1 << (i & 7));
> +
> +    }
> +    
> +    if ( sl1e )
> +        unmap_domain_page(sl1e);
> +
> +    return flush_tlb;
> +}
> +
> +
> +/*
> + * shadow_track_dirty_vram()
> + * This is the API called by the guest to determine which pages in the range
> + * from [begin_pfn:begin_pfn+nr) have been dirtied since the last call.
> + * It creates the domain's dv_dirty_vram on demand.
> + * It creates ranges on demand when some [begin_pfn:nr) is first encountered.
> + * To collect the dirty bitmask it calls shadow_scan_dirty_flags().
> + * It copies the dirty bitmask into guest storage.
> + */
> +int shadow_track_dirty_vram(struct domain *d,
> +                            unsigned long begin_pfn,
> +                            unsigned long nr,
> +                            XEN_GUEST_HANDLE_64(uint8) guest_dirty_bitmap)
> +{
> +    int rc = 0;
> +    unsigned long end_pfn = begin_pfn + nr;
> +    int flush_tlb = 0;
> +    dv_range_t *range;
> +    struct p2m_domain *p2m = p2m_get_hostp2m(d);
> +    uint8_t *dirty_bitmap = NULL;
> +
> +    /*
> +     * This range test is tricky.
> +     *
> +     * The range [begin_pfn..end_pfn) is an open interval, so end_pfn
> +     * is a pfn beyond the end of the range.
> +     *
> +     * p2m->max_mapped_pfn is a valid PFN so p2m->max_mapped_pfn + 1 is an
> +     * invalid PFN.
> +     *
> +     * If end_pfn is beyond *that* then the range is invalid.
> +     */
> +    if ( end_pfn < begin_pfn
> +         || begin_pfn > p2m->max_mapped_pfn
> +         || end_pfn > p2m->max_mapped_pfn + 1 )
> +        return -EINVAL;
> +
> +    paging_lock(d);
> +
> +    if ( !nr )
> +    {
> +        dirty_vram_free(d);
> +        goto out;
> +    }
> +
> +    if ( guest_handle_is_null(guest_dirty_bitmap) )
> +        goto out;
> +
> +    if ( !dirty_vram_find_or_alloc(d) )
> +    {
> +        rc = -ENOMEM;
> +        goto out;
> +    }
> +
> +    range = dirty_vram_range_find(d, begin_pfn, nr);
> +    if ( !range )
> +    {
> +        range = dirty_vram_range_alloc(d, begin_pfn, nr);
> +        if ( range )
> +            sh_find_all_vram_mappings(d->vcpu[0], range);
> +    }
> +    if ( range )
> +    {
> +        int size = ( nr + BITS_PER_BYTE - 1 ) / BITS_PER_BYTE;
> +        
> +        rc = -ENOMEM;
> +        dirty_bitmap = xzalloc_bytes( size );
> +        if ( !dirty_bitmap )
> +            goto out;
> +
> +        flush_tlb |= shadow_scan_dirty_flags(d, range, dirty_bitmap);
> +
> +        rc = -EFAULT;
> +        if ( copy_to_guest(guest_dirty_bitmap,
> +                           dirty_bitmap,
> +                           size) == 0 )
> +            rc = 0;
> +    }
> +    
> +    if ( flush_tlb )
> +        flush_tlb_mask(d->domain_dirty_cpumask);
> +
> +out:
> +    paging_unlock(d);
> +    
> +    if ( dirty_bitmap )
> +        xfree(dirty_bitmap);
> +    return rc;
> +}
> +
> +
> +/************************************************/
> +/*          HAP VRAM TRACKING SUPPORT           */
> +/************************************************/
> +
> +/*
> + * hap_track_dirty_vram()
> + * Create the domain's dv_dirty_vram struct on demand.
> + * Create a dirty vram range on demand when some [begin_pfn:begin_pfn+nr] is
> + * first encountered.
> + * Collect the guest_dirty bitmask, a bit mask of the dirty vram pages, by
> + * calling paging_log_dirty_range(), which interrogates each vram
> + * page's p2m type looking for pages that have been made writable.
> + */
> +int hap_track_dirty_vram(struct domain *d,
> +                         unsigned long begin_pfn,
> +                         unsigned long nr,
> +                         XEN_GUEST_HANDLE_64(uint8) guest_dirty_bitmap)
> +{
> +    long rc = 0;
> +    dv_dirty_vram_t *dirty_vram;
> +    uint8_t *dirty_bitmap = NULL;
> +
> +    if ( nr )
> +    {
> +        dv_range_t *range = NULL;
> +        int size = ( nr + BITS_PER_BYTE - 1 ) / BITS_PER_BYTE;
> +        
> +        if ( !paging_mode_log_dirty(d) )
> +        {
> +            hap_logdirty_init(d);
> +            rc = paging_log_dirty_enable(d);
> +            if ( rc )
> +                goto out;
> +        }
> +
> +        rc = -ENOMEM;
> +        dirty_bitmap = xzalloc_bytes( size );
> +        if ( !dirty_bitmap )
> +            goto out;
> +        
> +        paging_lock(d);
> +        
> +        dirty_vram = d->arch.hvm_domain.dirty_vram;
> +        if ( !dirty_vram ) 
> +        {
> +            rc = -ENOMEM;
> +            if ( !(dirty_vram = dirty_vram_alloc(d)) )
> +            {
> +                paging_unlock(d);
> +                goto out;
> +            }
> +        }
> +        
> +        range = dirty_vram_range_find(d, begin_pfn, nr);
> +        if ( !range )
> +        {
> +            rc = -ENOMEM;
> +            if ( !(range = dirty_vram_range_alloc(d, begin_pfn, nr)) )
> +            {
> +                paging_unlock(d);
> +                goto out;
> +            }
> +            
> +            paging_unlock(d);
> +            
> +            /* Set l1e entries of range within P2M table to be read-only. */
> +            p2m_change_type_range(d, begin_pfn, begin_pfn + nr,
> +                                  p2m_ram_rw, p2m_ram_logdirty);
> +            
> +            flush_tlb_mask(d->domain_dirty_cpumask);
> +            
> +            memset(dirty_bitmap, 0xff, size); /* Consider all pages dirty. */
> +        }
> +        else
> +        {
> +            paging_unlock(d);
> +            
> +            domain_pause(d);
> +            
> +            /* Get the bitmap. */
> +            paging_log_dirty_range(d, begin_pfn, nr, dirty_bitmap);
> +            
> +            domain_unpause(d);
> +        }
> +        
> +        
> +        rc = -EFAULT;
> +        if ( copy_to_guest(guest_dirty_bitmap,
> +                           dirty_bitmap,
> +                           size) == 0 )
> +        {
> +            rc = 0;
> +        }
> +    }
> +    else {
> +        paging_lock(d);
> +        
> +        dirty_vram = d->arch.hvm_domain.dirty_vram;
> +        if ( dirty_vram )
> +        {
> +            /*
> +             * If zero pages are specified while tracking dirty vram,
> +             * stop tracking.
> +             */
> +            dirty_vram_free(d);
> +        
> +        }
> +        
> +        paging_unlock(d);
> +    }
> +out:
> +    if ( dirty_bitmap )
> +        xfree(dirty_bitmap);
> +    
> +    return rc;
> +}
> +
> +/*
> + * Local variables:
> + * mode: C
> + * c-file-style: "BSD"
> + * c-basic-offset: 4
> + * indent-tabs-mode: nil
> + * End:
> + */
> diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c
> index f353d3a..3ac54c9 100644
> --- a/xen/arch/x86/mm/hap/hap.c
> +++ b/xen/arch/x86/mm/hap/hap.c
> @@ -53,117 +53,6 @@
>  #define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg))
>  
>  /************************************************/
> -/*          HAP VRAM TRACKING SUPPORT           */
> -/************************************************/
> -
> -/*
> - * hap_track_dirty_vram()
> - * Create the domain's dv_dirty_vram struct on demand.
> - * Create a dirty vram range on demand when some [begin_pfn:begin_pfn+nr] is
> - * first encountered.
> - * Collect the guest_dirty bitmask, a bit mask of the dirty vram pages, by
> - * calling paging_log_dirty_range(), which interrogates each vram
> - * page's p2m type looking for pages that have been made writable.
> - */
> -
> -int hap_track_dirty_vram(struct domain *d,
> -                         unsigned long begin_pfn,
> -                         unsigned long nr,
> -                         XEN_GUEST_HANDLE_64(uint8) guest_dirty_bitmap)
> -{
> -    long rc = 0;
> -    struct sh_dirty_vram *dirty_vram;
> -    uint8_t *dirty_bitmap = NULL;
> -
> -    if ( nr )
> -    {
> -        int size = (nr + BITS_PER_BYTE - 1) / BITS_PER_BYTE;
> -
> -        if ( !paging_mode_log_dirty(d) )
> -        {
> -            hap_logdirty_init(d);
> -            rc = paging_log_dirty_enable(d);
> -            if ( rc )
> -                goto out;
> -        }
> -
> -        rc = -ENOMEM;
> -        dirty_bitmap = xzalloc_bytes(size);
> -        if ( !dirty_bitmap )
> -            goto out;
> -
> -        paging_lock(d);
> -
> -        dirty_vram = d->arch.hvm_domain.dirty_vram;
> -        if ( !dirty_vram )
> -        {
> -            rc = -ENOMEM;
> -            if ( (dirty_vram = xzalloc(struct sh_dirty_vram)) == NULL )
> -            {
> -                paging_unlock(d);
> -                goto out;
> -            }
> -
> -            d->arch.hvm_domain.dirty_vram = dirty_vram;
> -        }
> -
> -        if ( begin_pfn != dirty_vram->begin_pfn ||
> -             begin_pfn + nr != dirty_vram->end_pfn )
> -        {
> -            dirty_vram->begin_pfn = begin_pfn;
> -            dirty_vram->end_pfn = begin_pfn + nr;
> -
> -            paging_unlock(d);
> -
> -            /* set l1e entries of range within P2M table to be read-only. */
> -            p2m_change_type_range(d, begin_pfn, begin_pfn + nr,
> -                                  p2m_ram_rw, p2m_ram_logdirty);
> -
> -            flush_tlb_mask(d->domain_dirty_cpumask);
> -
> -            memset(dirty_bitmap, 0xff, size); /* consider all pages dirty */
> -        }
> -        else
> -        {
> -            paging_unlock(d);
> -
> -            domain_pause(d);
> -
> -            /* get the bitmap */
> -            paging_log_dirty_range(d, begin_pfn, nr, dirty_bitmap);
> -
> -            domain_unpause(d);
> -        }
> -
> -        rc = -EFAULT;
> -        if ( copy_to_guest(guest_dirty_bitmap, dirty_bitmap, size) == 0 )
> -            rc = 0;
> -    }
> -    else
> -    {
> -        paging_lock(d);
> -
> -        dirty_vram = d->arch.hvm_domain.dirty_vram;
> -        if ( dirty_vram )
> -        {
> -            /*
> -             * If zero pages specified while tracking dirty vram
> -             * then stop tracking
> -             */
> -            xfree(dirty_vram);
> -            d->arch.hvm_domain.dirty_vram = NULL;
> -        }
> -
> -        paging_unlock(d);
> -    }
> -out:
> -    if ( dirty_bitmap )
> -        xfree(dirty_bitmap);
> -
> -    return rc;
> -}
> -
> -/************************************************/
>  /*            HAP LOG DIRTY SUPPORT             */
>  /************************************************/
>  
> diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
> index de1dd82..6f638a2 100644
> --- a/xen/arch/x86/mm/p2m.c
> +++ b/xen/arch/x86/mm/p2m.c
> @@ -741,20 +741,23 @@ void p2m_change_type_range(struct domain *d,
>      struct p2m_domain *p2m = p2m_get_hostp2m(d);
>  
>      BUG_ON(p2m_is_grant(ot) || p2m_is_grant(nt));
> -
>      p2m_lock(p2m);
> -    p2m->defer_nested_flush = 1;
>  
> +    p2m->defer_nested_flush = 1;
> +    
>      for ( gfn = start; gfn < end; gfn++ )
>      {
>          mfn = p2m->get_entry(p2m, gfn, &pt, &a, 0, NULL);
>          if ( pt == ot )
> -            set_p2m_entry(p2m, gfn, mfn, PAGE_ORDER_4K, nt, p2m->default_access);
> +            set_p2m_entry(p2m, gfn, mfn, PAGE_ORDER_4K, nt,
> +                          p2m->default_access);
>      }
> -
> +    
>      p2m->defer_nested_flush = 0;
> +
>      if ( nestedhvm_enabled(d) )
>          p2m_flush_nestedp2m(d);
> +
>      p2m_unlock(p2m);
>  }
>  
> diff --git a/xen/arch/x86/mm/paging.c b/xen/arch/x86/mm/paging.c
> index ac9bb1a..d59e8d6 100644
> --- a/xen/arch/x86/mm/paging.c
> +++ b/xen/arch/x86/mm/paging.c
> @@ -27,6 +27,7 @@
>  #include <asm/p2m.h>
>  #include <asm/hap.h>
>  #include <asm/hvm/nestedhvm.h>
> +#include <asm/dirty_vram.h>
>  #include <xen/numa.h>
>  #include <xsm/xsm.h>
>  
> @@ -192,15 +193,11 @@ int paging_log_dirty_disable(struct domain *d)
>      return ret;
>  }
>  
> -/* Mark a page as dirty */
> +/* Given a guest mfn, mark a page as dirty */
>  void paging_mark_dirty(struct domain *d, unsigned long guest_mfn)
>  {
>      unsigned long pfn;
>      mfn_t gmfn;
> -    int changed;
> -    mfn_t mfn, *l4, *l3, *l2;
> -    unsigned long *l1;
> -    int i1, i2, i3, i4;
>  
>      gmfn = _mfn(guest_mfn);
>  
> @@ -210,6 +207,19 @@ void paging_mark_dirty(struct domain *d, unsigned long guest_mfn)
>  
>      /* We /really/ mean PFN here, even for non-translated guests. */
>      pfn = get_gpfn_from_mfn(mfn_x(gmfn));
> +    paging_mark_dirty_gpfn(d, pfn);
> +}
> +
> +
> +/* Given a guest pfn, mark a page as dirty */
> +void paging_mark_dirty_gpfn(struct domain *d, unsigned long pfn)
> +{
> +    int changed;
> +    mfn_t mfn, *l4, *l3, *l2;
> +    unsigned long *l1;
> +    int i1, i2, i3, i4;
> +    dv_range_t *range;
> +    
>      /* Shared MFNs should NEVER be marked dirty */
>      BUG_ON(SHARED_M2P(pfn));
>  
> @@ -229,6 +239,11 @@ void paging_mark_dirty(struct domain *d, unsigned long guest_mfn)
>      /* Recursive: this is called from inside the shadow code */
>      paging_lock_recursive(d);
>  
> +    d->arch.paging.log_dirty.dirty_count++;
> +    range = dirty_vram_range_find_gfn(d, pfn);
> +    if ( range )
> +        range->dirty_count++;
> +
>      if ( unlikely(!mfn_valid(d->arch.paging.log_dirty.top)) ) 
>      {
>           d->arch.paging.log_dirty.top = paging_new_log_dirty_node(d);
> @@ -445,7 +460,31 @@ void paging_log_dirty_range(struct domain *d,
>      struct p2m_domain *p2m = p2m_get_hostp2m(d);
>      int i;
>      unsigned long pfn;
> +    dv_range_t *range;
> +    unsigned int range_dirty_count;
>  
> +    paging_lock(d);
> +    range = dirty_vram_range_find_gfn(d, begin_pfn);
> +    if ( !range )
> +    {
> +        paging_unlock(d);
> +        goto out;
> +    }
> +    
> +    range_dirty_count = range->dirty_count;
> +    range->dirty_count = 0;
> +
> +    paging_unlock(d);
> +    
> +    if ( !range_dirty_count )
> +        goto out;
> +
> +    PAGING_DEBUG(LOGDIRTY,
> +                 "log-dirty-range: dom %u [%05lx:%05lx] range_dirty=%u\n",
> +                 d->domain_id,
> +                 begin_pfn,
> +                 begin_pfn + nr,
> +                 range_dirty_count);
>      /*
>       * Set l1e entries of P2M table to be read-only.
>       *
> @@ -460,15 +499,17 @@ void paging_log_dirty_range(struct domain *d,
>  
>      for ( i = 0, pfn = begin_pfn; pfn < begin_pfn + nr; i++, pfn++ )
>      {
> -        p2m_type_t pt;
> -        pt = p2m_change_type(d, pfn, p2m_ram_rw, p2m_ram_logdirty);
> -        if ( pt == p2m_ram_rw )
> +        if ( p2m_change_type(d, pfn, p2m_ram_rw, p2m_ram_logdirty) ==
> +             p2m_ram_rw )
>              dirty_bitmap[i >> 3] |= (1 << (i & 7));
>      }
>  
>      p2m_unlock(p2m);
>  
>      flush_tlb_mask(d->domain_dirty_cpumask);
> +
> + out:
> +    return;
>  }
>  
>  /* Note that this function takes three function pointers. Callers must supply
> diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c
> index 292c1f7..b72a6bb 100644
> --- a/xen/arch/x86/mm/shadow/common.c
> +++ b/xen/arch/x86/mm/shadow/common.c
> @@ -36,6 +36,7 @@
>  #include <asm/current.h>
>  #include <asm/flushtlb.h>
>  #include <asm/shadow.h>
> +#include <asm/dirty_vram.h>
>  #include <xen/numa.h>
>  #include "private.h"
>  
> @@ -3121,12 +3122,7 @@ void shadow_teardown(struct domain *d)
>       * calls now that we've torn down the bitmap */
>      d->arch.paging.mode &= ~PG_log_dirty;
>  
> -    if (d->arch.hvm_domain.dirty_vram) {
> -        xfree(d->arch.hvm_domain.dirty_vram->sl1ma);
> -        xfree(d->arch.hvm_domain.dirty_vram->dirty_bitmap);
> -        xfree(d->arch.hvm_domain.dirty_vram);
> -        d->arch.hvm_domain.dirty_vram = NULL;
> -    }
> +    dirty_vram_free(d);
>  
>      paging_unlock(d);
>  
> @@ -3464,194 +3460,219 @@ void shadow_clean_dirty_bitmap(struct domain *d)
>  
>  /**************************************************************************/
>  /* VRAM dirty tracking support */
> -int shadow_track_dirty_vram(struct domain *d,
> -                            unsigned long begin_pfn,
> -                            unsigned long nr,
> -                            XEN_GUEST_HANDLE_64(uint8) dirty_bitmap)
> -{
> -    int rc;
> -    unsigned long end_pfn = begin_pfn + nr;
> -    unsigned long dirty_size = (nr + 7) / 8;
> -    int flush_tlb = 0;
> -    unsigned long i;
> -    p2m_type_t t;
> -    struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
> -    struct p2m_domain *p2m = p2m_get_hostp2m(d);
>  
> -    if (end_pfn < begin_pfn
> -            || begin_pfn > p2m->max_mapped_pfn
> -            || end_pfn >= p2m->max_mapped_pfn)
> -        return -EINVAL;
>  
> -    /* We perform p2m lookups, so lock the p2m upfront to avoid deadlock */
> -    p2m_lock(p2m_get_hostp2m(d));
> -    paging_lock(d);
> +/* Support functions for shadow-based dirty VRAM code */
>  
> -    if ( dirty_vram && (!nr ||
> -             ( begin_pfn != dirty_vram->begin_pfn
> -            || end_pfn   != dirty_vram->end_pfn )) )
> -    {
> -        /* Different tracking, tear the previous down. */
> -        gdprintk(XENLOG_INFO, "stopping tracking VRAM %lx - %lx\n", dirty_vram->begin_pfn, dirty_vram->end_pfn);
> -        xfree(dirty_vram->sl1ma);
> -        xfree(dirty_vram->dirty_bitmap);
> -        xfree(dirty_vram);
> -        dirty_vram = d->arch.hvm_domain.dirty_vram = NULL;
> -    }
> +#define DEBUG_unshadow_sl1ma                  0          
> +#define DEBUG_unshadow_sl1ma_detail           0
> +#define DEBUG_count_initial_mappings          0
>  
> -    if ( !nr )
> +/* smfn is no longer a shadow page.  Remove it from any
> + * dirty vram range mapping. */
> +void
> +dirty_vram_delete_shadow(struct vcpu *v,
> +                         unsigned long gfn,
> +                         unsigned int shadow_type, 
> +                         mfn_t smfn)
> +{
> +    static unsigned int l1_shadow_mask = 
> +          1 << SH_type_l1_32_shadow
> +        | 1 << SH_type_fl1_32_shadow
> +        | 1 << SH_type_l1_pae_shadow
> +        | 1 << SH_type_fl1_pae_shadow
> +        | 1 << SH_type_l1_64_shadow
> +        | 1 << SH_type_fl1_64_shadow
> +        ;
> +    struct domain *d = v->domain;
> +    dv_dirty_vram_t *dirty_vram;
> +    struct list_head *curr, *next;
> +    dv_paddr_link_t *pl = NULL;
> +    paddr_t *ppl = NULL;
> +    
> +    ASSERT(paging_locked_by_me(d));
> +    /* Ignore all but level 1 shadows */
> +    
> +    if ( (l1_shadow_mask & (1 << shadow_type)) == 0 )
>      {
> -        rc = 0;
>          goto out;
>      }
>  
> -    /* This should happen seldomly (Video mode change),
> -     * no need to be careful. */
> +    dirty_vram = d->arch.hvm_domain.dirty_vram;
>      if ( !dirty_vram )
>      {
> -        /* Throw away all the shadows rather than walking through them 
> -         * up to nr times getting rid of mappings of each pfn */
> -        shadow_blow_tables(d);
> -
> -        gdprintk(XENLOG_INFO, "tracking VRAM %lx - %lx\n", begin_pfn, end_pfn);
> -
> -        rc = -ENOMEM;
> -        if ( (dirty_vram = xmalloc(struct sh_dirty_vram)) == NULL )
> -            goto out;
> -        dirty_vram->begin_pfn = begin_pfn;
> -        dirty_vram->end_pfn = end_pfn;
> -        d->arch.hvm_domain.dirty_vram = dirty_vram;
> -
> -        if ( (dirty_vram->sl1ma = xmalloc_array(paddr_t, nr)) == NULL )
> -            goto out_dirty_vram;
> -        memset(dirty_vram->sl1ma, ~0, sizeof(paddr_t) * nr);
> -
> -        if ( (dirty_vram->dirty_bitmap = xzalloc_array(uint8_t, dirty_size)) == NULL )
> -            goto out_sl1ma;
> -
> -        dirty_vram->last_dirty = NOW();
> -
> -        /* Tell the caller that this time we could not track dirty bits. */
> -        rc = -ENODATA;
> -    }
> -    else if (dirty_vram->last_dirty == -1)
> -    {
> -        /* still completely clean, just copy our empty bitmap */
> -        rc = -EFAULT;
> -        if ( copy_to_guest(dirty_bitmap, dirty_vram->dirty_bitmap, dirty_size) == 0 )
> -            rc = 0;
> +        goto out;
>      }
> -    else
> +        
> +    list_for_each_safe(curr, next, &dirty_vram->range_head)
>      {
> -        unsigned long map_mfn = INVALID_MFN;
> -        void *map_sl1p = NULL;
> -
> -        /* Iterate over VRAM to track dirty bits. */
> -        for ( i = 0; i < nr; i++ ) {
> -            mfn_t mfn = get_gfn_query_unlocked(d, begin_pfn + i, &t);
> -            struct page_info *page;
> -            int dirty = 0;
> -            paddr_t sl1ma = dirty_vram->sl1ma[i];
> +        dv_range_t *range = list_entry(curr, dv_range_t, range_link);
> +        unsigned long i;
> +        int max_mappings = 1, mappings = 0;
> +        int unshadowed = 0;
> +        for ( i = 0; i != range->end_pfn - range->begin_pfn; i++ )
> +        {
> +            
> +            pl = remap_vaddr(pl, &range->pl_tab[ i ].mapping);
> +            /* clean up from previous iteration */
> +            ppl = remap_vaddr(ppl, NULL); /* unmap ppl */
>  
> -            if (mfn_x(mfn) == INVALID_MFN)
> -            {
> -                dirty = 1;
> -            }
> -            else
> +            mappings = 0;
> +            
> +            while ( pl != NULL )
>              {
> -                page = mfn_to_page(mfn);
> -                switch (page->u.inuse.type_info & PGT_count_mask)
> -                {
> -                case 0:
> -                    /* No guest reference, nothing to track. */
> -                    break;
> -                case 1:
> -                    /* One guest reference. */
> -                    if ( sl1ma == INVALID_PADDR )
> -                    {
> -                        /* We don't know which sl1e points to this, too bad. */
> -                        dirty = 1;
> -                        /* TODO: Heuristics for finding the single mapping of
> -                         * this gmfn */
> -                        flush_tlb |= sh_remove_all_mappings(d->vcpu[0], mfn);
> -                    }
> -                    else
> -                    {
> -                        /* Hopefully the most common case: only one mapping,
> -                         * whose dirty bit we can use. */
> -                        l1_pgentry_t *sl1e;
> -                        unsigned long sl1mfn = paddr_to_pfn(sl1ma);
> -
> -                        if ( sl1mfn != map_mfn )
> -                        {
> -                            if ( map_sl1p )
> -                                sh_unmap_domain_page(map_sl1p);
> -                            map_sl1p = sh_map_domain_page(_mfn(sl1mfn));
> -                            map_mfn = sl1mfn;
> -                        }
> -                        sl1e = map_sl1p + (sl1ma & ~PAGE_MASK);
> -
> -                        if ( l1e_get_flags(*sl1e) & _PAGE_DIRTY )
> -                        {
> -                            dirty = 1;
> -                            /* Note: this is atomic, so we may clear a
> -                             * _PAGE_ACCESSED set by another processor. */
> -                            l1e_remove_flags(*sl1e, _PAGE_DIRTY);
> -                            flush_tlb = 1;
> -                        }
> -                    }
> -                    break;
> -                default:
> -                    /* More than one guest reference,
> -                     * we don't afford tracking that. */
> -                    dirty = 1;
> +                paddr_t sl1ma = pl->sl1ma;
> +                unsigned long sl1mn;
> +               
> +                if ( sl1ma == INVALID_PADDR )
>                      break;
> +                
> +                sl1mn = sl1ma >> PAGE_SHIFT;
> +                if ( sl1mn == mfn_x(smfn) ) {
> +#if DEBUG_unshadow_sl1ma_detail
> +                    gdprintk(XENLOG_DEBUG,
> +                             "[%lx] gfn[%lx] unshadow sl1ma:%lx\n",
> +                             mfn_x(smfn),
> +                             range->begin_pfn + i,
> +                             sl1ma);
> +#endif
> +                    unshadowed++;
> +                    pl = free_paddr_link(d, ppl, pl);
> +                    /* ppl remains unchanged. */
> +                }
> +                else
> +                {
> +                    ppl = remap_vaddr(ppl, &pl->pl_next);
> +                    pl =  remap_maddr(pl, (*ppl));
> +                    mappings++;
>                  }
> -            }
> -
> -            if ( dirty )
> -            {
> -                dirty_vram->dirty_bitmap[i / 8] |= 1 << (i % 8);
> -                dirty_vram->last_dirty = NOW();
>              }
>          }
> +        if ( mappings > max_mappings )
> +            max_mappings = mappings;
> +        
> +        if ( unshadowed ) {
> +#if DEBUG_unshadow_sl1ma
> +            gdprintk(XENLOG_DEBUG,
> +                     "[%lx] gfn[%05lx:%05lx] unshadowed:%d mappings:0x%x "
> +                     "max_mappings:%d\n",
> +                     mfn_x(smfn),
> +                     range->begin_pfn, range->end_pfn,
> +                     unshadowed, range->nr_mappings, max_mappings);
> +#endif
> +        }
> +    }
> + out:
> +    remap_vaddr(pl,  NULL); /* unmap pl */
> +    remap_vaddr(ppl, NULL); /* unmap ppl */
> +    return;
> +}
> +
>  
> -        if ( map_sl1p )
> -            sh_unmap_domain_page(map_sl1p);
> +typedef int (*hash_pfn_callback_t)(struct vcpu *v,
> +                                   mfn_t smfn,
> +                                   unsigned long begin_pfn,
> +                                   unsigned long end_pfn,
> +                                   int *removed);
>  
> -        rc = -EFAULT;
> -        if ( copy_to_guest(dirty_bitmap, dirty_vram->dirty_bitmap, dirty_size) == 0 ) {
> -            memset(dirty_vram->dirty_bitmap, 0, dirty_size);
> -            if (dirty_vram->last_dirty + SECONDS(2) < NOW())
> +static int hash_pfn_foreach(struct vcpu *v, 
> +                            unsigned int callback_mask, 
> +                            hash_pfn_callback_t callbacks[], 
> +                            unsigned long begin_pfn,
> +                            unsigned long end_pfn)
> +/* Walk the hash table looking at the types of the entries and 
> + * calling the appropriate callback function for each entry. 
> + * The mask determines which shadow types we call back for, and the array
> + * of callbacks tells us which function to call.
> + * Any callback may return non-zero to let us skip the rest of the scan. 
> + *
> + * WARNING: Callbacks MUST NOT add or remove hash entries unless they 
> + * then return non-zero to terminate the scan. */
> +{
> +    int i, done = 0, removed = 0;
> +    struct domain *d = v->domain;
> +    struct page_info *x;
> +
> +    /* Say we're here, to stop hash-lookups reordering the chains */
> +    ASSERT(paging_locked_by_me(d));
> +    ASSERT(d->arch.paging.shadow.hash_walking == 0);
> +    d->arch.paging.shadow.hash_walking = 1;
> +
> +    for ( i = 0; i < SHADOW_HASH_BUCKETS; i++ ) 
> +    {
> +        /* WARNING: This is not safe against changes to the hash table.
> +         * The callback *must* return non-zero if it has inserted or
> +         * deleted anything from the hash (lookups are OK, though). */
> +        for ( x = d->arch.paging.shadow.hash_table[i];
> +              x;
> +              x = next_shadow(x) )
> +        {
> +            if ( callback_mask & (1 << x->u.sh.type) )
>              {
> -                /* was clean for more than two seconds, try to disable guest
> -                 * write access */
> -                for ( i = begin_pfn; i < end_pfn; i++ ) {
> -                    mfn_t mfn = get_gfn_query_unlocked(d, i, &t);
> -                    if (mfn_x(mfn) != INVALID_MFN)
> -                        flush_tlb |= sh_remove_write_access(d->vcpu[0], mfn, 1, 0);
> -                }
> -                dirty_vram->last_dirty = -1;
> +                ASSERT(x->u.sh.type <= 15);
> +                ASSERT(callbacks[x->u.sh.type] != NULL);
> +                done = callbacks[x->u.sh.type](v, page_to_mfn(x), 
> +                                               begin_pfn, end_pfn,
> +                                               &removed);
> +                if ( done ) break;
>              }
> -            rc = 0;
>          }
> +        if ( done ) break; 
>      }
> -    if ( flush_tlb )
> -        flush_tlb_mask(d->domain_dirty_cpumask);
> -    goto out;
> +    d->arch.paging.shadow.hash_walking = 0;
> +    return removed;
> +}
> +
> +void sh_find_all_vram_mappings(struct vcpu *v,
> +                               dv_range_t *range)
> +{
> +    /* Dispatch table for getting per-type functions */
> +    static hash_pfn_callback_t callbacks[SH_type_unused] = {
> +        NULL, /* none    */
> +        SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, 2), /* l1_32   */
> +        SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, 2), /* fl1_32  */
> +        NULL, /* l2_32   */
> +        SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, 3), /* l1_pae  */
> +        SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, 3), /* fl1_pae */
> +        NULL, /* l2_pae  */
> +        NULL, /* l2h_pae */
> +#if CONFIG_PAGING_LEVELS >= 4
> +        SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, 4), /* l1_64   */
> +        SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, 4), /* fl1_64  */
> +#else
> +        NULL, /* l1_64   */
> +        NULL, /* fl1_64  */
> +#endif
> +        NULL, /* l2_64   */
> +        NULL, /* l2h_64  */
> +        NULL, /* l3_64   */
> +        NULL, /* l4_64   */
> +        NULL, /* p2m     */
> +        NULL  /* unused  */
> +    };
>  
> -out_sl1ma:
> -    xfree(dirty_vram->sl1ma);
> -out_dirty_vram:
> -    xfree(dirty_vram);
> -    dirty_vram = d->arch.hvm_domain.dirty_vram = NULL;
> +    static unsigned int callback_mask = 
> +          1 << SH_type_l1_32_shadow
> +        | 1 << SH_type_fl1_32_shadow
> +        | 1 << SH_type_l1_pae_shadow
> +        | 1 << SH_type_fl1_pae_shadow
> +        | 1 << SH_type_l1_64_shadow
> +        | 1 << SH_type_fl1_64_shadow
> +        ;
>  
> -out:
> -    paging_unlock(d);
> -    p2m_unlock(p2m_get_hostp2m(d));
> -    return rc;
> +    perfc_incr(shadow_mappings);
> +
> +    hash_pfn_foreach(v, callback_mask, callbacks,
> +                     range->begin_pfn,
> +                     range->end_pfn);
> +
> +#if DEBUG_count_initial_mappings
> +    gdprintk(XENLOG_DEBUG, "[%05lx:%05lx] count of initial mappings\n",
> +             range->begin_pfn, range->end_pfn);
> +#endif
>  }
>  
> +
>  /**************************************************************************/
>  /* Shadow-control XEN_DOMCTL dispatcher */
>  
> diff --git a/xen/arch/x86/mm/shadow/multi.c b/xen/arch/x86/mm/shadow/multi.c
> index b79cd6c..922e01a 100644
> --- a/xen/arch/x86/mm/shadow/multi.c
> +++ b/xen/arch/x86/mm/shadow/multi.c
> @@ -35,6 +35,7 @@
>  #include <asm/flushtlb.h>
>  #include <asm/hvm/hvm.h>
>  #include <asm/hvm/cacheattr.h>
> +#include <asm/dirty_vram.h>
>  #include <asm/mtrr.h>
>  #include <asm/guest_pt.h>
>  #include <public/sched.h>
> @@ -149,6 +150,10 @@ delete_fl1_shadow_status(struct vcpu *v, gfn_t gfn, mfn_t smfn)
>      SHADOW_PRINTK("gfn=%"SH_PRI_gfn", type=%08x, smfn=%05lx\n",
>                     gfn_x(gfn), SH_type_fl1_shadow, mfn_x(smfn));
>      ASSERT(mfn_to_page(smfn)->u.sh.head);
> +
> +    /* Removing any dv_paddr_links to the erstwhile shadow page */
> +    dirty_vram_delete_shadow(v, gfn_x(gfn), SH_type_fl1_shadow, smfn);
> +    
>      shadow_hash_delete(v, gfn_x(gfn), SH_type_fl1_shadow, smfn);
>  }
>  
> @@ -160,6 +165,10 @@ delete_shadow_status(struct vcpu *v, mfn_t gmfn, u32 shadow_type, mfn_t smfn)
>                     v->domain->domain_id, v->vcpu_id,
>                     mfn_x(gmfn), shadow_type, mfn_x(smfn));
>      ASSERT(mfn_to_page(smfn)->u.sh.head);
> +    
> +    /* Removing any dv_paddr_links to the erstwhile shadow page */
> +    dirty_vram_delete_shadow(v, mfn_x(gmfn), shadow_type, smfn);
> +    
>      shadow_hash_delete(v, mfn_x(gmfn), shadow_type, smfn);
>      /* 32-on-64 PV guests don't own their l4 pages; see set_shadow_status */
>      if ( !is_pv_32on64_vcpu(v) || shadow_type != SH_type_l4_64_shadow )
> @@ -516,7 +525,6 @@ _sh_propagate(struct vcpu *v,
>      guest_l1e_t guest_entry = { guest_intpte };
>      shadow_l1e_t *sp = shadow_entry_ptr;
>      struct domain *d = v->domain;
> -    struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
>      gfn_t target_gfn = guest_l1e_get_gfn(guest_entry);
>      u32 pass_thru_flags;
>      u32 gflags, sflags;
> @@ -663,17 +671,6 @@ _sh_propagate(struct vcpu *v,
>          }
>      }
>  
> -    if ( unlikely((level == 1) && dirty_vram
> -            && dirty_vram->last_dirty == -1
> -            && gfn_x(target_gfn) >= dirty_vram->begin_pfn
> -            && gfn_x(target_gfn) < dirty_vram->end_pfn) )
> -    {
> -        if ( ft & FETCH_TYPE_WRITE )
> -            dirty_vram->last_dirty = NOW();
> -        else
> -            sflags &= ~_PAGE_RW;
> -    }
> -
>      /* Read-only memory */
>      if ( p2m_is_readonly(p2mt) ||
>           (p2mt == p2m_mmio_direct &&
> @@ -1072,101 +1069,60 @@ static int shadow_set_l2e(struct vcpu *v,
>      return flags;
>  }
>  
> -static inline void shadow_vram_get_l1e(shadow_l1e_t new_sl1e,
> +/* shadow_vram_fix_l1e()
> + *
> + * Tests L1PTEs as they are modified, looking for when they start to (or
> + * cease to) point to frame buffer pages.  If the old and new gfns differ,
> + * calls dirty_vram_range_update() to update the dirty_vram structures.
> + */
> +static inline void shadow_vram_fix_l1e(shadow_l1e_t old_sl1e,
> +                                       shadow_l1e_t new_sl1e,
>                                         shadow_l1e_t *sl1e,
>                                         mfn_t sl1mfn,
>                                         struct domain *d)
>  { 
> -    mfn_t mfn = shadow_l1e_get_mfn(new_sl1e);
> -    int flags = shadow_l1e_get_flags(new_sl1e);
> -    unsigned long gfn;
> -    struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
> +    mfn_t new_mfn, old_mfn;
> +    unsigned long new_gfn = INVALID_M2P_ENTRY, old_gfn = INVALID_M2P_ENTRY;
> +    paddr_t sl1ma;
> +    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
>  
> -    if ( !dirty_vram         /* tracking disabled? */
> -         || !(flags & _PAGE_RW) /* read-only mapping? */
> -         || !mfn_valid(mfn) )   /* mfn can be invalid in mmio_direct */
> +    if ( !dirty_vram )
>          return;
>  
> -    gfn = mfn_to_gfn(d, mfn);
> -    /* Page sharing not supported on shadow PTs */
> -    BUG_ON(SHARED_M2P(gfn));
> +    sl1ma = pfn_to_paddr(mfn_x(sl1mfn)) | ((unsigned long)sl1e & ~PAGE_MASK);
>  
> -    if ( (gfn >= dirty_vram->begin_pfn) && (gfn < dirty_vram->end_pfn) )
> +    old_mfn = shadow_l1e_get_mfn(old_sl1e);
> +
> +    if ( !sh_l1e_is_magic(old_sl1e) &&
> +         (l1e_get_flags(old_sl1e) & _PAGE_PRESENT) &&
> +         mfn_valid(old_mfn))
>      {
> -        unsigned long i = gfn - dirty_vram->begin_pfn;
> -        struct page_info *page = mfn_to_page(mfn);
> -        
> -        if ( (page->u.inuse.type_info & PGT_count_mask) == 1 )
> -            /* Initial guest reference, record it */
> -            dirty_vram->sl1ma[i] = pfn_to_paddr(mfn_x(sl1mfn))
> -                | ((unsigned long)sl1e & ~PAGE_MASK);
> +        old_gfn = mfn_to_gfn(d, old_mfn);
>      }
> -}
> -
> -static inline void shadow_vram_put_l1e(shadow_l1e_t old_sl1e,
> -                                       shadow_l1e_t *sl1e,
> -                                       mfn_t sl1mfn,
> -                                       struct domain *d)
> -{
> -    mfn_t mfn = shadow_l1e_get_mfn(old_sl1e);
> -    int flags = shadow_l1e_get_flags(old_sl1e);
> -    unsigned long gfn;
> -    struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
> -
> -    if ( !dirty_vram         /* tracking disabled? */
> -         || !(flags & _PAGE_RW) /* read-only mapping? */
> -         || !mfn_valid(mfn) )   /* mfn can be invalid in mmio_direct */
> -        return;
> -
> -    gfn = mfn_to_gfn(d, mfn);
> -    /* Page sharing not supported on shadow PTs */
> -    BUG_ON(SHARED_M2P(gfn));
> -
> -    if ( (gfn >= dirty_vram->begin_pfn) && (gfn < dirty_vram->end_pfn) )
> +    
> +    new_mfn = shadow_l1e_get_mfn(new_sl1e);
> +    if ( !sh_l1e_is_magic(new_sl1e) &&
> +         (l1e_get_flags(new_sl1e) & _PAGE_PRESENT) &&
> +         mfn_valid(new_mfn))
>      {
> -        unsigned long i = gfn - dirty_vram->begin_pfn;
> -        struct page_info *page = mfn_to_page(mfn);
> -        int dirty = 0;
> -        paddr_t sl1ma = pfn_to_paddr(mfn_x(sl1mfn))
> -            | ((unsigned long)sl1e & ~PAGE_MASK);
> +        new_gfn = mfn_to_gfn(d, new_mfn);
> +    }
>  
> -        if ( (page->u.inuse.type_info & PGT_count_mask) == 1 )
> -        {
> -            /* Last reference */
> -            if ( dirty_vram->sl1ma[i] == INVALID_PADDR ) {
> -                /* We didn't know it was that one, let's say it is dirty */
> -                dirty = 1;
> -            }
> -            else
> -            {
> -                ASSERT(dirty_vram->sl1ma[i] == sl1ma);
> -                dirty_vram->sl1ma[i] = INVALID_PADDR;
> -                if ( flags & _PAGE_DIRTY )
> -                    dirty = 1;
> -            }
> -        }
> -        else
> +    if ( old_gfn == new_gfn ) return;
> +
> +    if ( VALID_M2P(old_gfn) )
> +        if ( dirty_vram_range_update(d, old_gfn, sl1ma, 0/*clear*/) )
>          {
> -            /* We had more than one reference, just consider the page dirty. */
> -            dirty = 1;
> -            /* Check that it's not the one we recorded. */
> -            if ( dirty_vram->sl1ma[i] == sl1ma )
> -            {
> -                /* Too bad, we remembered the wrong one... */
> -                dirty_vram->sl1ma[i] = INVALID_PADDR;
> -            }
> -            else
> -            {
> -                /* Ok, our recorded sl1e is still pointing to this page, let's
> -                 * just hope it will remain. */
> -            }
> +            SHADOW_PRINTK("gfn %lx (mfn %lx) cleared vram pte\n",
> +                          old_gfn, mfn_x(old_mfn));
>          }
> -        if ( dirty )
> +
> +    if ( VALID_M2P(new_gfn) )
> +        if ( dirty_vram_range_update(d, new_gfn, sl1ma, 1/*set*/) )
>          {
> -            dirty_vram->dirty_bitmap[i / 8] |= 1 << (i % 8);
> -            dirty_vram->last_dirty = NOW();
> +            SHADOW_PRINTK("gfn %lx (mfn %lx) set vram pte\n",
> +                          new_gfn, mfn_x(new_mfn));
>          }
> -    }
>  }
>  
>  static int shadow_set_l1e(struct vcpu *v, 
> @@ -1211,12 +1167,13 @@ static int shadow_set_l1e(struct vcpu *v,
>                  shadow_l1e_remove_flags(new_sl1e, _PAGE_RW);
>                  /* fall through */
>              case 0:
> -                shadow_vram_get_l1e(new_sl1e, sl1e, sl1mfn, d);
>                  break;
>              }
>          }
>      } 
>  
> +    shadow_vram_fix_l1e(old_sl1e, new_sl1e, sl1e, sl1mfn, d);
> +
>      /* Write the new entry */
>      shadow_write_entries(sl1e, &new_sl1e, 1, sl1mfn);
>      flags |= SHADOW_SET_CHANGED;
> @@ -1231,7 +1188,6 @@ static int shadow_set_l1e(struct vcpu *v,
>           * trigger a flush later. */
>          if ( shadow_mode_refcounts(d) ) 
>          {
> -            shadow_vram_put_l1e(old_sl1e, sl1e, sl1mfn, d);
>              shadow_put_page_from_l1e(old_sl1e, d);
>              TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_SHADOW_L1_PUT_REF);
>          } 
> @@ -2018,7 +1974,6 @@ void sh_destroy_l1_shadow(struct vcpu *v, mfn_t smfn)
>          SHADOW_FOREACH_L1E(sl1mfn, sl1e, 0, 0, {
>              if ( (shadow_l1e_get_flags(*sl1e) & _PAGE_PRESENT)
>                   && !sh_l1e_is_magic(*sl1e) ) {
> -                shadow_vram_put_l1e(*sl1e, sl1e, sl1mfn, d);
>                  shadow_put_page_from_l1e(*sl1e, d);
>              }
>          });
> @@ -4336,6 +4291,37 @@ int sh_rm_mappings_from_l1(struct vcpu *v, mfn_t sl1mfn, mfn_t target_mfn)
>      return done;
>  }
>  
> +
> +int sh_find_vram_mappings_in_l1(struct vcpu *v,
> +                                mfn_t sl1mfn,
> +                                unsigned long begin_pfn,
> +                                unsigned long end_pfn,
> +                                int *removed)
> +/* Find all VRAM mappings in this shadow l1 table */
> +{
> +    struct domain *d = v->domain;
> +    shadow_l1e_t *sl1e;
> +    int done = 0;
> +
> +    /* only returns _PAGE_PRESENT entries */
> +    SHADOW_FOREACH_L1E(sl1mfn, sl1e, 0, done, 
> +    {
> +        unsigned long gfn;
> +        mfn_t gmfn = shadow_l1e_get_mfn(*sl1e);
> +        if ( !mfn_valid(gmfn) )
> +            continue;
> +        gfn = mfn_to_gfn(d, gmfn);
> +        if ( VALID_M2P(gfn) && (begin_pfn <= gfn) && (gfn < end_pfn) ) 
> +        {
> +            paddr_t sl1ma =
> +                pfn_to_paddr(mfn_x(sl1mfn)) |
> +                ( (unsigned long)sl1e & ~PAGE_MASK );
> +            dirty_vram_range_update(v->domain, gfn, sl1ma, 1/*set*/);
> +        }
> +    });
> +    return 0;
> +}
> +
>  /**************************************************************************/
>  /* Functions to excise all pointers to shadows from higher-level shadows. */
>  
> diff --git a/xen/arch/x86/mm/shadow/multi.h b/xen/arch/x86/mm/shadow/multi.h
> index 835121e..436a4ac 100644
> --- a/xen/arch/x86/mm/shadow/multi.h
> +++ b/xen/arch/x86/mm/shadow/multi.h
> @@ -66,7 +66,12 @@ SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1, GUEST_LEVELS)
>  extern int
>  SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1, GUEST_LEVELS)
>      (struct vcpu *v, mfn_t sl1mfn, mfn_t target_mfn);
> -
> +extern int
> +SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, GUEST_LEVELS)
> +     (struct vcpu *v, mfn_t sl1mfn, 
> +      unsigned long begin_pfn,
> +      unsigned long end_pfn,
> +      int *removed);
>  extern void
>  SHADOW_INTERNAL_NAME(sh_clear_shadow_entry, GUEST_LEVELS)
>      (struct vcpu *v, void *ep, mfn_t smfn);
> diff --git a/xen/arch/x86/mm/shadow/types.h b/xen/arch/x86/mm/shadow/types.h
> index 43ce1db..5b0f9f7 100644
> --- a/xen/arch/x86/mm/shadow/types.h
> +++ b/xen/arch/x86/mm/shadow/types.h
> @@ -229,6 +229,7 @@ static inline shadow_l4e_t shadow_l4e_from_mfn(mfn_t mfn, u32 flags)
>  #define sh_update_cr3              INTERNAL_NAME(sh_update_cr3)
>  #define sh_rm_write_access_from_l1 INTERNAL_NAME(sh_rm_write_access_from_l1)
>  #define sh_rm_mappings_from_l1     INTERNAL_NAME(sh_rm_mappings_from_l1)
> +#define sh_find_vram_mappings_in_l1 INTERNAL_NAME(sh_find_vram_mappings_in_l1)
>  #define sh_remove_l1_shadow        INTERNAL_NAME(sh_remove_l1_shadow)
>  #define sh_remove_l2_shadow        INTERNAL_NAME(sh_remove_l2_shadow)
>  #define sh_remove_l3_shadow        INTERNAL_NAME(sh_remove_l3_shadow)
> diff --git a/xen/include/asm-x86/dirty_vram.h b/xen/include/asm-x86/dirty_vram.h
> new file mode 100644
> index 0000000..727af05
> --- /dev/null
> +++ b/xen/include/asm-x86/dirty_vram.h
> @@ -0,0 +1,227 @@
> +/****************************************************************************
> + * include/asm-x86/dirty_vram.h
> + *
> + * Interface for tracking dirty VRAM pages
> + *
> + * Copyright (c) 2012 Citrix Systems, Inc. (Robert Phillips)
> + * Parts of this code are Copyright (c) 2007 Advanced Micro Devices (Wei Huang)
> + * Parts of this code are Copyright (c) 2006 by XenSource Inc.
> + * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
> + * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
> + */
> +
> +#ifndef _DIRTY_VRAM_H
> +#define _DIRTY_VRAM_H
> +
> +/*
> + * In shadow mode we need to bookkeep all the L1 page table entries that
> + * map a frame buffer page.  Struct dv_paddr_link does this by
> + * recording the address of an L1 page table entry for some frame buffer page.
> + * It also has a link to additional pl entries if the frame buffer page
> + * has multiple mappings.
> + * In practice very few pages have multiple mappings, but to rule out
> + * pathological situations we limit the number of mappings we are willing
> + * to bookkeep.
> + */
> +
> +#define DV_ADDR_LINK_LIST_LIMIT 64
> +
> +typedef struct dv_paddr_link {
> +    paddr_t sl1ma;
> +    paddr_t pl_next;
> +} dv_paddr_link_t;
> +
> +typedef struct dv_pl_entry {
> +    dv_paddr_link_t mapping;
> +    bool_t stuck_dirty;
> +} dv_pl_entry_t;
> +
> +/*
> + * This defines an extension page of pl entries for FB pages with multiple
> + * mappings. All such pages (of a domain) are linked together.
> + */
> +typedef struct dv_paddr_link_ext {
> +    paddr_t ext_link;
> +    dv_paddr_link_t entries[ ( PAGE_SIZE - sizeof( paddr_t ) ) /
> +                             sizeof( dv_paddr_link_t ) ];
> +} dv_paddr_link_ext_t;
> +
> +/*
> + * This defines a single frame buffer range.  It bookkeeps all the
> + * level 1 PTEs that map guest pages within that range.
> + * All such ranges (of a domain) are linked together.
> + */
> +typedef struct dv_range {
> +    struct list_head range_link; /* the several ranges form a linked list */
> +    unsigned long begin_pfn;
> +    unsigned long end_pfn;
> +    dv_pl_entry_t *pl_tab; /* table has 1 pl entry per pfn in range */
> +    int mappings_hwm; /* high water mark of max mapping count */
> +    unsigned int dirty_count;
> +} dv_range_t;
> +
> +/*
> + * This contains all the data structures required by a domain to
> + * bookkeep the dirty pages within its frame buffers.
> + */
> +typedef struct dv_dirty_vram {
> +    struct list_head range_head; /* head of the linked list of ranges */
> +    paddr_t ext_head; /* head of list of extension pages */
> +    paddr_t pl_free; /* free list of pl's within extension pages */
> +    int nr_ranges;   /* bookkeeps number of ranges */
> +    int ranges_hwm;  /* high water mark of max number of ranges */
> +} dv_dirty_vram_t;
> +
> +/* Allocates domain's dirty_vram structure */
> +dv_dirty_vram_t *
> +dirty_vram_alloc(struct domain *d);
> +
> +/*
> + * Returns domain's dirty_vram structure,
> + * allocating it if necessary
> + */
> +dv_dirty_vram_t *
> +dirty_vram_find_or_alloc(struct domain *d);
> +
> +/* Frees domain's dirty_vram structure */
> +void dirty_vram_free(struct domain *d);
> +
> +/* Returns dirty vram range containing gfn, NULL if none */
> +struct dv_range *
> +dirty_vram_range_find_gfn(struct domain *d,
> +                          unsigned long gfn);
> +
> +/*
> + * Returns dirty vram range matching [ begin_pfn .. begin_pfn+nr ),
> + * NULL if none
> + */
> +dv_range_t *
> +dirty_vram_range_find(struct domain *d,
> +                      unsigned long begin_pfn,
> +                      unsigned long nr);
> +
> +/*
> + * Allocate dirty vram range containing [ begin_pfn .. begin_pfn+nr ),
> + * freeing any existing range that overlaps the new range.
> + */
> +dv_range_t *
> +dirty_vram_range_alloc(struct domain *d,
> +                       unsigned long begin_pfn,
> +                       unsigned long nr);
> +
> +/*
> + * Returns dirty vram range matching [ begin_pfn .. begin_pfn+nr ),
> + * creating a range if none already exists and
> + * freeing any existing range that overlaps the new range.
> + */
> +dv_range_t *
> +dirty_vram_range_find_or_alloc(struct domain *d,
> +                               unsigned long begin_pfn,
> +                               unsigned long nr);
> +
> +void dirty_vram_range_free(struct domain *d,
> +                           dv_range_t *range);
> +
> +/* Bookkeep PTE address of a frame buffer page */
> +int dirty_vram_range_update(struct domain *d,
> +                            unsigned long gfn,
> +                            paddr_t sl1ma,
> +                            int set);
> +
> +/*
> + * smfn is no longer a shadow page.  Remove it from any
> + * dirty vram range mapping.
> + */
> +void
> +dirty_vram_delete_shadow(struct vcpu *v,
> +                         unsigned long gfn,
> +                         unsigned int shadow_type,
> +                         mfn_t smfn);
> +
> +
> +/*
> + * Scan all the L1 tables looking for VRAM mappings.
> + * Record them in the domain's dv_dirty_vram structure
> + */
> +void sh_find_all_vram_mappings(struct vcpu *v,
> +                               dv_range_t *range);
> +
> +/*
> + * Free a paddr_link struct, given address of its
> + * predecessor in singly-linked list
> + */
> +dv_paddr_link_t *
> +free_paddr_link(struct domain *d,
> +                paddr_t* ppl,
> +                dv_paddr_link_t *pl);
> +
> +
> +/* Enable VRAM dirty tracking. */
> +int
> +shadow_track_dirty_vram(struct domain *d,
> +                        unsigned long first_pfn,
> +                        unsigned long nr,
> +                        XEN_GUEST_HANDLE_64(uint8) dirty_bitmap);
> +
> +int
> +hap_track_dirty_vram(struct domain *d,
> +                     unsigned long begin_pfn,
> +                     unsigned long nr,
> +                     XEN_GUEST_HANDLE_64(uint8) dirty_bitmap);
> +
> +void
> +hap_clean_vram_tracking_range(struct domain *d,
> +                              unsigned long begin_pfn,
> +                              unsigned long nr,
> +                              uint8_t *dirty_bitmap);
> +
> +/* Unmap a va and map a ma.
> + * This is used when walking a linked list in which
> + * the links are stored as ma's rather than va's.
> + * Each step in the walk unmaps the va returned
> + * by the previous iteration and maps the next link.
> + *
> + * Unmaps @old_va if not NULL.
> + *
> + * If @new_ma is not INVALID_PADDR, maps it and returns the resulting
> + * va (which must subsequently be unmapped). Else returns NULL.
> + */
> +void *
> +remap_maddr(void *old_va, paddr_t new_ma);
> +
> +
> +/* Like the above but it acquires a mapping
> + * (possibly an additional mapping) on @new_va.
> + *
> + * Returns @new_va.
> + *
> + * Does no unmapping if @old_va is NULL.
> + * Does no mapping if @new_va is NULL.
> + */
> +void *
> +remap_vaddr(void *old_va, void *new_va);
> +
> +#endif /* _DIRTY_VRAM_H */
> +
> +/*
> + * Local variables:
> + * mode: C
> + * c-file-style: "BSD"
> + * c-basic-offset: 4
> + * indent-tabs-mode: nil
> + * End:
> + */
> diff --git a/xen/include/asm-x86/hap.h b/xen/include/asm-x86/hap.h
> index 916a35b..3e3a1f5 100644
> --- a/xen/include/asm-x86/hap.h
> +++ b/xen/include/asm-x86/hap.h
> @@ -57,10 +57,6 @@ void  hap_final_teardown(struct domain *d);
>  void  hap_teardown(struct domain *d);
>  void  hap_vcpu_init(struct vcpu *v);
>  void  hap_logdirty_init(struct domain *d);
> -int   hap_track_dirty_vram(struct domain *d,
> -                           unsigned long begin_pfn,
> -                           unsigned long nr,
> -                           XEN_GUEST_HANDLE_64(uint8) dirty_bitmap);
>  
>  extern const struct paging_mode *hap_paging_get_mode(struct vcpu *);
>  
> diff --git a/xen/include/asm-x86/hvm/domain.h b/xen/include/asm-x86/hvm/domain.h
> index 27b3de5..0cc7b05 100644
> --- a/xen/include/asm-x86/hvm/domain.h
> +++ b/xen/include/asm-x86/hvm/domain.h
> @@ -74,7 +74,7 @@ struct hvm_domain {
>      struct list_head       pinned_cacheattr_ranges;
>  
>      /* VRAM dirty support. */
> -    struct sh_dirty_vram *dirty_vram;
> +    struct dv_dirty_vram  *dirty_vram;
>  
>      /* If one of vcpus of this domain is in no_fill_mode or
>       * mtrr/pat between vcpus is not the same, set is_in_uc_mode
> diff --git a/xen/include/asm-x86/paging.h b/xen/include/asm-x86/paging.h
> index c3a8848..e22df38 100644
> --- a/xen/include/asm-x86/paging.h
> +++ b/xen/include/asm-x86/paging.h
> @@ -154,9 +154,13 @@ void paging_log_dirty_init(struct domain *d,
>                             int  (*disable_log_dirty)(struct domain *d),
>                             void (*clean_dirty_bitmap)(struct domain *d));
>  
> -/* mark a page as dirty */
> +/* mark a gmfn as dirty, a wrapper around marking a gpfn as dirty */
>  void paging_mark_dirty(struct domain *d, unsigned long guest_mfn);
>  
> +/* mark a gpfn as dirty */
> +void paging_mark_dirty_gpfn(struct domain *d, unsigned long gpfn);
> +
> +
>  /* is this guest page dirty? 
>   * This is called from inside paging code, with the paging lock held. */
>  int paging_mfn_is_dirty(struct domain *d, mfn_t gmfn);
> @@ -183,15 +187,6 @@ int paging_mfn_is_dirty(struct domain *d, mfn_t gmfn);
>  #define L4_LOGDIRTY_IDX(pfn) 0
>  #endif
>  
> -/* VRAM dirty tracking support */
> -struct sh_dirty_vram {
> -    unsigned long begin_pfn;
> -    unsigned long end_pfn;
> -    paddr_t *sl1ma;
> -    uint8_t *dirty_bitmap;
> -    s_time_t last_dirty;
> -};
> -
>  /*****************************************************************************
>   * Entry points into the paging-assistance code */
>  
> diff --git a/xen/include/asm-x86/shadow.h b/xen/include/asm-x86/shadow.h
> index 2eb6efc..940d7fd 100644
> --- a/xen/include/asm-x86/shadow.h
> +++ b/xen/include/asm-x86/shadow.h
> @@ -62,12 +62,6 @@ void shadow_vcpu_init(struct vcpu *v);
>  /* Enable an arbitrary shadow mode.  Call once at domain creation. */
>  int shadow_enable(struct domain *d, u32 mode);
>  
> -/* Enable VRAM dirty bit tracking. */
> -int shadow_track_dirty_vram(struct domain *d,
> -                            unsigned long first_pfn,
> -                            unsigned long nr,
> -                            XEN_GUEST_HANDLE_64(uint8) dirty_bitmap);
> -
>  /* Handler for shadow control ops: operations from user-space to enable
>   * and disable ephemeral shadow modes (test mode and log-dirty mode) and
>   * manipulate the log-dirty bitmap. */
> -- 
> 1.7.9.5
> 
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH] Provide support for multiple frame buffers in Xen
  2013-01-24 11:25 ` Tim Deegan
  2013-02-05 11:38   ` Robert Phillips
@ 2013-03-01 20:52   ` Robert Phillips
  1 sibling, 0 replies; 35+ messages in thread
From: Robert Phillips @ 2013-03-01 20:52 UTC (permalink / raw)
  To: Tim (Xen.org); +Cc: Jan Beulich, xen-devel



> -----Original Message-----
> From: Tim Deegan [mailto:tim@xen.org]
> Sent: Thursday, January 24, 2013 6:25 AM
> To: Robert Phillips
> Cc: Jan Beulich; xen-devel@lists.xen.org
> Subject: Re: [Xen-devel] [PATCH] Provide support for multiple frame buffers
> in Xen
> 
> Hi,
> 
> At 14:28 -0500 on 21 Jan (1358778509), Robert Phillips wrote:
> > Support is provided for both shadow and hardware assisted paging (HAP)
> > modes. This code bookkeeps the set of video frame buffers (vram),
> > detects when the guest has modified any of those buffers and, upon
> request,
> > returns a bitmap of the modified pages.
> > This lets other software components re-paint the portions of the monitor
> > (or monitors) that have changed.
> > Each monitor has a frame buffer of some size at some position
> > in guest physical memory.
> > The set of frame buffers being tracked can change over time as monitors
> > are plugged and unplugged.
> 
> This doesn't apply to tip of xen-unstable.  

Hopefully the patch I just submitted applies ok.

>Also:
> 
> > +        ext = __map_domain_page(pg);
> > +        /* Is unmapped in dirty_vram_free() */
> 
> Mappings from map_domain_page() can't be kept around like this.  They're
> supposed to be short-term, and in systems where we don't have a full 1-1
> map of memory (e.g. x86 once Jan's 16TB-support series goes in) there
> are a limited number of mapping slots.
> 
> Jan, what do you recommend here?  These are pages of linked-list
> entries, part of the p2m/mm overhead and so allocated from the guest's
> shadow memory.  Are we going to have to allocate them from xenheap
> instead or is there any way to avoid that?
> 
> Tim.

I have avoided long-duration mappings.  Instead the linked lists are constructed from machine addresses and are mapped as necessary.
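
For illustration, the walk this implies looks roughly like the sketch
below, using the remap_maddr()/remap_vaddr() helpers declared in
dirty_vram.h.  This is only an outline for review, not code lifted from
the patch; visit_sl1e() and 'first' are hypothetical stand-ins for the
caller's per-entry work and for the embedded first entry of a pl list.

    /* Walk one pl list linked by machine addresses, keeping at most
     * one short-term mapping live at a time. */
    dv_paddr_link_t *pl = NULL;
    paddr_t sl1ma = first->sl1ma;
    paddr_t next  = first->pl_next;

    while ( sl1ma != INVALID_PADDR )
    {
        visit_sl1e(sl1ma);            /* hypothetical per-entry work */

        pl = remap_maddr(pl, next);   /* unmaps previous entry, maps next */
        if ( !pl )                    /* INVALID_PADDR terminates the list */
            break;
        next = pl->pl_next;
        sl1ma = pl->sl1ma;
    }
    remap_vaddr(pl, NULL);            /* drop the final mapping, if any */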

-- rsp

^ permalink raw reply	[flat|nested] 35+ messages in thread

* [PATCH] Provide support for multiple frame buffers in Xen
@ 2013-03-01 20:48 Robert Phillips
  2013-03-02 11:20 ` Pasi Kärkkäinen
                   ` (2 more replies)
  0 siblings, 3 replies; 35+ messages in thread
From: Robert Phillips @ 2013-03-01 20:48 UTC (permalink / raw)
  To: xen-devel; +Cc: Robert Phillips

Support is provided for both shadow and hardware assisted paging (HAP)
modes. This code bookkeeps the set of video frame buffers (vram),
detects when the guest has modified any of those buffers and, upon request,
returns a bitmap of the modified pages.
This lets other software components re-paint the portions of the monitor
(or monitors) that have changed.
Each monitor has a frame buffer of some size at some position
in guest physical memory.
The set of frame buffers being tracked can change over time as monitors
are plugged and unplugged.
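
As an illustration of how a consumer (e.g. a device model) might drive
this, a minimal sketch follows.  It assumes the remainder of the
xc_hvm_track_dirty_vram() prototype is the pre-existing libxc one
(first_pfn, nr, and a bitmap pointer); repaint_page() is a hypothetical
stand-in for the caller's rendering code.

    #include <xenctrl.h>

    static void refresh_fb(xc_interface *xch, domid_t dom,
                           uint64_t first_pfn, uint64_t nr,
                           void (*repaint_page)(uint64_t pfn))
    {
        unsigned long bitmap[(nr + 63) / 64];   /* one dirty bit per pfn */
        const uint8_t *bits = (const uint8_t *)bitmap;
        uint64_t i;

        /* Atomically fetch and reset the dirty bitmap for this region. */
        if ( xc_hvm_track_dirty_vram(xch, dom, first_pfn, nr, bitmap) != 0 )
            return;                             /* tracking unavailable */

        for ( i = 0; i < nr; i++ )
            if ( bits[i / 8] & (1 << (i % 8)) )
                repaint_page(first_pfn + i);    /* page changed: repaint */
    }

When the last monitor is unplugged the caller can pass nr == 0 to discard
all regions and stop tracking, as described in the xenctrl.h comment below.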

This is the 9th version of this patch.
Signed-Off-By: Robert Phillips <robert.phillips@citrix.com>
---
 tools/libxc/xenctrl.h            |   20 +-
 xen/arch/x86/hvm/hvm.c           |    8 +-
 xen/arch/x86/mm/Makefile         |    1 +
 xen/arch/x86/mm/dirty_vram.c     |  951 ++++++++++++++++++++++++++++++++++++++
 xen/arch/x86/mm/hap/hap.c        |  111 -----
 xen/arch/x86/mm/p2m.c            |   11 +-
 xen/arch/x86/mm/paging.c         |   57 ++-
 xen/arch/x86/mm/shadow/common.c  |  353 +++++++-------
 xen/arch/x86/mm/shadow/multi.c   |  174 ++++---
 xen/arch/x86/mm/shadow/multi.h   |    7 +-
 xen/arch/x86/mm/shadow/types.h   |    1 +
 xen/include/asm-x86/dirty_vram.h |  227 +++++++++
 xen/include/asm-x86/hap.h        |    4 -
 xen/include/asm-x86/hvm/domain.h |    2 +-
 xen/include/asm-x86/paging.h     |   15 +-
 xen/include/asm-x86/shadow.h     |    6 -
 16 files changed, 1535 insertions(+), 413 deletions(-)
 create mode 100644 xen/arch/x86/mm/dirty_vram.c
 create mode 100644 xen/include/asm-x86/dirty_vram.h

diff --git a/tools/libxc/xenctrl.h b/tools/libxc/xenctrl.h
index 32122fd..cd4e1ef 100644
--- a/tools/libxc/xenctrl.h
+++ b/tools/libxc/xenctrl.h
@@ -1563,15 +1563,23 @@ int xc_hvm_inject_msi(
     xc_interface *xch, domid_t dom, uint64_t addr, uint32_t data);
 
 /*
- * Track dirty bit changes in the VRAM area
+ * Track dirty bit changes in a VRAM region defined by
+ * [ first_pfn : first_pfn + nr - 1 ]
  *
  * All of this is done atomically:
- * - get the dirty bitmap since the last call
- * - set up dirty tracking area for period up to the next call
- * - clear the dirty tracking area.
+ * - gets the dirty bitmap since the last call, all zeroes for
+ *   the first call with some new region
+ * - sets up a dirty tracking region for the period up to the next call
+ * - clears the specified dirty tracking region.
  *
- * Returns -ENODATA and does not fill bitmap if the area has changed since the
- * last call.
+ * Creating a new region causes any existing regions that it overlaps
+ * to be discarded.
+ *
+ * Specifying nr == 0 causes all regions to be discarded and
+ * disables dirty bit tracking.
+ *
+ * If nr is not a multiple of 64, only the first nr bits of bitmap
+ * are well defined.
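+ *
+ * A minimal caller sketch (illustrative; the remaining parameters --
+ * first_pfn, nr and the bitmap buffer -- are assumed unchanged from the
+ * existing prototype below):
+ *
+ *     unsigned long bitmap[(nr + 63) / 64];
+ *     memset(bitmap, 0, sizeof(bitmap));
+ *     rc = xc_hvm_track_dirty_vram(xch, dom, first_pfn, nr, bitmap);
+ *     // on success, bit i of bitmap is set iff pfn (first_pfn + i) was
+ *     // written since the previous call for this region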
  */
 int xc_hvm_track_dirty_vram(
     xc_interface *xch, domid_t dom,
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index fcea52c..5a97ad3 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -57,6 +57,7 @@
 #include <asm/hvm/cacheattr.h>
 #include <asm/hvm/trace.h>
 #include <asm/hvm/nestedhvm.h>
+#include <asm/dirty_vram.h>
 #include <asm/mtrr.h>
 #include <asm/apic.h>
 #include <public/sched.h>
@@ -1447,8 +1448,11 @@ int hvm_hap_nested_page_fault(paddr_t gpa,
          */
         if ( access_w )
         {
-            paging_mark_dirty(v->domain, mfn_x(mfn));
-            p2m_change_type(v->domain, gfn, p2m_ram_logdirty, p2m_ram_rw);
+            if ( p2m_change_type(v->domain, gfn, p2m_ram_logdirty,
+                                 p2m_ram_rw) == p2m_ram_logdirty )
+            {
+                paging_mark_dirty_gpfn(v->domain, gfn);
+            }
         }
         rc = 1;
         goto out_put_gfn;
diff --git a/xen/arch/x86/mm/Makefile b/xen/arch/x86/mm/Makefile
index 73dcdf4..becd0c9 100644
--- a/xen/arch/x86/mm/Makefile
+++ b/xen/arch/x86/mm/Makefile
@@ -5,6 +5,7 @@ obj-y += paging.o
 obj-y += p2m.o p2m-pt.o p2m-ept.o p2m-pod.o
 obj-y += guest_walk_2.o
 obj-y += guest_walk_3.o
+obj-y += dirty_vram.o
 obj-$(x86_64) += guest_walk_4.o
 obj-$(x86_64) += mem_event.o
 obj-$(x86_64) += mem_paging.o
diff --git a/xen/arch/x86/mm/dirty_vram.c b/xen/arch/x86/mm/dirty_vram.c
new file mode 100644
index 0000000..4f599ed
--- /dev/null
+++ b/xen/arch/x86/mm/dirty_vram.c
@@ -0,0 +1,951 @@
+/*
+ * arch/x86/mm/dirty_vram.c: Bookkeep/query dirty VRAM pages
+ * with support for multiple frame buffers.
+ *
+ * Copyright (c) 2012, Citrix Systems, Inc. (Robert Phillips)
+ * Parts of this code are Copyright (c) 2007 Advanced Micro Devices (Wei Huang)
+ * Parts of this code are Copyright (c) 2007 XenSource Inc.
+ * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
+ * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+
+#include <xen/types.h>
+#include <xen/sched.h>
+#include <xen/guest_access.h>
+#include <asm/shadow.h>
+#include <asm/dirty_vram.h>
+#include <asm/hap.h>
+#include <asm/config.h>
+#include "mm-locks.h"
+
+#define DEBUG_stop_tracking_all_vram          0
+#define DEBUG_allocating_dirty_vram_range     0
+#define DEBUG_high_water_mark_for_vram_ranges 0
+#define DEBUG_freeing_dirty_vram_range        0
+#define DEBUG_allocate_paddr_links_page       0
+#define DEBUG_update_vram_mapping             0
+#define DEBUG_alloc_paddr_inject_fault        0
+#define DEBUG_link_limit_exceeded             0
+
+
+/* Allocates domain's dirty_vram structure */
+dv_dirty_vram_t *
+dirty_vram_alloc(struct domain *d)
+{
+    dv_dirty_vram_t *dirty_vram;
+    ASSERT( paging_locked_by_me(d) );
+    dirty_vram = d->arch.hvm_domain.dirty_vram = xzalloc(dv_dirty_vram_t);
+    if ( dirty_vram )
+    {
+        INIT_LIST_HEAD(&dirty_vram->range_head);
+        dirty_vram->ext_head = INVALID_PADDR;
+        dirty_vram->pl_free = INVALID_PADDR;
+    }
+    return dirty_vram;
+}
+
+/*
+ * Returns domain's dirty_vram structure,
+ * allocating it if necessary
+ */
+dv_dirty_vram_t *
+dirty_vram_find_or_alloc(struct domain *d)
+{
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    ASSERT( paging_locked_by_me(d) );
+    if ( !dirty_vram )
+        dirty_vram = dirty_vram_alloc(d);
+    return dirty_vram;
+}
+
+
+/* Free domain's dirty_vram structure */
+void dirty_vram_free(struct domain *d)
+{
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    ASSERT( paging_locked_by_me(d) );
+    if ( dirty_vram )
+    {
+        struct list_head *curr, *next;
+        dv_paddr_link_ext_t *ext;
+        
+        /* Free all the ranges */
+        list_for_each_safe(curr, next, &dirty_vram->range_head)
+        {
+            dv_range_t *range = list_entry(curr, dv_range_t, range_link);
+#if DEBUG_stop_tracking_all_vram
+            gdprintk(XENLOG_DEBUG, "[%05lx:%05lx] stop tracking all vram\n",
+                     range->begin_pfn, range->end_pfn);
+#endif
+            xfree(range->pl_tab);
+            xfree(range);
+        }
+        /* Free all the extension pages */
+        
+        ext = remap_maddr(NULL, dirty_vram->ext_head);
+        while ( ext ) {
+            struct page_info *pg = virt_to_page(ext);
+            ext = remap_maddr(ext, ext->ext_link);
+            d->arch.paging.free_page(d, pg);
+        }
+
+        xfree(dirty_vram);
+        d->arch.hvm_domain.dirty_vram = NULL;
+    }
+}
+
+/* Returns dirty vram range containing gfn, NULL if none */
+struct dv_range *
+dirty_vram_range_find_gfn(struct domain *d,
+                          unsigned long gfn)
+{
+    struct dv_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    ASSERT( paging_locked_by_me(d) );
+    if ( dirty_vram )
+    {
+        struct list_head *curr;
+        list_for_each(curr, &dirty_vram->range_head)
+        {
+            dv_range_t *range = list_entry(curr, dv_range_t, range_link);
+            if ( gfn >= range->begin_pfn &&
+                 gfn <  range->end_pfn )
+                return range;
+        }
+    }
+    return NULL;
+}
+
+/*
+ * Returns pointer to dirty vram range matching [begin_pfn .. end_pfn ),
+ * NULL if none.
+ */
+dv_range_t *
+dirty_vram_range_find(struct domain *d,
+                      unsigned long begin_pfn,
+                      unsigned long nr)
+{
+    unsigned long end_pfn = begin_pfn + nr;
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    ASSERT( paging_locked_by_me(d) );
+    if ( dirty_vram )
+    {
+        struct list_head *curr;
+        list_for_each(curr, &dirty_vram->range_head)
+        {
+            dv_range_t *range = list_entry(curr, dv_range_t, range_link);
+            if ( begin_pfn == range->begin_pfn &&
+                 end_pfn   == range->end_pfn )
+                return range;
+        }
+    }
+    return NULL;
+}
+
+/* Allocate specified dirty_vram range */
+static dv_range_t *
+_dirty_vram_range_alloc(struct domain *d,
+                        unsigned long begin_pfn,
+                        unsigned long nr)
+{
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    dv_range_t *range = NULL;
+    unsigned long end_pfn = begin_pfn + nr;
+    dv_pl_entry_t *pl_tab = NULL;
+    int i;
+
+    ASSERT( paging_locked_by_me(d) );
+    ASSERT( dirty_vram != NULL );
+
+#if DEBUG_allocating_dirty_vram_range
+    gdprintk(XENLOG_DEBUG,
+             "[%05lx:%05lx] Allocating dirty vram range hap:%d\n",
+             begin_pfn, end_pfn,
+             d->arch.hvm_domain.hap_enabled);
+#endif
+
+    range = xzalloc(dv_range_t);
+    if ( range == NULL )
+        goto err_out;
+
+    INIT_LIST_HEAD(&range->range_link);
+
+    range->begin_pfn = begin_pfn;
+    range->end_pfn = end_pfn;
+
+    if ( !hap_enabled(d) )
+    {
+        if ( (pl_tab = xzalloc_array(dv_pl_entry_t, nr)) == NULL )
+            goto err_out;
+
+        for ( i = 0; i != nr; i++ )
+        {
+            pl_tab[i].mapping.sl1ma = INVALID_PADDR;
+            pl_tab[i].mapping.pl_next = INVALID_PADDR;
+        }
+    }
+
+    range->pl_tab = pl_tab;
+    range->mappings_hwm = 1;
+
+    list_add(&range->range_link, &dirty_vram->range_head);
+    if ( ++dirty_vram->nr_ranges > dirty_vram->ranges_hwm )
+    {
+        dirty_vram->ranges_hwm = dirty_vram->nr_ranges;
+#if DEBUG_high_water_mark_for_vram_ranges
+        gdprintk(XENLOG_DEBUG,
+                 "High water mark for number of vram ranges is now:%d\n",
+                 dirty_vram->ranges_hwm);
+#endif
+    }
+    return range;
+
+ err_out:
+    xfree(pl_tab);
+    xfree(range);
+    return NULL;
+}
+
+/* Frees specified dirty_vram range */
+void dirty_vram_range_free(struct domain *d,
+                           dv_range_t *range)
+{
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    ASSERT( paging_locked_by_me(d) );
+    if ( dirty_vram )
+    {
+        int i, nr = range->end_pfn - range->begin_pfn;
+
+#if DEBUG_freeing_dirty_vram_range
+        gdprintk(XENLOG_DEBUG,
+                 "[%05lx:%05lx] Freeing dirty vram range\n",
+                 range->begin_pfn, range->end_pfn);
+#endif
+
+        if ( range->pl_tab )
+        {
+            for ( i = 0; i != nr; i++ )
+            {
+                dv_paddr_link_t *plx;
+                
+                plx = remap_maddr(NULL, range->pl_tab[i].mapping.pl_next);
+
+                /* Does current FB page have multiple mappings? */
+                if ( plx ) /* Yes */
+                {
+                    /* Find the last element in singly-linked list */
+                    while ( plx->pl_next != INVALID_PADDR ) {
+                        plx = remap_maddr(plx, plx->pl_next);
+                    }
+                    
+                    /* Prepend whole list to the free list */
+                    plx->pl_next = dirty_vram->pl_free;
+                    dirty_vram->pl_free = range->pl_tab[i].mapping.pl_next;
+                }
+                /* Unmap plx. */
+                remap_vaddr(plx, NULL);
+            }
+            xfree(range->pl_tab);
+            range->pl_tab = NULL;
+        }
+        /* Remove range from the linked list, free it, and adjust count*/
+        list_del(&range->range_link);
+        xfree(range);
+        dirty_vram->nr_ranges--;
+    }
+}
+
+/*
+ * dirty_vram_range_alloc()
+ * This function ensures that the new range does not overlap any existing
+ * ranges -- deleting them if necessary -- and then calls
+ * _dirty_vram_range_alloc to actually allocate the new range.
+ */
+dv_range_t *
+dirty_vram_range_alloc(struct domain *d,
+                        unsigned long begin_pfn,
+                        unsigned long nr)
+{
+    unsigned long end_pfn = begin_pfn + nr;
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    dv_range_t *range;
+    struct list_head *curr, *next;
+
+    ASSERT( paging_locked_by_me(d) );
+    ASSERT( dirty_vram != NULL );
+
+    /*
+     * Ranges cannot overlap so
+     * free any range that overlaps [ begin_pfn .. end_pfn ).
+     */
+    list_for_each_safe(curr, next, &dirty_vram->range_head)
+    {
+        dv_range_t *rng = list_entry(curr, dv_range_t, range_link);
+        if ( ( ( rng->begin_pfn <= begin_pfn ) &&
+               ( begin_pfn <  rng->end_pfn   )
+                 ) ||
+             ( ( begin_pfn <= rng->begin_pfn ) &&
+               ( rng->begin_pfn < end_pfn    )
+                 ) )
+        {
+            /* Different tracking, tear the previous down. */
+            dirty_vram_range_free(d, rng);
+        }
+    }
+
+    range = _dirty_vram_range_alloc(d, begin_pfn, nr);
+
+    return range;
+}
+
+/*
+ * dirty_vram_range_find_or_alloc()
+ * Find the range for [begin_pfn:begin_pfn+nr).
+ * If it doesn't exist, create it.
+ */
+dv_range_t *
+dirty_vram_range_find_or_alloc(struct domain *d,
+                                unsigned long begin_pfn,
+                                unsigned long nr)
+{
+    dv_range_t *range;
+    ASSERT( paging_locked_by_me(d) );
+    range = dirty_vram_range_find(d, begin_pfn, nr);
+    if ( !range )
+        range = dirty_vram_range_alloc(d, begin_pfn, nr);
+    
+    return range;
+}
+
+
+static paddr_t
+domain_page_map_to_maddr(void *va)
+{
+    if ( va )
+        return (domain_page_map_to_mfn(va) << PAGE_SHIFT) +
+            (((unsigned long)va) & ~PAGE_MASK);
+    else
+        return INVALID_PADDR;
+}
+
+void *
+remap_maddr(void *old_va, paddr_t new_ma)
+{
+    if ( old_va )
+        unmap_domain_page(old_va);
+    
+    if ( new_ma != INVALID_PADDR )
+        return map_domain_page(new_ma >> PAGE_SHIFT) + (new_ma & ~PAGE_MASK);
+    else
+        return NULL;
+}
+
+void *
+remap_vaddr(void *old_va, void *new_va)
+{
+    if ( old_va )
+        unmap_domain_page(old_va);
+    
+    if ( new_va )
+        /* Map page now in anticipation of future matching unmap */
+        map_domain_page(domain_page_map_to_mfn(new_va));
+
+    return new_va;
+}
+
+
+/* Allocate a dv_paddr_link struct */
+static dv_paddr_link_t *
+alloc_paddr_link(struct domain *d)
+{
+    dv_paddr_link_t *pl = NULL;
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    dv_paddr_link_ext_t *ext = NULL;
+    
+
+    ASSERT( paging_locked_by_me(d) );
+    BUILD_BUG_ON(sizeof(dv_paddr_link_ext_t) > PAGE_SIZE);
+    /* Is the list of free pl's empty? */
+    if ( dirty_vram->pl_free == INVALID_PADDR ) /* Yes. */
+    {
+        /*
+         * Allocate another page of pl's.
+         * Link them all together and point the free list head at them
+         */
+        int i;
+        struct page_info *pg = d->arch.paging.alloc_page(d);
+        if ( pg == NULL )
+            goto out;
+
+        ext = __map_domain_page(pg);
+        if ( ext == NULL )
+        {
+            d->arch.paging.free_page(d, pg);
+            goto out;
+        }
+
+#if DEBUG_allocate_paddr_links_page
+        gdprintk(XENLOG_DEBUG, "Allocated another page of paddr_links\n");
+#endif
+        /* Prepend new extension page to dirty_vram's list of same. */
+        ext->ext_link = dirty_vram->ext_head;
+        dirty_vram->ext_head = domain_page_map_to_maddr(ext);
+
+        /* Initialize and link together the new pl entries. */
+        for ( i = 0; i != ARRAY_SIZE(ext->entries); i++ )
+        {
+            ext->entries[i].sl1ma = INVALID_PADDR;
+            ext->entries[i].pl_next =
+                domain_page_map_to_maddr(&ext->entries[i+1]);
+        }
+        /* Mark the last entry's next pointer as "null". */
+        ext->entries[ARRAY_SIZE(ext->entries) - 1].pl_next = INVALID_PADDR;
+
+        /* Set the dirty_vram's free list pointer to the first entry. */
+        dirty_vram->pl_free = domain_page_map_to_maddr(&ext->entries[0]);
+    }
+    pl = remap_maddr(NULL, dirty_vram->pl_free);
+    dirty_vram->pl_free = pl->pl_next;
+
+    pl->sl1ma = INVALID_PADDR;
+    pl->pl_next = INVALID_PADDR;
+ out:
+    if ( ext )
+        unmap_domain_page(ext);
+    return pl;
+}
+
+
+/*
+ * Free a paddr_link struct.
+ *
+ * The caller has walked the singly-linked list of elements
+ * that have, as their head, an element in a pl_tab cell.
+ * The list walk has reached the element to be freed.
+ * (Each element is a dv_paddr_link_t struct.)
+ *
+ * @pl points to the element to be freed.
+ * @ppl points to its predecessor's next member.
+ *
+ * After linking the predecessor to the element's successor,
+ * we can free @pl by prepending it to the list of free
+ * elements.
+ *
+ * As a boundary case (which happens to be the common case),
+ * @pl points to a cell in the pl_tab rather than to some
+ * extension element dangling from that cell.
+ * We recognize this case because @ppl is NULL.
+ * In that case we promote the first extension element by
+ * copying it into the pl_tab cell and freeing it.
+ *
+ * Returns a pointer to @pl's successor.  This is where
+ * any iterative processing will resume.
+ */
+
+dv_paddr_link_t *
+free_paddr_link(struct domain *d,
+                paddr_t *ppl,
+                dv_paddr_link_t *pl)
+{
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    dv_paddr_link_t *npl; /* Next pl. */
+
+    ASSERT( paging_locked_by_me(d) );
+    /* Extension mapping? */
+    if ( ppl ) /* Yes, relink around it. */
+    {
+        /* When done, resume processing at pl's successor. */
+        npl = remap_maddr(NULL, pl->pl_next);
+        (*ppl) = domain_page_map_to_maddr(npl);
+    }
+    else  /* pl points to entry in the main table. */
+    {
+        /*
+         * Move 2nd mapping to main table
+         * and free 2nd mapping.
+         */
+        dv_paddr_link_t *spl; /* 2nd mapping */
+        spl = remap_maddr(NULL, pl->pl_next);
+        /* Is there a 2nd mapping? */
+        if ( spl == NULL ) /* No. */
+        {
+            /* Invalidate the main table entry. */
+            pl->sl1ma = INVALID_PADDR;
+            return pl;
+        }
+        /* Copy 2nd mapping into main table. */
+        pl->sl1ma = spl->sl1ma;
+        pl->pl_next = spl->pl_next;
+        npl = pl; /* Reprocess main table entry again. */
+        pl = spl; /* Prepare to free 2nd mapping. */
+    }
+
+    /* Prepare entry for prepending to the free list */
+    pl->sl1ma = INVALID_PADDR;
+    pl->pl_next = dirty_vram->pl_free;
+    dirty_vram->pl_free = domain_page_map_to_maddr(pl);
+    remap_vaddr(pl, NULL); /* Unmap pl. */
+
+    return npl;
+}
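+
+/*
+ * Illustrative sketch only: the deletion loops that drive free_paddr_link()
+ * (dirty_vram_range_update() below, dirty_vram_delete_shadow() in
+ * shadow/common.c) follow roughly this pattern, with target_sl1ma, range,
+ * i and d standing for the obvious locals:
+ *
+ *     pl  = remap_vaddr(NULL, &range->pl_tab[i].mapping);
+ *     ppl = NULL;
+ *     while ( pl != NULL && pl->sl1ma != INVALID_PADDR )
+ *     {
+ *         if ( pl->sl1ma == target_sl1ma )
+ *             pl = free_paddr_link(d, ppl, pl);  // returns successor
+ *         else
+ *         {
+ *             ppl = remap_vaddr(ppl, &pl->pl_next);
+ *             pl  = remap_maddr(pl, *ppl);
+ *         }
+ *     }
+ *     remap_vaddr(pl, NULL);
+ *     remap_vaddr(ppl, NULL);
+ */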
+
+
+/*
+ * dirty_vram_range_update()
+ *
+ * This is called whenever a level 1 page table entry is modified.
+ * If the L1PTE is being cleared, the function removes any paddr_links
+ * that refer to it.
+ * If the L1PTE is being set to a frame buffer page, a paddr_link is
+ * created for that page's entry in pl_tab.
+ * Returns 1 iff entry found and set or cleared.
+ */
+int dirty_vram_range_update(struct domain *d,
+                            unsigned long gfn,
+                            paddr_t sl1ma,
+                            int set)
+{
+    int effective = 0;
+    dv_range_t *range;
+    unsigned long i;
+    dv_paddr_link_t *pl;
+    paddr_t *ppl;
+    int len = 0;
+
+    ASSERT(paging_locked_by_me(d));
+    range = dirty_vram_range_find_gfn(d, gfn);
+    if ( !range )
+        return effective;
+
+    
+    i = gfn - range->begin_pfn;
+    pl = remap_vaddr(NULL, &range->pl_tab[ i ].mapping);
+    ppl = NULL;
+    /*
+     * Find matching entry (pl), if any, and its predecessor (ppl)
+     * in linked list.
+     */
+    while ( pl != NULL )
+    {
+        if ( pl->sl1ma == sl1ma || pl->sl1ma == INVALID_PADDR )
+            break;
+
+        ppl = remap_vaddr(ppl, &pl->pl_next);
+        pl =  remap_maddr(pl, (*ppl));
+        len++;
+    }
+    
+    if ( set )
+    {
+        /* Did we find sl1ma in either the main table or the linked list? */
+        if ( pl == NULL ) /* No, so we'll need to alloc a link. */
+        {
+            ASSERT(ppl != NULL); /* ppl points to predecessor's next link. */
+            
+#if DEBUG_alloc_paddr_inject_fault
+            {
+                static int counter;
+                
+                /* Test stuck_dirty logic for some cases */
+                if ( (++counter) % 4 == 0 )
+                {
+                    /* Simply mark the frame buffer page as always dirty */
+                    range->pl_tab[ i ].stuck_dirty = 1;
+                    gdprintk(XENLOG_DEBUG,
+                             "[%lx] inject stuck dirty fault\n",
+                             gfn );
+                    goto out;
+                }
+            }
+#endif
+            /*
+             * Have we reached the limit of mappings we're willing
+             * to bookkeep?
+             */
+            if ( len > DV_ADDR_LINK_LIST_LIMIT ) /* Yes. */
+            {
+#if DEBUG_link_limit_exceeded
+                if ( !range->pl_tab[ i ].stuck_dirty )
+                    gdprintk(XENLOG_DEBUG,
+                             "[%lx] link limit exceeded\n",
+                             gfn );
+#endif            
+                /* Simply mark the frame buffer page as always dirty */
+                range->pl_tab[ i ].stuck_dirty = 1;
+                goto out;
+            }
+
+            /* Alloc new link */
+            pl = alloc_paddr_link(d);
+            /* Were we able to allocate a link? */
+            if ( pl == NULL ) /* No. */
+            {
+                /* Simply mark the frame buffer page as always dirty */
+                range->pl_tab[ i ].stuck_dirty = 1;
+                
+                gdprintk(XENLOG_DEBUG,
+                         "[%lx] alloc failure\n",
+                         gfn );
+                
+                goto out;
+            }
+            /* Append new link to the list */
+            (*ppl) = domain_page_map_to_maddr(pl);
+        }
+
+        /* Did we find an entry for sl1ma? */
+        if ( pl->sl1ma != sl1ma ) /* No. */
+        {
+            /* pl must point to either a previously unused entry in the
+             * main table, or to a newly allocated link.
+             * In either case, the link's sl1ma should be 'null'.
+             */
+            ASSERT(pl->sl1ma == INVALID_PADDR);
+            pl->sl1ma = sl1ma;
+        }
+        effective = 1; /* Take note that we're going to set an entry. */
+        if ( len > range->mappings_hwm )
+        {
+            /* Bookkeep the high water mark. */
+            range->mappings_hwm = len;
+#if DEBUG_update_vram_mapping
+            gdprintk(XENLOG_DEBUG,
+                     "[%lx] set      sl1ma:%lx hwm:%d "
+                     "free_pages:%d\n",
+                     gfn, sl1ma,
+                     range->mappings_hwm,
+                     d->arch.paging.shadow.free_pages);
+#endif
+        }
+    }
+    else /* Clear */
+    {
+        if ( pl && pl->sl1ma == sl1ma )
+        {
+#if DEBUG_update_vram_mapping
+            gdprintk(XENLOG_DEBUG,
+                     "[%lx] clear    sl1ma:%lx\n",
+                     gfn, sl1ma);
+#endif
+            pl = free_paddr_link(d, ppl, pl);
+            effective = 1; /* Take note that we're clearing an entry. */
+        }
+    }
+ out:
+    remap_vaddr(pl,  NULL);
+    remap_vaddr(ppl, NULL);
+    return effective;
+}
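+
+/*
+ * Illustrative caller sketch (see shadow_vram_fix_l1e() in shadow/multi.c):
+ * when the L1PTE at sl1e within shadow page sl1mfn changes which gfn it
+ * maps, the bookkeeping is updated roughly as follows:
+ *
+ *     sl1ma = pfn_to_paddr(mfn_x(sl1mfn)) |
+ *             ((unsigned long)sl1e & ~PAGE_MASK);
+ *     if ( old_gfn != new_gfn )
+ *     {
+ *         dirty_vram_range_update(d, old_gfn, sl1ma, 0);  // clear
+ *         dirty_vram_range_update(d, new_gfn, sl1ma, 1);  // set
+ *     }
+ */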
+
+
+/*
+ * shadow_scan_dirty_flags()
+ * This produces a dirty bitmap for the range by examining every
+ * L1PTE referenced by some dv_paddr_link in the range's pl_tab table.
+ * It tests and clears each such L1PTE's dirty flag.
+ */
+static int 
+shadow_scan_dirty_flags(struct domain *d,
+                        dv_range_t *range,
+                        uint8_t *dirty_bitmap)
+{
+    int flush_tlb = 0;
+    unsigned long i;
+    unsigned long nr = range->end_pfn - range->begin_pfn;
+    l1_pgentry_t *sl1e = NULL;
+
+    ASSERT( paging_locked_by_me(d) );
+    /* Iterate over VRAM to track dirty bits. */
+    for ( i = 0; i < nr; i++ )
+    {
+        int dirty = 0;
+        
+        /* Does the frame buffer have an incomplete set of mappings? */
+        if ( unlikely(range->pl_tab[i].stuck_dirty) ) /* Yes. */
+            dirty = 1; /* Then always assume the page is dirty. */
+        else { /* The frame buffer's set of mappings is complete.  Scan it. */
+            paddr_t next = range->pl_tab[i].mapping.pl_next;
+            paddr_t sl1ma = range->pl_tab[i].mapping.sl1ma;
+            dv_paddr_link_t *pl = NULL;
+            for (;;)
+            {
+                if ( sl1ma == INVALID_PADDR )
+                    /* End of list or frame buffer page is unmapped. */
+                    break;
+
+                if ( sl1e ) /* Cleanup from previous iteration. */
+                    unmap_domain_page(sl1e);
+
+                sl1e = map_domain_page(sl1ma >> PAGE_SHIFT) +
+                    (sl1ma & ~PAGE_MASK);
+                if ( l1e_get_flags(*sl1e) & _PAGE_DIRTY )
+                {
+                    dirty = 1;
+                    /* Clear dirty so we can detect if page gets re-dirtied.
+                     * Note: this is atomic, so we may clear a
+                     * _PAGE_ACCESSED set by another processor.
+                     */
+                    l1e_remove_flags(*sl1e, _PAGE_DIRTY);
+                    flush_tlb = 1;
+                    /* Even though we now know the frame buffer page is dirty,
+                     * keep iterating to clear the dirty flag in all other
+                     * mappings. 
+                     */
+                }
+                
+                pl = remap_maddr(pl, next);
+                if ( !pl )
+                    break;
+                
+                next = pl->pl_next;
+                sl1ma = pl->sl1ma;
+            } /* for */
+
+            /* Unmap pl. */
+            remap_vaddr(NULL, pl);
+        }
+        if ( dirty )
+            /* Set the i'th bit in the output dirty_bitmap */
+            dirty_bitmap[i >> 3] |= (1 << (i & 7));
+
+    }
+    
+    if ( sl1e )
+        unmap_domain_page(sl1e);
+
+    return flush_tlb;
+}
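+
+/*
+ * In the bitmap produced above, bit i corresponds to pfn
+ * (range->begin_pfn + i); a consumer can test it with, e.g.:
+ *
+ *     dirty = (dirty_bitmap[i >> 3] >> (i & 7)) & 1;
+ */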
+
+
+/*
+ * shadow_track_dirty_vram()
+ * This is the API called by the device model to determine which pages in
+ * the range [begin_pfn:begin_pfn+nr) have been dirtied since the last call.
+ * It creates the domain's dv_dirty_vram on demand.
+ * It creates ranges on demand when some [begin_pfn:begin_pfn+nr) is first
+ * encountered.
+ * To collect the dirty bitmask it calls shadow_scan_dirty_flags().
+ * It copies the dirty bitmask into guest storage.
+ */
+int shadow_track_dirty_vram(struct domain *d,
+                            unsigned long begin_pfn,
+                            unsigned long nr,
+                            XEN_GUEST_HANDLE_64(uint8) guest_dirty_bitmap)
+{
+    int rc = 0;
+    unsigned long end_pfn = begin_pfn + nr;
+    int flush_tlb = 0;
+    dv_range_t *range;
+    struct p2m_domain *p2m = p2m_get_hostp2m(d);
+    uint8_t *dirty_bitmap = NULL;
+
+    /*
+     * This range test is tricky.
+     *
+     * The range [begin_pfn..end_pfn) is an open interval, so end_pfn
+     * is a pfn beyond the end of the range.
+     *
+     * p2m->max_mapped_pfn is a valid PFN so p2m->max_mapped_pfn + 1 is an
+     * invalid PFN.
+     *
+     * If end_pfn is beyond *that* then the range is invalid.
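+     *
+     * For example, with p2m->max_mapped_pfn == 0xfffff, a request with
+     * begin_pfn == 0xff000 and nr == 0x1000 gives end_pfn == 0x100000,
+     * which equals max_mapped_pfn + 1 and is therefore still accepted.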
+     */
+    if ( end_pfn < begin_pfn
+         || begin_pfn > p2m->max_mapped_pfn
+         || end_pfn > p2m->max_mapped_pfn + 1 )
+        return -EINVAL;
+
+    paging_lock(d);
+
+    if ( !nr )
+    {
+        dirty_vram_free(d);
+        goto out;
+    }
+
+    if ( guest_handle_is_null(guest_dirty_bitmap) )
+        goto out;
+
+    if ( !dirty_vram_find_or_alloc(d) )
+    {
+        rc = -ENOMEM;
+        goto out;
+    }
+
+    range = dirty_vram_range_find(d, begin_pfn, nr);
+    if ( !range )
+    {
+        range = dirty_vram_range_alloc(d, begin_pfn, nr);
+        if ( range )
+            sh_find_all_vram_mappings(d->vcpu[0], range);
+    }
+    if ( range )
+    {
+        int size = ( nr + BITS_PER_BYTE - 1 ) / BITS_PER_BYTE;
+        
+        rc = -ENOMEM;
+        dirty_bitmap = xzalloc_bytes( size );
+        if ( !dirty_bitmap )
+            goto out;
+
+        flush_tlb |= shadow_scan_dirty_flags(d, range, dirty_bitmap);
+
+        rc = -EFAULT;
+        if ( copy_to_guest(guest_dirty_bitmap,
+                           dirty_bitmap,
+                           size) == 0 )
+            rc = 0;
+    }
+    
+    if ( flush_tlb )
+        flush_tlb_mask(d->domain_dirty_cpumask);
+
+out:
+    paging_unlock(d);
+    
+    if ( dirty_bitmap )
+        xfree(dirty_bitmap);
+    return rc;
+}
+
+
+/************************************************/
+/*          HAP VRAM TRACKING SUPPORT           */
+/************************************************/
+
+/*
+ * hap_track_dirty_vram()
+ * Create the domain's dv_dirty_vram struct on demand.
+ * Create a dirty vram range on demand when some [begin_pfn:begin_pfn+nr) is
+ * first encountered.
+ * Collect the guest_dirty bitmask, a bit mask of the dirty vram pages, by
+ * calling paging_log_dirty_range(), which interrogates each vram
+ * page's p2m type looking for pages that have been made writable.
+ */
+int hap_track_dirty_vram(struct domain *d,
+                         unsigned long begin_pfn,
+                         unsigned long nr,
+                         XEN_GUEST_HANDLE_64(uint8) guest_dirty_bitmap)
+{
+    long rc = 0;
+    dv_dirty_vram_t *dirty_vram;
+    uint8_t *dirty_bitmap = NULL;
+
+    if ( nr )
+    {
+        dv_range_t *range = NULL;
+        int size = ( nr + BITS_PER_BYTE - 1 ) / BITS_PER_BYTE;
+        
+        if ( !paging_mode_log_dirty(d) )
+        {
+            hap_logdirty_init(d);
+            rc = paging_log_dirty_enable(d);
+            if ( rc )
+                goto out;
+        }
+
+        rc = -ENOMEM;
+        dirty_bitmap = xzalloc_bytes( size );
+        if ( !dirty_bitmap )
+            goto out;
+        
+        paging_lock(d);
+        
+        dirty_vram = d->arch.hvm_domain.dirty_vram;
+        if ( !dirty_vram ) 
+        {
+            rc = -ENOMEM;
+            if ( !(dirty_vram = dirty_vram_alloc(d)) )
+            {
+                paging_unlock(d);
+                goto out;
+            }
+        }
+        
+        range = dirty_vram_range_find(d, begin_pfn, nr);
+        if ( !range )
+        {
+            rc = -ENOMEM;
+            if ( !(range = dirty_vram_range_alloc(d, begin_pfn, nr)) )
+            {
+                paging_unlock(d);
+                goto out;
+            }
+            
+            paging_unlock(d);
+            
+            /* Set l1e entries of range within P2M table to be read-only. */
+            p2m_change_type_range(d, begin_pfn, begin_pfn + nr,
+                                  p2m_ram_rw, p2m_ram_logdirty);
+            
+            flush_tlb_mask(d->domain_dirty_cpumask);
+            
+            memset(dirty_bitmap, 0xff, size); /* Consider all pages dirty. */
+        }
+        else
+        {
+            paging_unlock(d);
+            
+            domain_pause(d);
+            
+            /* Get the bitmap. */
+            paging_log_dirty_range(d, begin_pfn, nr, dirty_bitmap);
+            
+            domain_unpause(d);
+        }
+        
+        
+        rc = -EFAULT;
+        if ( copy_to_guest(guest_dirty_bitmap,
+                           dirty_bitmap,
+                           size) == 0 )
+        {
+            rc = 0;
+        }
+    }
+    else {
+        paging_lock(d);
+        
+        dirty_vram = d->arch.hvm_domain.dirty_vram;
+        if ( dirty_vram )
+        {
+            /*
+             * If zero pages specified while tracking dirty vram
+             * then stop tracking
+             */
+            dirty_vram_free(d);
+        
+        }
+        
+        paging_unlock(d);
+    }
+out:
+    if ( dirty_bitmap )
+        xfree(dirty_bitmap);
+    
+    return rc;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c
index f353d3a..3ac54c9 100644
--- a/xen/arch/x86/mm/hap/hap.c
+++ b/xen/arch/x86/mm/hap/hap.c
@@ -53,117 +53,6 @@
 #define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg))
 
 /************************************************/
-/*          HAP VRAM TRACKING SUPPORT           */
-/************************************************/
-
-/*
- * hap_track_dirty_vram()
- * Create the domain's dv_dirty_vram struct on demand.
- * Create a dirty vram range on demand when some [begin_pfn:begin_pfn+nr] is
- * first encountered.
- * Collect the guest_dirty bitmask, a bit mask of the dirty vram pages, by
- * calling paging_log_dirty_range(), which interrogates each vram
- * page's p2m type looking for pages that have been made writable.
- */
-
-int hap_track_dirty_vram(struct domain *d,
-                         unsigned long begin_pfn,
-                         unsigned long nr,
-                         XEN_GUEST_HANDLE_64(uint8) guest_dirty_bitmap)
-{
-    long rc = 0;
-    struct sh_dirty_vram *dirty_vram;
-    uint8_t *dirty_bitmap = NULL;
-
-    if ( nr )
-    {
-        int size = (nr + BITS_PER_BYTE - 1) / BITS_PER_BYTE;
-
-        if ( !paging_mode_log_dirty(d) )
-        {
-            hap_logdirty_init(d);
-            rc = paging_log_dirty_enable(d);
-            if ( rc )
-                goto out;
-        }
-
-        rc = -ENOMEM;
-        dirty_bitmap = xzalloc_bytes(size);
-        if ( !dirty_bitmap )
-            goto out;
-
-        paging_lock(d);
-
-        dirty_vram = d->arch.hvm_domain.dirty_vram;
-        if ( !dirty_vram )
-        {
-            rc = -ENOMEM;
-            if ( (dirty_vram = xzalloc(struct sh_dirty_vram)) == NULL )
-            {
-                paging_unlock(d);
-                goto out;
-            }
-
-            d->arch.hvm_domain.dirty_vram = dirty_vram;
-        }
-
-        if ( begin_pfn != dirty_vram->begin_pfn ||
-             begin_pfn + nr != dirty_vram->end_pfn )
-        {
-            dirty_vram->begin_pfn = begin_pfn;
-            dirty_vram->end_pfn = begin_pfn + nr;
-
-            paging_unlock(d);
-
-            /* set l1e entries of range within P2M table to be read-only. */
-            p2m_change_type_range(d, begin_pfn, begin_pfn + nr,
-                                  p2m_ram_rw, p2m_ram_logdirty);
-
-            flush_tlb_mask(d->domain_dirty_cpumask);
-
-            memset(dirty_bitmap, 0xff, size); /* consider all pages dirty */
-        }
-        else
-        {
-            paging_unlock(d);
-
-            domain_pause(d);
-
-            /* get the bitmap */
-            paging_log_dirty_range(d, begin_pfn, nr, dirty_bitmap);
-
-            domain_unpause(d);
-        }
-
-        rc = -EFAULT;
-        if ( copy_to_guest(guest_dirty_bitmap, dirty_bitmap, size) == 0 )
-            rc = 0;
-    }
-    else
-    {
-        paging_lock(d);
-
-        dirty_vram = d->arch.hvm_domain.dirty_vram;
-        if ( dirty_vram )
-        {
-            /*
-             * If zero pages specified while tracking dirty vram
-             * then stop tracking
-             */
-            xfree(dirty_vram);
-            d->arch.hvm_domain.dirty_vram = NULL;
-        }
-
-        paging_unlock(d);
-    }
-out:
-    if ( dirty_bitmap )
-        xfree(dirty_bitmap);
-
-    return rc;
-}
-
-/************************************************/
 /*            HAP LOG DIRTY SUPPORT             */
 /************************************************/
 
diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
index de1dd82..6f638a2 100644
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -741,20 +741,23 @@ void p2m_change_type_range(struct domain *d,
     struct p2m_domain *p2m = p2m_get_hostp2m(d);
 
     BUG_ON(p2m_is_grant(ot) || p2m_is_grant(nt));
-
     p2m_lock(p2m);
-    p2m->defer_nested_flush = 1;
 
+    p2m->defer_nested_flush = 1;
+    
     for ( gfn = start; gfn < end; gfn++ )
     {
         mfn = p2m->get_entry(p2m, gfn, &pt, &a, 0, NULL);
         if ( pt == ot )
-            set_p2m_entry(p2m, gfn, mfn, PAGE_ORDER_4K, nt, p2m->default_access);
+            set_p2m_entry(p2m, gfn, mfn, PAGE_ORDER_4K, nt,
+                          p2m->default_access);
     }
-
+    
     p2m->defer_nested_flush = 0;
+
     if ( nestedhvm_enabled(d) )
         p2m_flush_nestedp2m(d);
+
     p2m_unlock(p2m);
 }
 
diff --git a/xen/arch/x86/mm/paging.c b/xen/arch/x86/mm/paging.c
index ac9bb1a..d59e8d6 100644
--- a/xen/arch/x86/mm/paging.c
+++ b/xen/arch/x86/mm/paging.c
@@ -27,6 +27,7 @@
 #include <asm/p2m.h>
 #include <asm/hap.h>
 #include <asm/hvm/nestedhvm.h>
+#include <asm/dirty_vram.h>
 #include <xen/numa.h>
 #include <xsm/xsm.h>
 
@@ -192,15 +193,11 @@ int paging_log_dirty_disable(struct domain *d)
     return ret;
 }
 
-/* Mark a page as dirty */
+/* Given a guest mfn, mark a page as dirty */
 void paging_mark_dirty(struct domain *d, unsigned long guest_mfn)
 {
     unsigned long pfn;
     mfn_t gmfn;
-    int changed;
-    mfn_t mfn, *l4, *l3, *l2;
-    unsigned long *l1;
-    int i1, i2, i3, i4;
 
     gmfn = _mfn(guest_mfn);
 
@@ -210,6 +207,19 @@ void paging_mark_dirty(struct domain *d, unsigned long guest_mfn)
 
     /* We /really/ mean PFN here, even for non-translated guests. */
     pfn = get_gpfn_from_mfn(mfn_x(gmfn));
+    paging_mark_dirty_gpfn(d, pfn);
+}
+
+
+/* Given a guest pfn, mark a page as dirty */
+void paging_mark_dirty_gpfn(struct domain *d, unsigned long pfn)
+{
+    int changed;
+    mfn_t mfn, *l4, *l3, *l2;
+    unsigned long *l1;
+    int i1, i2, i3, i4;
+    dv_range_t *range;
+    
     /* Shared MFNs should NEVER be marked dirty */
     BUG_ON(SHARED_M2P(pfn));
 
@@ -229,6 +239,11 @@ void paging_mark_dirty(struct domain *d, unsigned long guest_mfn)
     /* Recursive: this is called from inside the shadow code */
     paging_lock_recursive(d);
 
+    d->arch.paging.log_dirty.dirty_count++;
+    range = dirty_vram_range_find_gfn(d, pfn);
+    if ( range )
+        range->dirty_count++;
+
     if ( unlikely(!mfn_valid(d->arch.paging.log_dirty.top)) ) 
     {
          d->arch.paging.log_dirty.top = paging_new_log_dirty_node(d);
@@ -445,7 +460,31 @@ void paging_log_dirty_range(struct domain *d,
     struct p2m_domain *p2m = p2m_get_hostp2m(d);
     int i;
     unsigned long pfn;
+    dv_range_t *range;
+    unsigned int range_dirty_count;
 
+    paging_lock(d);
+    range = dirty_vram_range_find_gfn(d, begin_pfn);
+    if ( !range )
+    {
+        paging_unlock(d);
+        goto out;
+    }
+    
+    range_dirty_count = range->dirty_count;
+    range->dirty_count = 0;
+
+    paging_unlock(d);
+    
+    if ( !range_dirty_count )
+        goto out;
+
+    PAGING_DEBUG(LOGDIRTY,
+                 "log-dirty-range: dom %u [%05lx:%05lx] range_dirty=%u\n",
+                 d->domain_id,
+                 begin_pfn,
+                 begin_pfn + nr,
+                 range_dirty_count);
     /*
      * Set l1e entries of P2M table to be read-only.
      *
@@ -460,15 +499,17 @@ void paging_log_dirty_range(struct domain *d,
 
     for ( i = 0, pfn = begin_pfn; pfn < begin_pfn + nr; i++, pfn++ )
     {
-        p2m_type_t pt;
-        pt = p2m_change_type(d, pfn, p2m_ram_rw, p2m_ram_logdirty);
-        if ( pt == p2m_ram_rw )
+        if ( p2m_change_type(d, pfn, p2m_ram_rw, p2m_ram_logdirty) ==
+             p2m_ram_rw )
             dirty_bitmap[i >> 3] |= (1 << (i & 7));
     }
 
     p2m_unlock(p2m);
 
     flush_tlb_mask(d->domain_dirty_cpumask);
+
+ out:
+    return;
 }
 
 /* Note that this function takes three function pointers. Callers must supply
diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c
index 292c1f7..b72a6bb 100644
--- a/xen/arch/x86/mm/shadow/common.c
+++ b/xen/arch/x86/mm/shadow/common.c
@@ -36,6 +36,7 @@
 #include <asm/current.h>
 #include <asm/flushtlb.h>
 #include <asm/shadow.h>
+#include <asm/dirty_vram.h>
 #include <xen/numa.h>
 #include "private.h"
 
@@ -3121,12 +3122,7 @@ void shadow_teardown(struct domain *d)
      * calls now that we've torn down the bitmap */
     d->arch.paging.mode &= ~PG_log_dirty;
 
-    if (d->arch.hvm_domain.dirty_vram) {
-        xfree(d->arch.hvm_domain.dirty_vram->sl1ma);
-        xfree(d->arch.hvm_domain.dirty_vram->dirty_bitmap);
-        xfree(d->arch.hvm_domain.dirty_vram);
-        d->arch.hvm_domain.dirty_vram = NULL;
-    }
+    dirty_vram_free(d);
 
     paging_unlock(d);
 
@@ -3464,194 +3460,219 @@ void shadow_clean_dirty_bitmap(struct domain *d)
 
 /**************************************************************************/
 /* VRAM dirty tracking support */
-int shadow_track_dirty_vram(struct domain *d,
-                            unsigned long begin_pfn,
-                            unsigned long nr,
-                            XEN_GUEST_HANDLE_64(uint8) dirty_bitmap)
-{
-    int rc;
-    unsigned long end_pfn = begin_pfn + nr;
-    unsigned long dirty_size = (nr + 7) / 8;
-    int flush_tlb = 0;
-    unsigned long i;
-    p2m_type_t t;
-    struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
-    struct p2m_domain *p2m = p2m_get_hostp2m(d);
 
-    if (end_pfn < begin_pfn
-            || begin_pfn > p2m->max_mapped_pfn
-            || end_pfn >= p2m->max_mapped_pfn)
-        return -EINVAL;
 
-    /* We perform p2m lookups, so lock the p2m upfront to avoid deadlock */
-    p2m_lock(p2m_get_hostp2m(d));
-    paging_lock(d);
+/* Support functions for shadow-based dirty VRAM code */
 
-    if ( dirty_vram && (!nr ||
-             ( begin_pfn != dirty_vram->begin_pfn
-            || end_pfn   != dirty_vram->end_pfn )) )
-    {
-        /* Different tracking, tear the previous down. */
-        gdprintk(XENLOG_INFO, "stopping tracking VRAM %lx - %lx\n", dirty_vram->begin_pfn, dirty_vram->end_pfn);
-        xfree(dirty_vram->sl1ma);
-        xfree(dirty_vram->dirty_bitmap);
-        xfree(dirty_vram);
-        dirty_vram = d->arch.hvm_domain.dirty_vram = NULL;
-    }
+#define DEBUG_unshadow_sl1ma                  0          
+#define DEBUG_unshadow_sl1ma_detail           0
+#define DEBUG_count_initial_mappings          0
 
-    if ( !nr )
+/* smfn is no longer a shadow page.  Remove it from any
+ * dirty vram range mapping. */
+void
+dirty_vram_delete_shadow(struct vcpu *v,
+                         unsigned long gfn,
+                         unsigned int shadow_type, 
+                         mfn_t smfn)
+{
+    static unsigned int l1_shadow_mask = 
+          1 << SH_type_l1_32_shadow
+        | 1 << SH_type_fl1_32_shadow
+        | 1 << SH_type_l1_pae_shadow
+        | 1 << SH_type_fl1_pae_shadow
+        | 1 << SH_type_l1_64_shadow
+        | 1 << SH_type_fl1_64_shadow
+        ;
+    struct domain *d = v->domain;
+    dv_dirty_vram_t *dirty_vram;
+    struct list_head *curr, *next;
+    dv_paddr_link_t *pl = NULL;
+    paddr_t *ppl = NULL;
+    
+    ASSERT(paging_locked_by_me(d));
+    /* Ignore all but level 1 shadows */
+    
+    if ( (l1_shadow_mask & (1 << shadow_type)) == 0 )
     {
-        rc = 0;
         goto out;
     }
 
-    /* This should happen seldomly (Video mode change),
-     * no need to be careful. */
+    dirty_vram = d->arch.hvm_domain.dirty_vram;
     if ( !dirty_vram )
     {
-        /* Throw away all the shadows rather than walking through them 
-         * up to nr times getting rid of mappings of each pfn */
-        shadow_blow_tables(d);
-
-        gdprintk(XENLOG_INFO, "tracking VRAM %lx - %lx\n", begin_pfn, end_pfn);
-
-        rc = -ENOMEM;
-        if ( (dirty_vram = xmalloc(struct sh_dirty_vram)) == NULL )
-            goto out;
-        dirty_vram->begin_pfn = begin_pfn;
-        dirty_vram->end_pfn = end_pfn;
-        d->arch.hvm_domain.dirty_vram = dirty_vram;
-
-        if ( (dirty_vram->sl1ma = xmalloc_array(paddr_t, nr)) == NULL )
-            goto out_dirty_vram;
-        memset(dirty_vram->sl1ma, ~0, sizeof(paddr_t) * nr);
-
-        if ( (dirty_vram->dirty_bitmap = xzalloc_array(uint8_t, dirty_size)) == NULL )
-            goto out_sl1ma;
-
-        dirty_vram->last_dirty = NOW();
-
-        /* Tell the caller that this time we could not track dirty bits. */
-        rc = -ENODATA;
-    }
-    else if (dirty_vram->last_dirty == -1)
-    {
-        /* still completely clean, just copy our empty bitmap */
-        rc = -EFAULT;
-        if ( copy_to_guest(dirty_bitmap, dirty_vram->dirty_bitmap, dirty_size) == 0 )
-            rc = 0;
+        goto out;
     }
-    else
+        
+    list_for_each_safe(curr, next, &dirty_vram->range_head)
     {
-        unsigned long map_mfn = INVALID_MFN;
-        void *map_sl1p = NULL;
-
-        /* Iterate over VRAM to track dirty bits. */
-        for ( i = 0; i < nr; i++ ) {
-            mfn_t mfn = get_gfn_query_unlocked(d, begin_pfn + i, &t);
-            struct page_info *page;
-            int dirty = 0;
-            paddr_t sl1ma = dirty_vram->sl1ma[i];
+        dv_range_t *range = list_entry(curr, dv_range_t, range_link);
+        unsigned long i;
+        int max_mappings = 1, mappings = 0;
+        int unshadowed = 0;
+        for ( i = 0; i != range->end_pfn - range->begin_pfn; i++ )
+        {
+            
+            pl = remap_vaddr(pl, &range->pl_tab[ i ].mapping);
+            /* clean up from previous iteration */
+            ppl = remap_vaddr(ppl, NULL); /* unmap ppl */
 
-            if (mfn_x(mfn) == INVALID_MFN)
-            {
-                dirty = 1;
-            }
-            else
+            mappings = 0;
+            
+            while ( pl != NULL )
             {
-                page = mfn_to_page(mfn);
-                switch (page->u.inuse.type_info & PGT_count_mask)
-                {
-                case 0:
-                    /* No guest reference, nothing to track. */
-                    break;
-                case 1:
-                    /* One guest reference. */
-                    if ( sl1ma == INVALID_PADDR )
-                    {
-                        /* We don't know which sl1e points to this, too bad. */
-                        dirty = 1;
-                        /* TODO: Heuristics for finding the single mapping of
-                         * this gmfn */
-                        flush_tlb |= sh_remove_all_mappings(d->vcpu[0], mfn);
-                    }
-                    else
-                    {
-                        /* Hopefully the most common case: only one mapping,
-                         * whose dirty bit we can use. */
-                        l1_pgentry_t *sl1e;
-                        unsigned long sl1mfn = paddr_to_pfn(sl1ma);
-
-                        if ( sl1mfn != map_mfn )
-                        {
-                            if ( map_sl1p )
-                                sh_unmap_domain_page(map_sl1p);
-                            map_sl1p = sh_map_domain_page(_mfn(sl1mfn));
-                            map_mfn = sl1mfn;
-                        }
-                        sl1e = map_sl1p + (sl1ma & ~PAGE_MASK);
-
-                        if ( l1e_get_flags(*sl1e) & _PAGE_DIRTY )
-                        {
-                            dirty = 1;
-                            /* Note: this is atomic, so we may clear a
-                             * _PAGE_ACCESSED set by another processor. */
-                            l1e_remove_flags(*sl1e, _PAGE_DIRTY);
-                            flush_tlb = 1;
-                        }
-                    }
-                    break;
-                default:
-                    /* More than one guest reference,
-                     * we don't afford tracking that. */
-                    dirty = 1;
+                paddr_t sl1ma = pl->sl1ma;
+                unsigned long sl1mn;
+               
+                if ( sl1ma == INVALID_PADDR )
                     break;
+                
+                sl1mn = sl1ma >> PAGE_SHIFT;
+                if ( sl1mn == mfn_x(smfn) ) {
+#if DEBUG_unshadow_sl1ma_detail
+                    gdprintk(XENLOG_DEBUG,
+                             "[%lx] gfn[%lx] unshadow sl1ma:%lx\n",
+                             mfn_x(smfn),
+                             range->begin_pfn + i,
+                             sl1ma);
+#endif
+                    unshadowed++;
+                    pl = free_paddr_link(d, ppl, pl);
+                    /* ppl remains unchanged. */
+                }
+                else
+                {
+                    ppl = remap_vaddr(ppl, &pl->pl_next);
+                    pl =  remap_maddr(pl, (*ppl));
+                    mappings++;
                 }
-            }
-
-            if ( dirty )
-            {
-                dirty_vram->dirty_bitmap[i / 8] |= 1 << (i % 8);
-                dirty_vram->last_dirty = NOW();
             }
         }
+        if ( mappings > max_mappings )
+            max_mappings = mappings;
+        
+        if ( unshadowed ) {
+#if DEBUG_unshadow_sl1ma
+            gdprintk(XENLOG_DEBUG,
+                     "[%lx] gfn[%05lx:%05lx] unshadowed:%d mappings:0x%x "
+                     "max_mappings:%d\n",
+                     mfn_x(smfn),
+                     range->begin_pfn, range->end_pfn,
+                     unshadowed, range->nr_mappings, max_mappings);
+#endif
+        }
+    }
+ out:
+    remap_vaddr(pl,  NULL); /* unmap pl */
+    remap_vaddr(ppl, NULL); /* unmap ppl */
+    return;
+}
+
 
-        if ( map_sl1p )
-            sh_unmap_domain_page(map_sl1p);
+typedef int (*hash_pfn_callback_t)(struct vcpu *v,
+                                   mfn_t smfn,
+                                   unsigned long begin_pfn,
+                                   unsigned long end_pfn,
+                                   int *removed);
 
-        rc = -EFAULT;
-        if ( copy_to_guest(dirty_bitmap, dirty_vram->dirty_bitmap, dirty_size) == 0 ) {
-            memset(dirty_vram->dirty_bitmap, 0, dirty_size);
-            if (dirty_vram->last_dirty + SECONDS(2) < NOW())
+static int hash_pfn_foreach(struct vcpu *v, 
+                            unsigned int callback_mask, 
+                            hash_pfn_callback_t callbacks[], 
+                            unsigned long begin_pfn,
+                            unsigned long end_pfn)
+/* Walk the hash table looking at the types of the entries and 
+ * calling the appropriate callback function for each entry. 
+ * The mask determines which shadow types we call back for, and the array
+ * of callbacks tells us which function to call.
+ * Any callback may return non-zero to let us skip the rest of the scan. 
+ *
+ * WARNING: Callbacks MUST NOT add or remove hash entries unless they 
+ * then return non-zero to terminate the scan. */
+{
+    int i, done = 0, removed = 0;
+    struct domain *d = v->domain;
+    struct page_info *x;
+
+    /* Say we're here, to stop hash-lookups reordering the chains */
+    ASSERT(paging_locked_by_me(d));
+    ASSERT(d->arch.paging.shadow.hash_walking == 0);
+    d->arch.paging.shadow.hash_walking = 1;
+
+    for ( i = 0; i < SHADOW_HASH_BUCKETS; i++ ) 
+    {
+        /* WARNING: This is not safe against changes to the hash table.
+         * The callback *must* return non-zero if it has inserted or
+         * deleted anything from the hash (lookups are OK, though). */
+        for ( x = d->arch.paging.shadow.hash_table[i];
+              x;
+              x = next_shadow(x) )
+        {
+            if ( callback_mask & (1 << x->u.sh.type) )
             {
-                /* was clean for more than two seconds, try to disable guest
-                 * write access */
-                for ( i = begin_pfn; i < end_pfn; i++ ) {
-                    mfn_t mfn = get_gfn_query_unlocked(d, i, &t);
-                    if (mfn_x(mfn) != INVALID_MFN)
-                        flush_tlb |= sh_remove_write_access(d->vcpu[0], mfn, 1, 0);
-                }
-                dirty_vram->last_dirty = -1;
+                ASSERT(x->u.sh.type <= 15);
+                ASSERT(callbacks[x->u.sh.type] != NULL);
+                done = callbacks[x->u.sh.type](v, page_to_mfn(x), 
+                                               begin_pfn, end_pfn,
+                                               &removed);
+                if ( done ) break;
             }
-            rc = 0;
         }
+        if ( done ) break; 
     }
-    if ( flush_tlb )
-        flush_tlb_mask(d->domain_dirty_cpumask);
-    goto out;
+    d->arch.paging.shadow.hash_walking = 0;
+    return removed;
+}
+
+void sh_find_all_vram_mappings(struct vcpu *v,
+                               dv_range_t *range)
+{
+    /* Dispatch table for getting per-type functions */
+    static hash_pfn_callback_t callbacks[SH_type_unused] = {
+        NULL, /* none    */
+        SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, 2), /* l1_32   */
+        SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, 2), /* fl1_32  */
+        NULL, /* l2_32   */
+        SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, 3), /* l1_pae  */
+        SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, 3), /* fl1_pae */
+        NULL, /* l2_pae  */
+        NULL, /* l2h_pae */
+#if CONFIG_PAGING_LEVELS >= 4
+        SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, 4), /* l1_64   */
+        SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, 4), /* fl1_64  */
+#else
+        NULL, /* l1_64   */
+        NULL, /* fl1_64  */
+#endif
+        NULL, /* l2_64   */
+        NULL, /* l2h_64  */
+        NULL, /* l3_64   */
+        NULL, /* l4_64   */
+        NULL, /* p2m     */
+        NULL  /* unused  */
+    };
 
-out_sl1ma:
-    xfree(dirty_vram->sl1ma);
-out_dirty_vram:
-    xfree(dirty_vram);
-    dirty_vram = d->arch.hvm_domain.dirty_vram = NULL;
+    static unsigned int callback_mask = 
+          1 << SH_type_l1_32_shadow
+        | 1 << SH_type_fl1_32_shadow
+        | 1 << SH_type_l1_pae_shadow
+        | 1 << SH_type_fl1_pae_shadow
+        | 1 << SH_type_l1_64_shadow
+        | 1 << SH_type_fl1_64_shadow
+        ;
 
-out:
-    paging_unlock(d);
-    p2m_unlock(p2m_get_hostp2m(d));
-    return rc;
+    perfc_incr(shadow_mappings);
+
+    hash_pfn_foreach(v, callback_mask, callbacks,
+                     range->begin_pfn,
+                     range->end_pfn);
+
+#if DEBUG_count_initial_mappings
+    gdprintk(XENLOG_DEBUG, "[%05lx:%05lx] count of initial\n",
+             range->begin_pfn, range->end_pfn
+#endif
 }
 
+
 /**************************************************************************/
 /* Shadow-control XEN_DOMCTL dispatcher */
 
diff --git a/xen/arch/x86/mm/shadow/multi.c b/xen/arch/x86/mm/shadow/multi.c
index b79cd6c..922e01a 100644
--- a/xen/arch/x86/mm/shadow/multi.c
+++ b/xen/arch/x86/mm/shadow/multi.c
@@ -35,6 +35,7 @@
 #include <asm/flushtlb.h>
 #include <asm/hvm/hvm.h>
 #include <asm/hvm/cacheattr.h>
+#include <asm/dirty_vram.h>
 #include <asm/mtrr.h>
 #include <asm/guest_pt.h>
 #include <public/sched.h>
@@ -149,6 +150,10 @@ delete_fl1_shadow_status(struct vcpu *v, gfn_t gfn, mfn_t smfn)
     SHADOW_PRINTK("gfn=%"SH_PRI_gfn", type=%08x, smfn=%05lx\n",
                    gfn_x(gfn), SH_type_fl1_shadow, mfn_x(smfn));
     ASSERT(mfn_to_page(smfn)->u.sh.head);
+
+    /* Remove any dv_paddr_links to the erstwhile shadow page. */
+    dirty_vram_delete_shadow(v, gfn_x(gfn), SH_type_fl1_shadow, smfn);
+    
     shadow_hash_delete(v, gfn_x(gfn), SH_type_fl1_shadow, smfn);
 }
 
@@ -160,6 +165,10 @@ delete_shadow_status(struct vcpu *v, mfn_t gmfn, u32 shadow_type, mfn_t smfn)
                    v->domain->domain_id, v->vcpu_id,
                    mfn_x(gmfn), shadow_type, mfn_x(smfn));
     ASSERT(mfn_to_page(smfn)->u.sh.head);
+    
+    /* Remove any dv_paddr_links to the erstwhile shadow page. */
+    dirty_vram_delete_shadow(v, mfn_x(gmfn), shadow_type, smfn);
+    
     shadow_hash_delete(v, mfn_x(gmfn), shadow_type, smfn);
     /* 32-on-64 PV guests don't own their l4 pages; see set_shadow_status */
     if ( !is_pv_32on64_vcpu(v) || shadow_type != SH_type_l4_64_shadow )
@@ -516,7 +525,6 @@ _sh_propagate(struct vcpu *v,
     guest_l1e_t guest_entry = { guest_intpte };
     shadow_l1e_t *sp = shadow_entry_ptr;
     struct domain *d = v->domain;
-    struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
     gfn_t target_gfn = guest_l1e_get_gfn(guest_entry);
     u32 pass_thru_flags;
     u32 gflags, sflags;
@@ -663,17 +671,6 @@ _sh_propagate(struct vcpu *v,
         }
     }
 
-    if ( unlikely((level == 1) && dirty_vram
-            && dirty_vram->last_dirty == -1
-            && gfn_x(target_gfn) >= dirty_vram->begin_pfn
-            && gfn_x(target_gfn) < dirty_vram->end_pfn) )
-    {
-        if ( ft & FETCH_TYPE_WRITE )
-            dirty_vram->last_dirty = NOW();
-        else
-            sflags &= ~_PAGE_RW;
-    }
-
     /* Read-only memory */
     if ( p2m_is_readonly(p2mt) ||
          (p2mt == p2m_mmio_direct &&
@@ -1072,101 +1069,60 @@ static int shadow_set_l2e(struct vcpu *v,
     return flags;
 }
 
-static inline void shadow_vram_get_l1e(shadow_l1e_t new_sl1e,
+/* shadow_vram_fix_l1e()
+ *
+ * Tests L1PTEs as they are modified, looking for when they start to (or
+ * cease to) point to frame buffer pages.  If the old and new gfns differ,
+ * calls dirty_vram_range_update() to update the dirty_vram structures.
+ */
+static inline void shadow_vram_fix_l1e(shadow_l1e_t old_sl1e,
+                                       shadow_l1e_t new_sl1e,
                                        shadow_l1e_t *sl1e,
                                        mfn_t sl1mfn,
                                        struct domain *d)
 { 
-    mfn_t mfn = shadow_l1e_get_mfn(new_sl1e);
-    int flags = shadow_l1e_get_flags(new_sl1e);
-    unsigned long gfn;
-    struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    mfn_t new_mfn, old_mfn;
+    unsigned long new_gfn = INVALID_M2P_ENTRY, old_gfn = INVALID_M2P_ENTRY;
+    paddr_t sl1ma;
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
 
-    if ( !dirty_vram         /* tracking disabled? */
-         || !(flags & _PAGE_RW) /* read-only mapping? */
-         || !mfn_valid(mfn) )   /* mfn can be invalid in mmio_direct */
+    if ( !dirty_vram )
         return;
 
-    gfn = mfn_to_gfn(d, mfn);
-    /* Page sharing not supported on shadow PTs */
-    BUG_ON(SHARED_M2P(gfn));
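+    /* maddr of the shadow L1 entry being updated. */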
+    sl1ma = pfn_to_paddr(mfn_x(sl1mfn)) | ((unsigned long)sl1e & ~PAGE_MASK);
 
-    if ( (gfn >= dirty_vram->begin_pfn) && (gfn < dirty_vram->end_pfn) )
+    old_mfn = shadow_l1e_get_mfn(old_sl1e);
+
+    if ( !sh_l1e_is_magic(old_sl1e) &&
+         (l1e_get_flags(old_sl1e) & _PAGE_PRESENT) &&
+         mfn_valid(old_mfn) )
     {
-        unsigned long i = gfn - dirty_vram->begin_pfn;
-        struct page_info *page = mfn_to_page(mfn);
-        
-        if ( (page->u.inuse.type_info & PGT_count_mask) == 1 )
-            /* Initial guest reference, record it */
-            dirty_vram->sl1ma[i] = pfn_to_paddr(mfn_x(sl1mfn))
-                | ((unsigned long)sl1e & ~PAGE_MASK);
+        old_gfn = mfn_to_gfn(d, old_mfn);
     }
-}
-
-static inline void shadow_vram_put_l1e(shadow_l1e_t old_sl1e,
-                                       shadow_l1e_t *sl1e,
-                                       mfn_t sl1mfn,
-                                       struct domain *d)
-{
-    mfn_t mfn = shadow_l1e_get_mfn(old_sl1e);
-    int flags = shadow_l1e_get_flags(old_sl1e);
-    unsigned long gfn;
-    struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
-
-    if ( !dirty_vram         /* tracking disabled? */
-         || !(flags & _PAGE_RW) /* read-only mapping? */
-         || !mfn_valid(mfn) )   /* mfn can be invalid in mmio_direct */
-        return;
-
-    gfn = mfn_to_gfn(d, mfn);
-    /* Page sharing not supported on shadow PTs */
-    BUG_ON(SHARED_M2P(gfn));
-
-    if ( (gfn >= dirty_vram->begin_pfn) && (gfn < dirty_vram->end_pfn) )
+    
+    new_mfn = shadow_l1e_get_mfn(new_sl1e);
+    if ( !sh_l1e_is_magic(new_sl1e) &&
+         (l1e_get_flags(new_sl1e) & _PAGE_PRESENT) &&
+         mfn_valid(new_mfn) )
     {
-        unsigned long i = gfn - dirty_vram->begin_pfn;
-        struct page_info *page = mfn_to_page(mfn);
-        int dirty = 0;
-        paddr_t sl1ma = pfn_to_paddr(mfn_x(sl1mfn))
-            | ((unsigned long)sl1e & ~PAGE_MASK);
+        new_gfn = mfn_to_gfn(d, new_mfn);
+    }
 
-        if ( (page->u.inuse.type_info & PGT_count_mask) == 1 )
-        {
-            /* Last reference */
-            if ( dirty_vram->sl1ma[i] == INVALID_PADDR ) {
-                /* We didn't know it was that one, let's say it is dirty */
-                dirty = 1;
-            }
-            else
-            {
-                ASSERT(dirty_vram->sl1ma[i] == sl1ma);
-                dirty_vram->sl1ma[i] = INVALID_PADDR;
-                if ( flags & _PAGE_DIRTY )
-                    dirty = 1;
-            }
-        }
-        else
+    if ( old_gfn == new_gfn )
+        return;
+
+    if ( VALID_M2P(old_gfn) )
+        if ( dirty_vram_range_update(d, old_gfn, sl1ma, 0/*clear*/) )
         {
-            /* We had more than one reference, just consider the page dirty. */
-            dirty = 1;
-            /* Check that it's not the one we recorded. */
-            if ( dirty_vram->sl1ma[i] == sl1ma )
-            {
-                /* Too bad, we remembered the wrong one... */
-                dirty_vram->sl1ma[i] = INVALID_PADDR;
-            }
-            else
-            {
-                /* Ok, our recorded sl1e is still pointing to this page, let's
-                 * just hope it will remain. */
-            }
+            SHADOW_PRINTK("gfn %lx (mfn %lx) cleared vram pte\n",
+                          old_gfn, mfn_x(old_mfn));
         }
-        if ( dirty )
+
+    if ( VALID_M2P(new_gfn) )
+        if ( dirty_vram_range_update(d, new_gfn, sl1ma, 1/*set*/) )
         {
-            dirty_vram->dirty_bitmap[i / 8] |= 1 << (i % 8);
-            dirty_vram->last_dirty = NOW();
+            SHADOW_PRINTK("gfn %lx (mfn %lx) set vram pte\n",
+                          new_gfn, mfn_x(new_mfn));
         }
-    }
 }
 
 static int shadow_set_l1e(struct vcpu *v, 
@@ -1211,12 +1167,13 @@ static int shadow_set_l1e(struct vcpu *v,
                 shadow_l1e_remove_flags(new_sl1e, _PAGE_RW);
                 /* fall through */
             case 0:
-                shadow_vram_get_l1e(new_sl1e, sl1e, sl1mfn, d);
                 break;
             }
         }
     } 
 
+    shadow_vram_fix_l1e(old_sl1e, new_sl1e, sl1e, sl1mfn, d);
+
     /* Write the new entry */
     shadow_write_entries(sl1e, &new_sl1e, 1, sl1mfn);
     flags |= SHADOW_SET_CHANGED;
@@ -1231,7 +1188,6 @@ static int shadow_set_l1e(struct vcpu *v,
          * trigger a flush later. */
         if ( shadow_mode_refcounts(d) ) 
         {
-            shadow_vram_put_l1e(old_sl1e, sl1e, sl1mfn, d);
             shadow_put_page_from_l1e(old_sl1e, d);
             TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_SHADOW_L1_PUT_REF);
         } 
@@ -2018,7 +1974,6 @@ void sh_destroy_l1_shadow(struct vcpu *v, mfn_t smfn)
         SHADOW_FOREACH_L1E(sl1mfn, sl1e, 0, 0, {
             if ( (shadow_l1e_get_flags(*sl1e) & _PAGE_PRESENT)
                  && !sh_l1e_is_magic(*sl1e) ) {
-                shadow_vram_put_l1e(*sl1e, sl1e, sl1mfn, d);
                 shadow_put_page_from_l1e(*sl1e, d);
             }
         });
@@ -4336,6 +4291,37 @@ int sh_rm_mappings_from_l1(struct vcpu *v, mfn_t sl1mfn, mfn_t target_mfn)
     return done;
 }
 
+
+int sh_find_vram_mappings_in_l1(struct vcpu *v,
+                                mfn_t sl1mfn,
+                                unsigned long begin_pfn,
+                                unsigned long end_pfn,
+                                int *removed)
+/* Find all VRAM mappings in this shadow l1 table */
+{
+    struct domain *d = v->domain;
+    shadow_l1e_t *sl1e;
+    int done = 0;
+
+    /* only returns _PAGE_PRESENT entries */
+    SHADOW_FOREACH_L1E(sl1mfn, sl1e, 0, done, 
+    {
+        unsigned long gfn;
+        mfn_t gmfn = shadow_l1e_get_mfn(*sl1e);
+        if ( !mfn_valid(gmfn) )
+            continue;
+        gfn = mfn_to_gfn(d, gmfn);
+        if ( VALID_M2P(gfn) && (begin_pfn <= gfn) && (gfn < end_pfn) ) 
+        {
+            paddr_t sl1ma =
+                pfn_to_paddr(mfn_x(sl1mfn)) |
+                ( (unsigned long)sl1e & ~PAGE_MASK );
+            dirty_vram_range_update(v->domain, gfn, sl1ma, 1/*set*/);
+        }
+    });
+    return 0;
+}
+
 /**************************************************************************/
 /* Functions to excise all pointers to shadows from higher-level shadows. */
 
diff --git a/xen/arch/x86/mm/shadow/multi.h b/xen/arch/x86/mm/shadow/multi.h
index 835121e..436a4ac 100644
--- a/xen/arch/x86/mm/shadow/multi.h
+++ b/xen/arch/x86/mm/shadow/multi.h
@@ -66,7 +66,12 @@ SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1, GUEST_LEVELS)
 extern int
 SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1, GUEST_LEVELS)
     (struct vcpu *v, mfn_t sl1mfn, mfn_t target_mfn);
-
+extern int
+SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, GUEST_LEVELS)
+     (struct vcpu *v, mfn_t sl1mfn, 
+      unsigned long begin_pfn,
+      unsigned long end_pfn,
+      int *removed);
 extern void
 SHADOW_INTERNAL_NAME(sh_clear_shadow_entry, GUEST_LEVELS)
     (struct vcpu *v, void *ep, mfn_t smfn);
diff --git a/xen/arch/x86/mm/shadow/types.h b/xen/arch/x86/mm/shadow/types.h
index 43ce1db..5b0f9f7 100644
--- a/xen/arch/x86/mm/shadow/types.h
+++ b/xen/arch/x86/mm/shadow/types.h
@@ -229,6 +229,7 @@ static inline shadow_l4e_t shadow_l4e_from_mfn(mfn_t mfn, u32 flags)
 #define sh_update_cr3              INTERNAL_NAME(sh_update_cr3)
 #define sh_rm_write_access_from_l1 INTERNAL_NAME(sh_rm_write_access_from_l1)
 #define sh_rm_mappings_from_l1     INTERNAL_NAME(sh_rm_mappings_from_l1)
+#define sh_find_vram_mappings_in_l1 INTERNAL_NAME(sh_find_vram_mappings_in_l1)
 #define sh_remove_l1_shadow        INTERNAL_NAME(sh_remove_l1_shadow)
 #define sh_remove_l2_shadow        INTERNAL_NAME(sh_remove_l2_shadow)
 #define sh_remove_l3_shadow        INTERNAL_NAME(sh_remove_l3_shadow)
diff --git a/xen/include/asm-x86/dirty_vram.h b/xen/include/asm-x86/dirty_vram.h
new file mode 100644
index 0000000..727af05
--- /dev/null
+++ b/xen/include/asm-x86/dirty_vram.h
@@ -0,0 +1,227 @@
+/****************************************************************************
+ * include/asm-x86/dirty_vram.h
+ *
+ * Interface for tracking dirty VRAM pages
+ *
+ * Copyright (c) 2012 Citrix Systems, Inc. (Robert Phillips)
+ * Parts of this code are Copyright (c) 2007 Advanced Micro Devices (Wei Huang)
+ * Parts of this code are Copyright (c) 2006 by XenSource Inc.
+ * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
+ * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#ifndef _DIRTY_VRAM_H
+#define _DIRTY_VRAM_H
+
+/*
+ * In shadow mode we need to bookkeep all the L1 page table entries that
+ * map a frame buffer page.  Struct dv_paddr_link does this by
+ * recording the address of a L1 page table entry for some frame buffer page.
+ * Also has a link to additional pl entries if the frame buffer page
+ * has multiple mappings.
+ * In practice very few pages have multiple mappings.
+ * But to rule out some pathological situation, we limit the number of
+ * mappings we're willing to bookkeep.
+ */
+
+#define DV_ADDR_LINK_LIST_LIMIT 64
+
+typedef struct dv_paddr_link {
+    paddr_t sl1ma;
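+    /* maddr of next link (links are maddrs; see remap_maddr()) */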
+    paddr_t pl_next;
+} dv_paddr_link_t;
+
+typedef struct dv_pl_entry {
+    dv_paddr_link_t mapping;
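+    /* when set, treat the page as always dirty (bookkeeping gave up) */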
+    bool_t stuck_dirty;
+} dv_pl_entry_t;
+
+/*
+ * This defines an extension page of pl entries for FB pages with multiple
+ * mappings. All such pages (of a domain) are linked together.
+ */
+typedef struct dv_paddr_link_ext {
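+    /* maddr of the next extension page on the domain's list */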
+    paddr_t ext_link;
+    dv_paddr_link_t entries[ ( PAGE_SIZE - sizeof( paddr_t ) ) /
+                             sizeof( dv_paddr_link_t ) ];
+} dv_paddr_link_ext_t;
+
+/*
+ * This defines a single frame buffer range.  It bookkeeps all the
+ * level 1 PTEs that map guest pages within that range.
+ * All such ranges (of a domain) are linked together.
+ */
+typedef struct dv_range {
+    struct list_head range_link; /* the several ranges form a linked list */
+    unsigned long begin_pfn;
+    unsigned long end_pfn;
+    dv_pl_entry_t *pl_tab; /* table has 1 pl entry per pfn in range */
+    int mappings_hwm; /* high water mark of max mapping count */
+    unsigned int dirty_count;
+} dv_range_t;
+
+/*
+ * This contains all the data structures required by a domain to
+ * bookkeep the dirty pages within its frame buffers.
+ */
+typedef struct dv_dirty_vram {
+    struct list_head range_head; /* head of the linked list of ranges */
+    paddr_t ext_head; /* head of list of extension pages */
+    paddr_t pl_free; /* free list of pl's within extension pages */
+    int nr_ranges;   /* bookkeeps number of ranges */
+    int ranges_hwm;  /* high water mark of max number of ranges */
+} dv_dirty_vram_t;
+
+/* Allocates domain's dirty_vram structure */
+dv_dirty_vram_t *
+dirty_vram_alloc(struct domain *d);
+
+/*
+ * Returns domain's dirty_vram structure,
+ * allocating it if necessary
+ */
+dv_dirty_vram_t *
+dirty_vram_find_or_alloc(struct domain *d);
+
+/* Frees domain's dirty_vram structure */
+void dirty_vram_free(struct domain *d);
+
+/* Returns dirty vram range containing gfn, NULL if none */
+struct dv_range *
+dirty_vram_range_find_gfn(struct domain *d,
+                          unsigned long gfn);
+
+/*
+ * Returns dirty vram range matching [ begin_pfn .. begin_pfn+nr ),
+ * NULL if none
+ */
+dv_range_t *
+dirty_vram_range_find(struct domain *d,
+                      unsigned long begin_pfn,
+                      unsigned long nr);
+
+/*
+ * Allocate dirty vram range containing [ begin_pfn .. begin_pfn+nr ),
+ * freeing any existing range that overlaps the new range.
+ */
+dv_range_t *
+dirty_vram_range_alloc(struct domain *d,
+                       unsigned long begin_pfn,
+                       unsigned long nr);
+
+/*
+ * Returns dirty vram range matching [ begin_pfn .. begin_pfn+nr ),
+ * creating a range if none already exists and
+ * freeing any existing range that overlaps the new range.
+ */
+dv_range_t *
+dirty_vram_range_find_or_alloc(struct domain *d,
+                               unsigned long begin_pfn,
+                               unsigned long nr);
+
+void dirty_vram_range_free(struct domain *d,
+                           dv_range_t *range);
+
+/* Bookkeep PTE address of a frame buffer page */
+int dirty_vram_range_update(struct domain *d,
+                            unsigned long gfn,
+                            paddr_t sl1ma,
+                            int set);
+
+/*
+ * smfn is no longer a shadow page.  Remove it from any
+ * dirty vram range mapping.
+ */
+void
+dirty_vram_delete_shadow(struct vcpu *v,
+                         unsigned long gfn,
+                         unsigned int shadow_type,
+                         mfn_t smfn);
+
+
+/*
+ * Scan all the L1 tables looking for VRAM mappings.
+ * Record them in the domain's dv_dirty_vram structure
+ */
+void sh_find_all_vram_mappings(struct vcpu *v,
+                               dv_range_t *range);
+
+/*
+ * Free a paddr_link struct, given a pointer to its predecessor's
+ * link field in the singly-linked list.
+ */
+dv_paddr_link_t *
+free_paddr_link(struct domain *d,
+                paddr_t* ppl,
+                dv_paddr_link_t *pl);
+
+
+/* Enable VRAM dirty tracking. */
+int
+shadow_track_dirty_vram(struct domain *d,
+                        unsigned long first_pfn,
+                        unsigned long nr,
+                        XEN_GUEST_HANDLE_64(uint8) dirty_bitmap);
+
+int
+hap_track_dirty_vram(struct domain *d,
+                     unsigned long begin_pfn,
+                     unsigned long nr,
+                     XEN_GUEST_HANDLE_64(uint8) dirty_bitmap);
+
+void
+hap_clean_vram_tracking_range(struct domain *d,
+                              unsigned long begin_pfn,
+                              unsigned long nr,
+                              uint8_t *dirty_bitmap);
+
+/* Unmap a va and map a ma.
+ * This is used when walking a linked list in which
+ * the links are stored as ma's rather than va's.
+ * Each step in the walk unmaps the va returned
+ * by the previous iteration and maps the next link.
+ *
+ * Unmaps @old_va if not NULL.
+ *
+ * If @new_ma is not INVALID_PADDR, maps it and returns the resulting
+ * va (which must be subsequently unmapped.) Else returns NULL.
+ */
+void *
+remap_maddr(void *old_va, paddr_t new_ma);
+
+
+/* Like the above but it acquires a mapping
+ * (possibly an additional mapping) on @new_va.
+ *
+ * Returns @new_va.
+ *
+ * Does no unmapping if @old_va is NULL.
+ * Does no mapping if @new_va is NULL.
+ */
+void *
+remap_vaddr(void *old_va, void *new_va);
+
+#endif /* _DIRTY_VRAM_H */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/include/asm-x86/hap.h b/xen/include/asm-x86/hap.h
index 916a35b..3e3a1f5 100644
--- a/xen/include/asm-x86/hap.h
+++ b/xen/include/asm-x86/hap.h
@@ -57,10 +57,6 @@ void  hap_final_teardown(struct domain *d);
 void  hap_teardown(struct domain *d);
 void  hap_vcpu_init(struct vcpu *v);
 void  hap_logdirty_init(struct domain *d);
-int   hap_track_dirty_vram(struct domain *d,
-                           unsigned long begin_pfn,
-                           unsigned long nr,
-                           XEN_GUEST_HANDLE_64(uint8) dirty_bitmap);
 
 extern const struct paging_mode *hap_paging_get_mode(struct vcpu *);
 
diff --git a/xen/include/asm-x86/hvm/domain.h b/xen/include/asm-x86/hvm/domain.h
index 27b3de5..0cc7b05 100644
--- a/xen/include/asm-x86/hvm/domain.h
+++ b/xen/include/asm-x86/hvm/domain.h
@@ -74,7 +74,7 @@ struct hvm_domain {
     struct list_head       pinned_cacheattr_ranges;
 
     /* VRAM dirty support. */
-    struct sh_dirty_vram *dirty_vram;
+    struct dv_dirty_vram  *dirty_vram;
 
     /* If one of vcpus of this domain is in no_fill_mode or
      * mtrr/pat between vcpus is not the same, set is_in_uc_mode
diff --git a/xen/include/asm-x86/paging.h b/xen/include/asm-x86/paging.h
index c3a8848..e22df38 100644
--- a/xen/include/asm-x86/paging.h
+++ b/xen/include/asm-x86/paging.h
@@ -154,9 +154,13 @@ void paging_log_dirty_init(struct domain *d,
                            int  (*disable_log_dirty)(struct domain *d),
                            void (*clean_dirty_bitmap)(struct domain *d));
 
-/* mark a page as dirty */
+/* mark a gmfn as dirty, a wrapper around marking a gpfn as dirty */
 void paging_mark_dirty(struct domain *d, unsigned long guest_mfn);
 
+/* mark a gpfn as dirty */
+void paging_mark_dirty_gpfn(struct domain *d, unsigned long gpfn);
+
 /* is this guest page dirty? 
  * This is called from inside paging code, with the paging lock held. */
 int paging_mfn_is_dirty(struct domain *d, mfn_t gmfn);
@@ -183,15 +187,6 @@ int paging_mfn_is_dirty(struct domain *d, mfn_t gmfn);
 #define L4_LOGDIRTY_IDX(pfn) 0
 #endif
 
-/* VRAM dirty tracking support */
-struct sh_dirty_vram {
-    unsigned long begin_pfn;
-    unsigned long end_pfn;
-    paddr_t *sl1ma;
-    uint8_t *dirty_bitmap;
-    s_time_t last_dirty;
-};
-
 /*****************************************************************************
  * Entry points into the paging-assistance code */
 
diff --git a/xen/include/asm-x86/shadow.h b/xen/include/asm-x86/shadow.h
index 2eb6efc..940d7fd 100644
--- a/xen/include/asm-x86/shadow.h
+++ b/xen/include/asm-x86/shadow.h
@@ -62,12 +62,6 @@ void shadow_vcpu_init(struct vcpu *v);
 /* Enable an arbitrary shadow mode.  Call once at domain creation. */
 int shadow_enable(struct domain *d, u32 mode);
 
-/* Enable VRAM dirty bit tracking. */
-int shadow_track_dirty_vram(struct domain *d,
-                            unsigned long first_pfn,
-                            unsigned long nr,
-                            XEN_GUEST_HANDLE_64(uint8) dirty_bitmap);
-
 /* Handler for shadow control ops: operations from user-space to enable
  * and disable ephemeral shadow modes (test mode and log-dirty mode) and
  * manipulate the log-dirty bitmap. */
-- 
1.7.9.5
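
(Aside for readers of the dirty_vram.h header added above: the fragment below
is a minimal sketch of how the maddr-linked dv_paddr_link chains appear to be
intended to be walked with remap_maddr().  It is illustrative only and not
part of the patch; the types and the remap_maddr() behaviour come from the
header comments, while the loop itself and the assumption that a chain ends
with pl_next == INVALID_PADDR are not taken from the patch.)

    /* Sketch only; assumes <asm/dirty_vram.h>.  Visits every recorded
     * L1 PTE maddr for a single frame buffer page. */
    static void walk_one_page_links(dv_pl_entry_t *cell)
    {
        dv_paddr_link_t *pl = &cell->mapping; /* head link lives in pl_tab */
        void *mapped = NULL;                  /* va of currently mapped link */

        while ( pl != NULL )
        {
            if ( pl->sl1ma != INVALID_PADDR )
            {
                /* ... inspect the shadow L1 PTE at maddr pl->sl1ma ... */
            }
            /* remap_maddr() unmaps the previous va (if any), maps the next
             * link, and returns NULL once pl_next is INVALID_PADDR. */
            mapped = remap_maddr(mapped, pl->pl_next);
            pl = mapped;
        }
    }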

^ permalink raw reply related	[flat|nested] 35+ messages in thread

* Re: [PATCH] Provide support for multiple frame buffers in Xen
  2013-02-05 11:38   ` Robert Phillips
@ 2013-02-07 11:04     ` Tim Deegan
  0 siblings, 0 replies; 35+ messages in thread
From: Tim Deegan @ 2013-02-07 11:04 UTC (permalink / raw)
  To: Robert Phillips; +Cc: Jan Beulich, xen-devel

At 06:38 -0500 on 05 Feb (1360046289), Robert Phillips wrote:
> > > +        ext = __map_domain_page(pg);
> > > +        /* Is unmapped in dirty_vram_free() */
> > 
> > Mappings from map_domain_page() can't be kept around like this.  They're
> > supposed to be short-term, and in systems where we don't have a full 1-1
> > map of memory (e.g. x86 once Jan's 16TB-support series goes in) there are a
> > limited number of mapping slots.
> > 
> > Jan, what do you recommend here?  These are pages of linked-list entries,
> > part of the p2m/mm overhead and so allocated from the guest's shadow
> > memory.  Are we going to have to allocate them from xenheap instead or is
> > there any way to avoid that?
> 
> One possible solution is to use a fixmap table. 
> Rather than storing VAs in these tables, the code could store
> entries containing physical_page/offset_in_page. 
> 
>  When managing the table
> the code could remap PA/offset into a single fixed virtual address
> (and clear its TLB).  Clearing a single TLB shouldn't hurt
> performance too terribly, right?

It should be OK, but I think you might as well use the existing
map_domain_page() code for this, rather than adding a new fixmap -- that
uses a ring of mappings and amortises a single TLB flush every time the
ring is filled up.

So I suggest storing a maddr in the table and using map/unmap_domain_page()
+ offset to access them.  The existing shadow_track_dirty_vram()
function does this (search for sl1ma).

Tim.
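
(A minimal sketch of the access pattern described above -- the same idiom the
existing shadow_track_dirty_vram() code uses; sl1ma below stands for one
recorded shadow-L1-entry maddr, and the surrounding bookkeeping is omitted.)

    l1_pgentry_t *sl1e = map_domain_page(sl1ma >> PAGE_SHIFT);

    sl1e += (sl1ma & (PAGE_SIZE - 1)) / sizeof(*sl1e);
    /* ... read or update the shadow L1 entry through *sl1e ... */
    unmap_domain_page(sl1e);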

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH] Provide support for multiple frame buffers in Xen
  2013-01-24 11:25 ` Tim Deegan
@ 2013-02-05 11:38   ` Robert Phillips
  2013-02-07 11:04     ` Tim Deegan
  2013-03-01 20:52   ` Robert Phillips
  1 sibling, 1 reply; 35+ messages in thread
From: Robert Phillips @ 2013-02-05 11:38 UTC (permalink / raw)
  To: Tim (Xen.org); +Cc: Jan Beulich, xen-devel

See below
-- rsp

> -----Original Message-----
> From: Tim Deegan [mailto:tim@xen.org]
> Sent: Thursday, January 24, 2013 6:25 AM
> To: Robert Phillips
> Cc: Jan Beulich; xen-devel@lists.xen.org
> Subject: Re: [Xen-devel] [PATCH] Provide support for multiple frame buffers
> in Xen
> 
> Hi,
> 
> At 14:28 -0500 on 21 Jan (1358778509), Robert Phillips wrote:
> > Support is provided for both shadow and hardware assisted paging (HAP)
> > modes. This code bookkeeps the set of video frame buffers (vram),
> > detects when the guest has modified any of those buffers and, upon
> > request, returns a bitmap of the modified pages.
> > This lets other software components re-paint the portions of the
> > monitor (or monitors) that have changed.
> > Each monitor has a frame buffer of some size at some position in guest
> > physical memory.
> > The set of frame buffers being tracked can change over time as
> > monitors are plugged and unplugged.
> 
> This doesn't apply to tip of xen-unstable.  Also:
> 
> > +        ext = __map_domain_page(pg);
> > +        /* Is unmapped in dirty_vram_free() */
> 
> Mappings from map_domain_page() can't be kept around like this.  They're
> supposed to be short-term, and in systems where we don't have a full 1-1
> map of memory (e.g. x86 once Jan's 16TB-support series goes in) there are a
> limited number of mapping slots.
> 
> Jan, what do you recommend here?  These are pages of linked-list entries,
> part of the p2m/mm overhead and so allocated from the guest's shadow
> memory.  Are we going to have to allocate them from xenheap instead or is
> there any way to avoid that?

One possible solution is to use a fixmap table. 
Rather than storing VAs in these tables, the code could store
entries containing physical_page/offset_in_page. 

 When managing the table
the code could remap PA/offset into a single fixed virtual address
(and clear its TLB).  Clearing a single TLB shouldn't hurt
performance too terribly, right?

(When I write "a single VA" I really mean "a small number of VAs".)

I do not know about fixmap tables, whether this is their
intended use and whether this solution would work.

> 
> Tim.

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH] Provide support for multiple frame buffers in Xen
  2013-01-21 19:28 Robert Phillips
  2013-01-22  7:31 ` Pasi Kärkkäinen
@ 2013-01-24 11:25 ` Tim Deegan
  2013-02-05 11:38   ` Robert Phillips
  2013-03-01 20:52   ` Robert Phillips
  1 sibling, 2 replies; 35+ messages in thread
From: Tim Deegan @ 2013-01-24 11:25 UTC (permalink / raw)
  To: Robert Phillips; +Cc: Jan Beulich, xen-devel

Hi,

At 14:28 -0500 on 21 Jan (1358778509), Robert Phillips wrote:
> Support is provided for both shadow and hardware assisted paging (HAP)
> modes. This code bookkeeps the set of video frame buffers (vram),
> detects when the guest has modified any of those buffers and, upon request,
> returns a bitmap of the modified pages.
> This lets other software components re-paint the portions of the monitor
> (or monitors) that have changed.
> Each monitor has a frame buffer of some size at some position
> in guest physical memory.
> The set of frame buffers being tracked can change over time as monitors
> are plugged and unplugged.

This doesn't apply to tip of xen-unstable.  Also: 

> +        ext = __map_domain_page(pg);
> +        /* Is unmapped in dirty_vram_free() */

Mappings from map_domain_page() can't be kept around like this.  They're
supposed to be short-term, and in systems where we don't have a full 1-1
map of memory (e.g. x86 once Jan's 16TB-support series goes in) there
are a limited number of mapping slots.

Jan, what do you recommend here?  These are pages of linked-list
entries, part of the p2m/mm overhead and so allocated from the guest's
shadow memory.  Are we going to have to allocate them from xenheap
instead or is there any way to avoid that?

Tim.

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH] Provide support for multiple frame buffers in Xen
  2013-01-21 19:28 Robert Phillips
@ 2013-01-22  7:31 ` Pasi Kärkkäinen
  2013-01-24 11:25 ` Tim Deegan
  1 sibling, 0 replies; 35+ messages in thread
From: Pasi Kärkkäinen @ 2013-01-22  7:31 UTC (permalink / raw)
  To: Robert Phillips; +Cc: xen-devel

On Mon, Jan 21, 2013 at 02:28:29PM -0500, Robert Phillips wrote:
> Support is provided for both shadow and hardware assisted paging (HAP)
> modes. This code bookkeeps the set of video frame buffers (vram),
> detects when the guest has modified any of those buffers and, upon request,
> returns a bitmap of the modified pages.
> This lets other software components re-paint the portions of the monitor
> (or monitors) that have changed.
> Each monitor has a frame buffer of some size at some position
> in guest physical memory.
> The set of frame buffers being tracked can change over time as monitors
> are plugged and unplugged.
> 

Hello,

I think this patch has been submitted multiple times already,
so if you still need to submit it again please add a "version" string to the subject,
and also a changelog; it helps with understanding the changes.

Thanks,

-- Pasi

> Signed-Off-By: Robert Phillips <robert.phillips@citrix.com>
> ---
>  tools/libxc/xenctrl.h            |   20 +-
>  xen/arch/x86/hvm/hvm.c           |    9 +-
>  xen/arch/x86/mm/Makefile         |    1 +
>  xen/arch/x86/mm/dirty_vram.c     |  866 ++++++++++++++++++++++++++++++++++++++
>  xen/arch/x86/mm/hap/hap.c        |  111 -----
>  xen/arch/x86/mm/p2m.c            |   11 +-
>  xen/arch/x86/mm/paging.c         |   57 ++-
>  xen/arch/x86/mm/shadow/common.c  |  335 ++++++++-------
>  xen/arch/x86/mm/shadow/multi.c   |  174 ++++----
>  xen/arch/x86/mm/shadow/multi.h   |    7 +-
>  xen/arch/x86/mm/shadow/types.h   |    1 +
>  xen/include/asm-x86/dirty_vram.h |  202 +++++++++
>  xen/include/asm-x86/hap.h        |    4 -
>  xen/include/asm-x86/hvm/domain.h |    2 +-
>  xen/include/asm-x86/paging.h     |   15 +-
>  xen/include/asm-x86/shadow.h     |    6 -
>  16 files changed, 1424 insertions(+), 397 deletions(-)
>  create mode 100644 xen/arch/x86/mm/dirty_vram.c
>  create mode 100644 xen/include/asm-x86/dirty_vram.h
> 
> diff --git a/tools/libxc/xenctrl.h b/tools/libxc/xenctrl.h
> index 32122fd..cd4e1ef 100644
> --- a/tools/libxc/xenctrl.h
> +++ b/tools/libxc/xenctrl.h
> @@ -1563,15 +1563,23 @@ int xc_hvm_inject_msi(
>      xc_interface *xch, domid_t dom, uint64_t addr, uint32_t data);
>  
>  /*
> - * Track dirty bit changes in the VRAM area
> + * Track dirty bit changes in a VRAM region defined by
> + * [ first_pfn : first_pfn + nr - 1 ]
>   *
>   * All of this is done atomically:
> - * - get the dirty bitmap since the last call
> - * - set up dirty tracking area for period up to the next call
> - * - clear the dirty tracking area.
> + * - gets the dirty bitmap since the last call, all zeroes for
> + *   the first call with some new region
> + * - sets up a dirty tracking region for period up to the next call
> + * - clears the specified dirty tracking region.
>   *
> - * Returns -ENODATA and does not fill bitmap if the area has changed since the
> - * last call.
> + * Creating a new region causes any existing regions that it overlaps
> + * to be discarded.
> + *
> + * Specifying nr == 0 causes all regions to be discarded and
> + * disables dirty bit tracking.
> + *
> + * If nr is not a multiple of 64, only the first nr bits of bitmap
> + * are well defined.
>   */
>  int xc_hvm_track_dirty_vram(
>      xc_interface *xch, domid_t dom,
> diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
> index 40c1ab2..8ddb91d 100644
> --- a/xen/arch/x86/hvm/hvm.c
> +++ b/xen/arch/x86/hvm/hvm.c
> @@ -57,6 +57,7 @@
>  #include <asm/hvm/cacheattr.h>
>  #include <asm/hvm/trace.h>
>  #include <asm/hvm/nestedhvm.h>
> +#include <asm/dirty_vram.h>
>  #include <asm/mtrr.h>
>  #include <asm/apic.h>
>  #include <public/sched.h>
> @@ -66,6 +67,7 @@
>  #include <asm/mem_event.h>
>  #include <asm/mem_access.h>
>  #include <public/mem_event.h>
> +#include "../mm/mm-locks.h"
>  
>  bool_t __read_mostly hvm_enabled;
>  
> @@ -1433,8 +1435,11 @@ int hvm_hap_nested_page_fault(paddr_t gpa,
>           */
>          if ( access_w )
>          {
> -            paging_mark_dirty(v->domain, mfn_x(mfn));
> -            p2m_change_type(v->domain, gfn, p2m_ram_logdirty, p2m_ram_rw);
> +            if ( p2m_change_type(v->domain, gfn, p2m_ram_logdirty,
> +                                 p2m_ram_rw) == p2m_ram_logdirty )
> +            {
> +                paging_mark_dirty_gpfn(v->domain, gfn);
> +            }
>          }
>          rc = 1;
>          goto out_put_gfn;
> diff --git a/xen/arch/x86/mm/Makefile b/xen/arch/x86/mm/Makefile
> index 73dcdf4..becd0c9 100644
> --- a/xen/arch/x86/mm/Makefile
> +++ b/xen/arch/x86/mm/Makefile
> @@ -5,6 +5,7 @@ obj-y += paging.o
>  obj-y += p2m.o p2m-pt.o p2m-ept.o p2m-pod.o
>  obj-y += guest_walk_2.o
>  obj-y += guest_walk_3.o
> +obj-y += dirty_vram.o
>  obj-$(x86_64) += guest_walk_4.o
>  obj-$(x86_64) += mem_event.o
>  obj-$(x86_64) += mem_paging.o
> diff --git a/xen/arch/x86/mm/dirty_vram.c b/xen/arch/x86/mm/dirty_vram.c
> new file mode 100644
> index 0000000..75465ca
> --- /dev/null
> +++ b/xen/arch/x86/mm/dirty_vram.c
> @@ -0,0 +1,866 @@
> +/*
> + * arch/x86/mm/dirty_vram.c: Bookkeep/query dirty VRAM pages
> + * with support for multiple frame buffers.
> + *
> + * Copyright (c) 2012, Citrix Systems, Inc. (Robert Phillips)
> + * Parts of this code are Copyright (c) 2007 Advanced Micro Devices (Wei Huang)
> + * Parts of this code are Copyright (c) 2007 XenSource Inc.
> + * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
> + * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> + * more details.
> + *
> + * You should have received a copy of the GNU General Public License along with
> + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
> + * Place - Suite 330, Boston, MA 02111-1307 USA.
> + */
> +
> +
> +#include <xen/types.h>
> +#include <xen/sched.h>
> +#include <xen/guest_access.h>
> +#include <asm/shadow.h>
> +#include <asm/dirty_vram.h>
> +#include <asm/hap.h>
> +#include <asm/config.h>
> +#include "mm-locks.h"
> +
> +#define DEBUG_stop_tracking_all_vram          0
> +#define DEBUG_allocating_dirty_vram_range     0
> +#define DEBUG_high_water_mark_for_vram_ranges 0
> +#define DEBUG_freeing_dirty_vram_range        0
> +#define DEBUG_allocate_paddr_links_page       0
> +#define DEBUG_update_vram_mapping             0
> +#define DEBUG_alloc_paddr_inject_fault        0
> +#define DEBUG_link_limit_exceeded             0
> +
> +/* Allocates domain's dirty_vram structure */
> +dv_dirty_vram_t *
> +dirty_vram_alloc(struct domain *d)
> +{
> +    dv_dirty_vram_t *dirty_vram;
> +    ASSERT( paging_locked_by_me(d) );
> +    dirty_vram = d->arch.hvm_domain.dirty_vram = xzalloc(dv_dirty_vram_t);
> +    if ( dirty_vram )
> +    {
> +        INIT_LIST_HEAD(&dirty_vram->range_head);
> +        INIT_LIST_HEAD(&dirty_vram->ext_head);
> +    }
> +    return dirty_vram;
> +}
> +
> +/*
> + * Returns domain's dirty_vram structure,
> + * allocating it if necessary
> + */
> +dv_dirty_vram_t *
> +dirty_vram_find_or_alloc(struct domain *d)
> +{
> +    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
> +    ASSERT( paging_locked_by_me(d) );
> +    if ( !dirty_vram )
> +        dirty_vram = dirty_vram_alloc(d);
> +    return dirty_vram;
> +}
> +
> +
> +/* Free domain's dirty_vram structure */
> +void dirty_vram_free(struct domain *d)
> +{
> +    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
> +    ASSERT( paging_locked_by_me(d) );
> +    if ( dirty_vram )
> +    {
> +        struct list_head *curr, *next;
> +        /* Free all the ranges */
> +        list_for_each_safe(curr, next, &dirty_vram->range_head)
> +        {
> +            dv_range_t *range = list_entry(curr, dv_range_t, range_link);
> +#if DEBUG_stop_tracking_all_vram
> +            gdprintk(XENLOG_DEBUG, "[%05lx:%05lx] stop tracking all vram\n",
> +                     range->begin_pfn, range->end_pfn);
> +#endif
> +            xfree(range->pl_tab);
> +            xfree(range);
> +        }
> +        /* Free all the extension pages */
> +        list_for_each_safe(curr, next, &dirty_vram->ext_head)
> +        {
> +            struct dv_paddr_link_ext *ext =
> +                container_of(
> +                    curr, struct dv_paddr_link_ext, ext_link);
> +            struct page_info *pg = __virt_to_page(ext);
> +            unmap_domain_page(ext);
> +            d->arch.paging.free_page(d, pg);
> +        }
> +
> +        xfree(dirty_vram);
> +        d->arch.hvm_domain.dirty_vram = NULL;
> +    }
> +}
> +
> +/* Returns dirty vram range containing gfn, NULL if none */
> +struct dv_range *
> +dirty_vram_range_find_gfn(struct domain *d,
> +                          unsigned long gfn)
> +{
> +    struct dv_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
> +    ASSERT( paging_locked_by_me(d) );
> +    if ( dirty_vram )
> +    {
> +        struct list_head *curr;
> +        list_for_each(curr, &dirty_vram->range_head)
> +        {
> +            dv_range_t *range = list_entry(curr, dv_range_t, range_link);
> +            if ( gfn >= range->begin_pfn &&
> +                 gfn <  range->end_pfn )
> +                return range;
> +        }
> +    }
> +    return NULL;
> +}
> +
> +/*
> + * Returns pointer to dirty vram range matching [begin_pfn .. end_pfn ),
> + * NULL if none.
> + */
> +dv_range_t *
> +dirty_vram_range_find(struct domain *d,
> +                      unsigned long begin_pfn,
> +                      unsigned long nr)
> +{
> +    unsigned long end_pfn = begin_pfn + nr;
> +    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
> +    ASSERT( paging_locked_by_me(d) );
> +    if ( dirty_vram )
> +    {
> +        struct list_head *curr;
> +        list_for_each(curr, &dirty_vram->range_head)
> +        {
> +            dv_range_t *range = list_entry(curr, dv_range_t, range_link);
> +            if ( begin_pfn == range->begin_pfn &&
> +                 end_pfn   == range->end_pfn )
> +                return range;
> +        }
> +    }
> +    return NULL;
> +}
> +
> +/* Allocate specified dirty_vram range */
> +static dv_range_t *
> +_dirty_vram_range_alloc(struct domain *d,
> +                        unsigned long begin_pfn,
> +                        unsigned long nr)
> +{
> +    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
> +    dv_range_t *range = NULL;
> +    unsigned long end_pfn = begin_pfn + nr;
> +    dv_pl_entry_t *pl_tab = NULL;
> +    int i;
> +
> +    ASSERT( paging_locked_by_me(d) );
> +    ASSERT( dirty_vram != NULL );
> +
> +#if DEBUG_allocating_dirty_vram_range
> +    gdprintk(XENLOG_DEBUG,
> +             "[%05lx:%05lx] Allocating dirty vram range hap:%d\n",
> +             begin_pfn, end_pfn,
> +             d->arch.hvm_domain.hap_enabled);
> +#endif
> +
> +    range = xzalloc(dv_range_t);
> +    if ( range == NULL )
> +        goto err_out;
> +
> +    INIT_LIST_HEAD(&range->range_link);
> +
> +    range->begin_pfn = begin_pfn;
> +    range->end_pfn = end_pfn;
> +
> +    if ( !hap_enabled(d) )
> +    {
> +        if ( (pl_tab = xzalloc_array(dv_pl_entry_t, nr)) == NULL )
> +            goto err_out;
> +
> +        for ( i = 0; i != nr; i++ )
> +        {
> +            pl_tab[i].mapping.sl1ma = INVALID_PADDR;
> +        }
> +    }
> +
> +    range->pl_tab = pl_tab;
> +    range->mappings_hwm = 1;
> +
> +    list_add(&range->range_link, &dirty_vram->range_head);
> +    if ( ++dirty_vram->nr_ranges > dirty_vram->ranges_hwm )
> +    {
> +        dirty_vram->ranges_hwm = dirty_vram->nr_ranges;
> +#if DEBUG_high_water_mark_for_vram_ranges
> +        gdprintk(XENLOG_DEBUG,
> +                 "High water mark for number of vram ranges is now:%d\n",
> +                 dirty_vram->ranges_hwm);
> +#endif
> +    }
> +    return range;
> +
> + err_out:
> +    xfree(pl_tab);
> +    xfree(range);
> +    return NULL;
> +}
> +
> +
> +/* Frees specified dirty_vram range */
> +void dirty_vram_range_free(struct domain *d,
> +                           dv_range_t *range)
> +{
> +    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
> +    ASSERT( paging_locked_by_me(d) );
> +    if ( dirty_vram )
> +    {
> +        int i, nr = range->end_pfn - range->begin_pfn;
> +
> +#if DEBUG_freeing_dirty_vram_range
> +        gdprintk(XENLOG_DEBUG,
> +                 "[%05lx:%05lx] Freeing dirty vram range\n",
> +                 range->begin_pfn, range->end_pfn);
> +#endif
> +
> +        if ( range->pl_tab )
> +        {
> +            for ( i = 0; i != nr; i++ )
> +            {
> +                dv_paddr_link_t *plx;
> +                plx = range->pl_tab[i].mapping.pl_next;
> +                /* Does current FB page have multiple mappings? */
> +                if ( plx ) /* yes */
> +                {
> +                    /* Find the last element in singly-linked list */
> +                    while ( plx->pl_next != NULL )
> +                        plx = plx->pl_next;
> +                    
> +                    /* Prepend whole list to the free list */
> +                    plx->pl_next = dirty_vram->pl_free;
> +                    dirty_vram->pl_free = range->pl_tab[i].mapping.pl_next;
> +                }
> +            }
> +            xfree(range->pl_tab);
> +            range->pl_tab = NULL;
> +        }
> +
> +        /* Remove range from the linked list, free it, and adjust count*/
> +        list_del(&range->range_link);
> +        xfree(range);
> +        dirty_vram->nr_ranges--;
> +    }
> +}
> +
> +/*
> + * dirty_vram_range_alloc()
> + * This function ensures that the new range does not overlap any existing
> + * ranges -- deleting them if necessary -- and then calls
> + * _dirty_vram_range_alloc to actually allocate the new range.
> + */
> +dv_range_t *
> +dirty_vram_range_alloc(struct domain *d,
> +                        unsigned long begin_pfn,
> +                        unsigned long nr)
> +{
> +    unsigned long end_pfn = begin_pfn + nr;
> +    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
> +    dv_range_t *range;
> +    struct list_head *curr, *next;
> +
> +    ASSERT( paging_locked_by_me(d) );
> +    ASSERT( dirty_vram != NULL );
> +
> +    /*
> +     * Ranges cannot overlap so
> +     * free any range that overlaps [ begin_pfn .. end_pfn )
> +     */
> +    list_for_each_safe(curr, next, &dirty_vram->range_head)
> +    {
> +        dv_range_t *rng = list_entry(curr, dv_range_t, range_link);
> +        if ( ( ( rng->begin_pfn <= begin_pfn ) &&
> +               ( begin_pfn <  rng->end_pfn   )
> +                 ) ||
> +             ( ( begin_pfn <= rng->begin_pfn ) &&
> +               ( rng->begin_pfn < end_pfn    )
> +                 ) )
> +        {
> +            /* Different tracking, tear the previous down. */
> +            dirty_vram_range_free(d, rng);
> +        }
> +    }
> +
> +    range = _dirty_vram_range_alloc(d, begin_pfn, nr);
> +    if ( !range )
> +        goto out;
> +
> + out:
> +    return range;
> +}
> +
> +/*
> + * dirty_vram_range_find_or_alloc()
> + * Find the range for [begin_pfn:begin_pfn+nr).
> + * If it doesn't exists, create it.
> + */
> +dv_range_t *
> +dirty_vram_range_find_or_alloc(struct domain *d,
> +                                unsigned long begin_pfn,
> +                                unsigned long nr)
> +{
> +    dv_range_t *range;
> +    ASSERT( paging_locked_by_me(d) );
> +    range = dirty_vram_range_find(d, begin_pfn, nr);
> +    if ( !range )
> +        range = dirty_vram_range_alloc(d, begin_pfn, nr);
> +    
> +    return range;
> +}
> +
> +
> +
> +/* Allocate a dv_paddr_link struct */
> +static
> +dv_paddr_link_t *
> +alloc_paddr_link(struct domain *d)
> +{
> +    dv_paddr_link_t * pl = NULL;
> +    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
> +    dv_paddr_link_ext_t *ext = NULL;
> +    
> +
> +    ASSERT( paging_locked_by_me(d) );
> +    BUILD_BUG_ON(sizeof(dv_paddr_link_ext_t) > PAGE_SIZE);
> +    /* Is the list of free pl's empty? */
> +    if ( dirty_vram->pl_free == NULL ) /* yes */
> +    {
> +        /*
> +         * Allocate another page of pl's.
> +         * Link them all together and point the free list head at them
> +         */
> +        int i;
> +        struct page_info *pg = d->arch.paging.alloc_page(d);
> +
> +        ext = __map_domain_page(pg);
> +        /* Is unmapped in dirty_vram_free() */
> +        if ( ext == NULL )
> +            goto out;
> +
> +#if DEBUG_allocate_paddr_links_page
> +        gdprintk(XENLOG_DEBUG, "Allocated another page of paddr_links\n");
> +#endif
> +        list_add(&ext->ext_link, &dirty_vram->ext_head);
> +
> +        /* initialize and link together the new pl entries */
> +        for ( i = 0; i != ARRAY_SIZE(ext->entries); i++ )
> +        {
> +            ext->entries[i].sl1ma = INVALID_PADDR;
> +            ext->entries[i].pl_next = &ext->entries[i+1];
> +        }
> +        ext->entries[ARRAY_SIZE(ext->entries) - 1].pl_next = NULL;
> +        dirty_vram->pl_free = &ext->entries[0];
> +    }
> +    pl = dirty_vram->pl_free;
> +    dirty_vram->pl_free = pl->pl_next;
> +
> +    pl->sl1ma = INVALID_PADDR;
> +    pl->pl_next = NULL;
> + out:
> +    return pl;
> +}
> +
> +
> +/*
> + * Free a paddr_link struct.
> + *
> + * The caller has walked the singly-linked list of elements
> + * that have, as their head, an element in a pl_tab cell.
> + * The list walks has reached the element to be freed.
> + * (Each element is a dv_paddr_link_t struct.)
> + *
> + * @pl points to the element to be freed.
> + * @ppl points to its predecessor element's next member.
> + *
> + * After linking the precessor to the element's successor,
> + * we can free @pl by prepending it to the list of free
> + * elements.
> + *
> + * As a boundary case (which happens to be the common case),
> + * @pl points to a cell in the pl_tab rather than to some
> + * extension element danging from that cell.
> + * We recognize this case because @ppl is NULL.
> + * In that case we promote the first extension element by
> + * copying it into the pl_tab cell and free it.
> + */
> +
> +dv_paddr_link_t *
> +free_paddr_link(struct domain *d,
> +                dv_paddr_link_t **ppl,
> +                dv_paddr_link_t *pl)
> +{
> +    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
> +    dv_paddr_link_t *npl; /* next pl */
> +
> +    ASSERT( paging_locked_by_me(d) );
> +    /* extension mapping? */
> +    if ( ppl ) /* yes. free it */
> +    {
> +        ASSERT(pl == (*ppl));
> +        (*ppl) = npl = pl->pl_next;
> +    }
> +    else  /* main table */
> +    {
> +        /*
> +         * move 2nd mapping to main table.
> +         * and free 2nd mapping
> +         */
> +        dv_paddr_link_t * spl;
> +        spl = pl->pl_next;
> +        if ( spl == NULL )
> +        {
> +            pl->sl1ma = INVALID_PADDR;
> +            return pl;
> +        }
> +        pl->sl1ma = spl->sl1ma;
> +        pl->pl_next = spl->pl_next;
> +        npl = pl; /* reprocess main table entry again */
> +        pl = spl;
> +    }
> +    pl->sl1ma = INVALID_PADDR;
> +    pl->pl_next = dirty_vram->pl_free;
> +    dirty_vram->pl_free = pl;
> +    return npl;
> +}
> +
> +
> +/*
> + * dirty_vram_range_update()
> + *
> + * This is called whenever a level 1 page table entry is modified.
> + * If the L1PTE is being cleared, the function removes any paddr_links
> + * that refer to it.
> + * If the L1PTE is being set to a frame buffer page, a paddr_link is
> + * created for that page's entry in pl_tab.
> + * Returns 1 iff entry found and set or cleared.
> + */
> +int dirty_vram_range_update(struct domain *d,
> +                            unsigned long gfn,
> +                            paddr_t sl1ma,
> +                            int set)
> +{
> +    int effective = 0;
> +    dv_range_t *range;
> +    unsigned long i;
> +    dv_paddr_link_t *pl;
> +    dv_paddr_link_t **ppl;
> +    int len = 0;
> +
> +    ASSERT(paging_locked_by_me(d));
> +    range = dirty_vram_range_find_gfn(d, gfn);
> +    if ( !range )
> +        return effective;
> +
> +    
> +    i = gfn - range->begin_pfn;
> +    pl = &range->pl_tab[ i ].mapping;
> +    ppl = NULL;
> +
> +    /*
> +     * find matching entry (pl), if any, and its predecessor
> +     * in linked list (ppl)
> +     */
> +    while ( pl != NULL )
> +    {
> +        if ( pl->sl1ma == sl1ma || pl->sl1ma == INVALID_PADDR )
> +            break;
> +            
> +        ppl = &pl->pl_next;
> +        pl = *ppl;
> +        len++;
> +    }
> +
> +    if ( set )
> +    {
> +        /* Did we find sl1ma in either the main table or the linked list? */
> +        if ( pl == NULL ) /* no, so we'll need to alloc a link */
> +        {
> +            ASSERT(ppl != NULL);
> +            
> +#if DEBUG_alloc_paddr_inject_fault
> +            {
> +                static int counter;
> +                
> +                /* Test stuck_dirty logic for some cases */
> +                if ( (++counter) % 4 == 0 )
> +                {
> +                    /* Simply mark the frame buffer page as always dirty */
> +                    range->pl_tab[ i ].stuck_dirty = 1;
> +                    gdprintk(XENLOG_DEBUG,
> +                             "[%lx] inject stuck dirty fault\n",
> +                             gfn );
> +                    goto out;
> +                }
> +            }
> +#endif
> +            /*
> +             * Have we reached the limit of mappings we're willing
> +             * to bookkeep?
> +             */
> +            if ( len > DV_ADDR_LINK_LIST_LIMIT ) /* yes */
> +            {
> +#if DEBUG_link_limit_exceeded
> +                if ( !range->pl_tab[ i ].stuck_dirty )
> +                    gdprintk(XENLOG_DEBUG,
> +                             "[%lx] link limit exceeded\n",
> +                             gfn );
> +#endif            
> +                /* Simply mark the frame buffer page as always dirty */
> +                range->pl_tab[ i ].stuck_dirty = 1;
> +                goto out;
> +            }
> +
> +            /* alloc link and append it to list */
> +            (*ppl) = pl = alloc_paddr_link(d);
> +            /* Were we able to allocate a link? */
> +            if ( pl == NULL ) /* no */
> +            {
> +                /* Simply mark the frame buffer page as always dirty */
> +                range->pl_tab[ i ].stuck_dirty = 1;
> +                
> +                gdprintk(XENLOG_DEBUG,
> +                         "[%lx] alloc failure\n",
> +                         gfn );
> +                
> +                goto out;
> +            }
> +        }
> +        if ( pl->sl1ma != sl1ma )
> +        {
> +            ASSERT(pl->sl1ma == INVALID_PADDR);
> +            pl->sl1ma = sl1ma;
> +            range->nr_mappings++;
> +        }
> +        effective = 1;
> +        if ( len > range->mappings_hwm )
> +        {
> +            range->mappings_hwm = len;
> +#if DEBUG_update_vram_mapping
> +            gdprintk(XENLOG_DEBUG,
> +                     "[%lx] set      sl1ma:%lx hwm:%d mappings:%d "
> +                     "freepages:%d\n",
> +                     gfn, sl1ma,
> +                     range->mappings_hwm,
> +                     range->nr_mappings,
> +                     d->arch.paging.shadow.free_pages);
> +#endif
> +        }
> +    }
> +    else /* clear */
> +    {
> +        if ( pl && pl->sl1ma == sl1ma )
> +        {
> +#if DEBUG_update_vram_mapping
> +            gdprintk(XENLOG_DEBUG,
> +                     "[%lx] clear    sl1ma:%lx mappings:%d\n",
> +                     gfn, sl1ma,
> +                     range->nr_mappings - 1);
> +#endif
> +            free_paddr_link(d, ppl, pl);
> +            --range->nr_mappings;
> +            effective = 1;
> +        }
> +    }
> + out:
> +    return effective;
> +}
> +
> +
> +/*
> + * shadow_scan_dirty_flags()
> + * This produces a dirty bitmap for the range by examining every
> + * L1PTE referenced by some dv_paddr_link in the range's pl_tab table.
> + * It tests and clears each such L1PTE's dirty flag.
> + */
> +static
> +int shadow_scan_dirty_flags(struct domain *d,
> +                            dv_range_t *range,
> +                            uint8_t *dirty_bitmap)
> +{
> +    int flush_tlb = 0;
> +    unsigned long i;
> +    unsigned long nr = range->end_pfn - range->begin_pfn;
> +    l1_pgentry_t *sl1e = NULL;
> +
> +    ASSERT( paging_locked_by_me(d) );
> +    /* Iterate over VRAM to track dirty bits. */
> +    for ( i = 0; i < nr; i++ )
> +    {
> +        int dirty = 0, len = 1;
> +        dv_paddr_link_t *pl;
> +        /* Does the frame buffer have an incomplete set of mappings? */
> +        if ( unlikely(range->pl_tab[i].stuck_dirty) ) /* yes */
> +            dirty = 1;
> +        else /* The frame buffer's set of mappings is complete.  Scan it. */
> +            for ( pl = &range->pl_tab[i].mapping;
> +                  pl;
> +                  pl = pl->pl_next, len++ )
> +            {
> +                paddr_t sl1ma = pl->sl1ma;
> +                if ( sl1ma == INVALID_PADDR ) /* FB page is unmapped */
> +                    continue;
> +
> +                if ( sl1e ) /* cleanup from previous iteration */
> +                    unmap_domain_page(sl1e);
> +
> +                sl1e = map_domain_page(sl1ma >> PAGE_SHIFT);
> +                sl1e += (sl1ma & (PAGE_SIZE - 1 ))/sizeof(*sl1e);
> +                if ( l1e_get_flags(*sl1e) & _PAGE_DIRTY )
> +                {
> +                    dirty = 1;
> +                    /* Clear dirty so we can detect if page gets re-dirtied.
> +                     * Note: this is atomic, so we may clear a
> +                     * _PAGE_ACCESSED set by another processor.
> +                     */
> +                    l1e_remove_flags(*sl1e, _PAGE_DIRTY);
> +                    flush_tlb = 1;
> +                }
> +            } /* for */
> +        
> +        if ( dirty )
> +            dirty_bitmap[i >> 3] |= (1 << (i & 7));
> +
> +    }
> +    
> +    if ( sl1e )
> +        unmap_domain_page(sl1e);
> +
> +    return flush_tlb;
> +}
> +
> +
> +/*
> + * shadow_track_dirty_vram()
> + * This is the API called by the guest to determine which pages in the range
> + * from [begin_pfn:begin_pfn+nr) have been dirtied since the last call.
> + * It creates the domain's dv_dirty_vram on demand.
> + * It creates ranges on demand when some [begin_pfn:nr) is first encountered.
> + * To collect the dirty bitmask it calls shadow_scan_dirty_flags().
> + * It copies the dirty bitmask into guest storage.
> + */
> +int shadow_track_dirty_vram(struct domain *d,
> +                            unsigned long begin_pfn,
> +                            unsigned long nr,
> +                            XEN_GUEST_HANDLE_64(uint8) guest_dirty_bitmap)
> +{
> +    int rc = 0;
> +    unsigned long end_pfn = begin_pfn + nr;
> +    int flush_tlb = 0;
> +    dv_range_t *range;
> +    struct p2m_domain *p2m = p2m_get_hostp2m(d);
> +    uint8_t *dirty_bitmap = NULL;
> +
> +    /*
> +     * This range test is tricky.
> +     *
> +     * The range [begin_pfn..end_pfn) is an open interval, so end_pfn
> +     * is a pfn beyond the end of the range.
> +     *
> +     * p2m->max_mapped_pfn is a valid PFN so p2m->max_mapped_pfn + 1 is an
> +     * invalid PFN.
> +     *
> +     * If end_pfn is beyond *that* then the range is invalid.
> +     */
> +    if ( end_pfn < begin_pfn
> +         || begin_pfn > p2m->max_mapped_pfn
> +         || end_pfn > p2m->max_mapped_pfn + 1 )
> +        return -EINVAL;
> +
> +    paging_lock(d);
> +
> +    if ( !nr )
> +    {
> +        dirty_vram_free(d);
> +        goto out;
> +    }
> +
> +    if ( guest_handle_is_null(guest_dirty_bitmap) )
> +        goto out;
> +
> +    if ( !dirty_vram_find_or_alloc(d) )
> +    {
> +        rc = -ENOMEM;
> +        goto out;
> +    }
> +
> +    range = dirty_vram_range_find(d, begin_pfn, nr);
> +    if ( !range )
> +    {
> +        range = dirty_vram_range_alloc(d, begin_pfn, nr);
> +        if ( range )
> +            sh_find_all_vram_mappings(d->vcpu[0], range);
> +    }
> +    if ( range )
> +    {
> +        int size = ( nr + BITS_PER_BYTE - 1 ) / BITS_PER_BYTE;
> +        
> +        rc = -ENOMEM;
> +        dirty_bitmap = xzalloc_bytes( size );
> +        if ( !dirty_bitmap )
> +            goto out;
> +
> +	flush_tlb |= shadow_scan_dirty_flags(d, range, dirty_bitmap);
> +
> +        rc = -EFAULT;
> +        if ( copy_to_guest(guest_dirty_bitmap,
> +                           dirty_bitmap,
> +                           size) == 0 )
> +            rc = 0;
> +    }
> +    
> +    if ( flush_tlb )
> +        flush_tlb_mask(d->domain_dirty_cpumask);
> +
> +out:
> +    paging_unlock(d);
> +    
> +    if ( dirty_bitmap )
> +        xfree(dirty_bitmap);
> +    return rc;
> +}
> +
> +
> +/************************************************/
> +/*          HAP VRAM TRACKING SUPPORT           */
> +/************************************************/
> +
> +/*
> + * hap_track_dirty_vram()
> + * Create the domain's dv_dirty_vram struct on demand.
> + * Create a dirty vram range on demand when some [begin_pfn:begin_pfn+nr] is
> + * first encountered.
> + * Collect the guest_dirty bitmask, a bit mask of the dirty vram pages, by
> + * calling paging_log_dirty_range(), which interrogates each vram
> + * page's p2m type looking for pages that have been made writable.
> + */
> +int hap_track_dirty_vram(struct domain *d,
> +                         unsigned long begin_pfn,
> +                         unsigned long nr,
> +                         XEN_GUEST_HANDLE_64(uint8) guest_dirty_bitmap)
> +{
> +    long rc = 0;
> +    dv_dirty_vram_t *dirty_vram;
> +    uint8_t *dirty_bitmap = NULL;
> +
> +    if ( nr )
> +    {
> +        dv_range_t *range = NULL;
> +        int size = ( nr + BITS_PER_BYTE - 1 ) / BITS_PER_BYTE;
> +        
> +        if ( !paging_mode_log_dirty(d) )
> +        {
> +            hap_logdirty_init(d);
> +            rc = paging_log_dirty_enable(d);
> +            if ( rc )
> +                goto out;
> +        }
> +
> +        rc = -ENOMEM;
> +        dirty_bitmap = xzalloc_bytes( size );
> +        if ( !dirty_bitmap )
> +            goto out;
> +        
> +        paging_lock(d);
> +        
> +        dirty_vram = d->arch.hvm_domain.dirty_vram;
> +        if ( !dirty_vram ) 
> +        {
> +            rc = -ENOMEM;
> +            if ( !(dirty_vram = dirty_vram_alloc(d)) )
> +            {
> +                paging_unlock(d);
> +                goto out;
> +            }
> +        }
> +        
> +        range = dirty_vram_range_find(d, begin_pfn, nr);
> +        if ( !range )
> +        {
> +            rc = -ENOMEM;
> +            if ( !(range = dirty_vram_range_alloc(d, begin_pfn, nr)) )
> +            {
> +                paging_unlock(d);
> +                goto out;
> +            }
> +            
> +            paging_unlock(d);
> +            
> +            /* set l1e entries of range within P2M table to be read-only. */
> +            p2m_change_type_range(d, begin_pfn, begin_pfn + nr,
> +                                  p2m_ram_rw, p2m_ram_logdirty);
> +            
> +            flush_tlb_mask(d->domain_dirty_cpumask);
> +            
> +            memset(dirty_bitmap, 0xff, size); /* consider all pages dirty */
> +        }
> +        else
> +        {
> +            paging_unlock(d);
> +            
> +            domain_pause(d);
> +            
> +            /* get the bitmap */
> +            paging_log_dirty_range(d, begin_pfn, nr, dirty_bitmap);
> +            
> +            domain_unpause(d);
> +        }
> +        
> +        
> +        rc = -EFAULT;
> +        if ( copy_to_guest(guest_dirty_bitmap,
> +                           dirty_bitmap,
> +                           size) == 0 )
> +        {
> +            rc = 0;
> +        }
> +    }
> +    else {
> +        paging_lock(d);
> +        
> +        dirty_vram = d->arch.hvm_domain.dirty_vram;
> +        if ( dirty_vram )
> +        {
> +            /*
> +             * If zero pages are specified while tracking dirty vram,
> +             * stop tracking.
> +             */
> +            dirty_vram_free(d);
> +        
> +        }
> +        
> +        paging_unlock(d);
> +    }
> +out:
> +    if ( dirty_bitmap )
> +        xfree(dirty_bitmap);
> +    
> +    return rc;
> +}
> +
> +/*
> + * Local variables:
> + * mode: C
> + * c-file-style: "BSD"
> + * c-basic-offset: 4
> + * indent-tabs-mode: nil
> + * End:
> + */
> diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c
> index a95ccbf..f7d979b 100644
> --- a/xen/arch/x86/mm/hap/hap.c
> +++ b/xen/arch/x86/mm/hap/hap.c
> @@ -53,117 +53,6 @@
>  #define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg))
>  
>  /************************************************/
> -/*          HAP VRAM TRACKING SUPPORT           */
> -/************************************************/
> -
> -/*
> - * hap_track_dirty_vram()
> - * Create the domain's dv_dirty_vram struct on demand.
> - * Create a dirty vram range on demand when some [begin_pfn:begin_pfn+nr] is
> - * first encountered.
> - * Collect the guest_dirty bitmask, a bit mask of the dirty vram pages, by
> - * calling paging_log_dirty_range(), which interrogates each vram
> - * page's p2m type looking for pages that have been made writable.
> - */
> -
> -int hap_track_dirty_vram(struct domain *d,
> -                         unsigned long begin_pfn,
> -                         unsigned long nr,
> -                         XEN_GUEST_HANDLE_64(uint8) guest_dirty_bitmap)
> -{
> -    long rc = 0;
> -    struct sh_dirty_vram *dirty_vram;
> -    uint8_t *dirty_bitmap = NULL;
> -
> -    if ( nr )
> -    {
> -        int size = (nr + BITS_PER_BYTE - 1) / BITS_PER_BYTE;
> -
> -        if ( !paging_mode_log_dirty(d) )
> -        {
> -            hap_logdirty_init(d);
> -            rc = paging_log_dirty_enable(d);
> -            if ( rc )
> -                goto out;
> -        }
> -
> -        rc = -ENOMEM;
> -        dirty_bitmap = xzalloc_bytes(size);
> -        if ( !dirty_bitmap )
> -            goto out;
> -
> -        paging_lock(d);
> -
> -        dirty_vram = d->arch.hvm_domain.dirty_vram;
> -        if ( !dirty_vram )
> -        {
> -            rc = -ENOMEM;
> -            if ( (dirty_vram = xzalloc(struct sh_dirty_vram)) == NULL )
> -            {
> -                paging_unlock(d);
> -                goto out;
> -            }
> -
> -            d->arch.hvm_domain.dirty_vram = dirty_vram;
> -        }
> -
> -        if ( begin_pfn != dirty_vram->begin_pfn ||
> -             begin_pfn + nr != dirty_vram->end_pfn )
> -        {
> -            dirty_vram->begin_pfn = begin_pfn;
> -            dirty_vram->end_pfn = begin_pfn + nr;
> -
> -            paging_unlock(d);
> -
> -            /* set l1e entries of range within P2M table to be read-only. */
> -            p2m_change_type_range(d, begin_pfn, begin_pfn + nr,
> -                                  p2m_ram_rw, p2m_ram_logdirty);
> -
> -            flush_tlb_mask(d->domain_dirty_cpumask);
> -
> -            memset(dirty_bitmap, 0xff, size); /* consider all pages dirty */
> -        }
> -        else
> -        {
> -            paging_unlock(d);
> -
> -            domain_pause(d);
> -
> -            /* get the bitmap */
> -            paging_log_dirty_range(d, begin_pfn, nr, dirty_bitmap);
> -
> -            domain_unpause(d);
> -        }
> -
> -        rc = -EFAULT;
> -        if ( copy_to_guest(guest_dirty_bitmap, dirty_bitmap, size) == 0 )
> -            rc = 0;
> -    }
> -    else
> -    {
> -        paging_lock(d);
> -
> -        dirty_vram = d->arch.hvm_domain.dirty_vram;
> -        if ( dirty_vram )
> -        {
> -            /*
> -             * If zero pages specified while tracking dirty vram
> -             * then stop tracking
> -             */
> -            xfree(dirty_vram);
> -            d->arch.hvm_domain.dirty_vram = NULL;
> -        }
> -
> -        paging_unlock(d);
> -    }
> -out:
> -    if ( dirty_bitmap )
> -        xfree(dirty_bitmap);
> -
> -    return rc;
> -}
> -
> -/************************************************/
>  /*            HAP LOG DIRTY SUPPORT             */
>  /************************************************/
>  
> diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
> index 258f46e..41d0fe3 100644
> --- a/xen/arch/x86/mm/p2m.c
> +++ b/xen/arch/x86/mm/p2m.c
> @@ -690,20 +690,23 @@ void p2m_change_type_range(struct domain *d,
>      struct p2m_domain *p2m = p2m_get_hostp2m(d);
>  
>      BUG_ON(p2m_is_grant(ot) || p2m_is_grant(nt));
> -
>      p2m_lock(p2m);
> -    p2m->defer_nested_flush = 1;
>  
> +    p2m->defer_nested_flush = 1;
> +    
>      for ( gfn = start; gfn < end; gfn++ )
>      {
>          mfn = p2m->get_entry(p2m, gfn, &pt, &a, 0, NULL);
>          if ( pt == ot )
> -            set_p2m_entry(p2m, gfn, mfn, PAGE_ORDER_4K, nt, p2m->default_access);
> +            set_p2m_entry(p2m, gfn, mfn, PAGE_ORDER_4K, nt,
> +                          p2m->default_access);
>      }
> -
> +    
>      p2m->defer_nested_flush = 0;
> +
>      if ( nestedhvm_enabled(d) )
>          p2m_flush_nestedp2m(d);
> +
>      p2m_unlock(p2m);
>  }
>  
> diff --git a/xen/arch/x86/mm/paging.c b/xen/arch/x86/mm/paging.c
> index a5cdbd1..cd44f6e 100644
> --- a/xen/arch/x86/mm/paging.c
> +++ b/xen/arch/x86/mm/paging.c
> @@ -27,6 +27,7 @@
>  #include <asm/p2m.h>
>  #include <asm/hap.h>
>  #include <asm/hvm/nestedhvm.h>
> +#include <asm/dirty_vram.h>
>  #include <xen/numa.h>
>  #include <xsm/xsm.h>
>  
> @@ -192,15 +193,11 @@ int paging_log_dirty_disable(struct domain *d)
>      return ret;
>  }
>  
> -/* Mark a page as dirty */
> +/* Given a guest mfn, mark a page as dirty */
>  void paging_mark_dirty(struct domain *d, unsigned long guest_mfn)
>  {
>      unsigned long pfn;
>      mfn_t gmfn;
> -    int changed;
> -    mfn_t mfn, *l4, *l3, *l2;
> -    unsigned long *l1;
> -    int i1, i2, i3, i4;
>  
>      gmfn = _mfn(guest_mfn);
>  
> @@ -210,6 +207,19 @@ void paging_mark_dirty(struct domain *d, unsigned long guest_mfn)
>  
>      /* We /really/ mean PFN here, even for non-translated guests. */
>      pfn = get_gpfn_from_mfn(mfn_x(gmfn));
> +    paging_mark_dirty_gpfn(d, pfn);
> +}
> +
> +
> +/* Given a guest pfn, mark a page as dirty */
> +void paging_mark_dirty_gpfn(struct domain *d, unsigned long pfn)
> +{
> +    int changed;
> +    mfn_t mfn, *l4, *l3, *l2;
> +    unsigned long *l1;
> +    int i1, i2, i3, i4;
> +    dv_range_t *range;
> +    
>      /* Shared MFNs should NEVER be marked dirty */
>      BUG_ON(SHARED_M2P(pfn));
>  
> @@ -229,6 +239,11 @@ void paging_mark_dirty(struct domain *d, unsigned long guest_mfn)
>      /* Recursive: this is called from inside the shadow code */
>      paging_lock_recursive(d);
>  
> +    d->arch.paging.log_dirty.dirty_count++;
> +    range = dirty_vram_range_find_gfn(d, pfn);
> +    if ( range )
> +        range->dirty_count++;
> +
>      if ( unlikely(!mfn_valid(d->arch.paging.log_dirty.top)) ) 
>      {
>           d->arch.paging.log_dirty.top = paging_new_log_dirty_node(d);
> @@ -445,7 +460,31 @@ void paging_log_dirty_range(struct domain *d,
>      struct p2m_domain *p2m = p2m_get_hostp2m(d);
>      int i;
>      unsigned long pfn;
> +    dv_range_t *range;
> +    unsigned int range_dirty_count;
>  
> +    paging_lock(d);
> +    range = dirty_vram_range_find_gfn(d, begin_pfn);
> +    if ( !range )
> +    {
> +        paging_unlock(d);
> +        goto out;
> +    }
> +    
> +    range_dirty_count = range->dirty_count;
> +    range->dirty_count = 0;
> +
> +    paging_unlock(d);
> +    
> +    if ( !range_dirty_count )
> +        goto out;
> +
> +    PAGING_DEBUG(LOGDIRTY,
> +                 "log-dirty-range: dom %u [%05lx:%05lx] range_dirty=%u\n",
> +                 d->domain_id,
> +                 begin_pfn,
> +                 begin_pfn + nr,
> +                 range_dirty_count);
>      /*
>       * Set l1e entries of P2M table to be read-only.
>       *
> @@ -460,15 +499,17 @@ void paging_log_dirty_range(struct domain *d,
>  
>      for ( i = 0, pfn = begin_pfn; pfn < begin_pfn + nr; i++, pfn++ )
>      {
> -        p2m_type_t pt;
> -        pt = p2m_change_type(d, pfn, p2m_ram_rw, p2m_ram_logdirty);
> -        if ( pt == p2m_ram_rw )
> +        if ( p2m_change_type(d, pfn, p2m_ram_rw, p2m_ram_logdirty) ==
> +             p2m_ram_rw )
>              dirty_bitmap[i >> 3] |= (1 << (i & 7));
>      }
>  
>      p2m_unlock(p2m);
>  
>      flush_tlb_mask(d->domain_dirty_cpumask);
> +
> + out:
> +    return;
>  }
>  
>  /* Note that this function takes three function pointers. Callers must supply
> diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c
> index ce79131..1e4b880 100644
> --- a/xen/arch/x86/mm/shadow/common.c
> +++ b/xen/arch/x86/mm/shadow/common.c
> @@ -36,6 +36,7 @@
>  #include <asm/current.h>
>  #include <asm/flushtlb.h>
>  #include <asm/shadow.h>
> +#include <asm/dirty_vram.h>
>  #include <xen/numa.h>
>  #include "private.h"
>  
> @@ -3121,12 +3122,7 @@ void shadow_teardown(struct domain *d)
>       * calls now that we've torn down the bitmap */
>      d->arch.paging.mode &= ~PG_log_dirty;
>  
> -    if (d->arch.hvm_domain.dirty_vram) {
> -        xfree(d->arch.hvm_domain.dirty_vram->sl1ma);
> -        xfree(d->arch.hvm_domain.dirty_vram->dirty_bitmap);
> -        xfree(d->arch.hvm_domain.dirty_vram);
> -        d->arch.hvm_domain.dirty_vram = NULL;
> -    }
> +    dirty_vram_free(d);
>  
>      paging_unlock(d);
>  
> @@ -3464,178 +3460,217 @@ void shadow_clean_dirty_bitmap(struct domain *d)
>  
>  /**************************************************************************/
>  /* VRAM dirty tracking support */
> -int shadow_track_dirty_vram(struct domain *d,
> -                            unsigned long begin_pfn,
> -                            unsigned long nr,
> -                            XEN_GUEST_HANDLE_64(uint8) dirty_bitmap)
> -{
> -    int rc;
> -    unsigned long end_pfn = begin_pfn + nr;
> -    unsigned long dirty_size = (nr + 7) / 8;
> -    int flush_tlb = 0;
> -    unsigned long i;
> -    p2m_type_t t;
> -    struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
> -    struct p2m_domain *p2m = p2m_get_hostp2m(d);
>  
> -    if (end_pfn < begin_pfn
> -            || begin_pfn > p2m->max_mapped_pfn
> -            || end_pfn >= p2m->max_mapped_pfn)
> -        return -EINVAL;
>  
> -    /* We perform p2m lookups, so lock the p2m upfront to avoid deadlock */
> -    p2m_lock(p2m_get_hostp2m(d));
> -    paging_lock(d);
> +/* Support functions for shadow-based dirty VRAM code */
>  
> -    if ( dirty_vram && (!nr ||
> -             ( begin_pfn != dirty_vram->begin_pfn
> -            || end_pfn   != dirty_vram->end_pfn )) )
> -    {
> -        /* Different tracking, tear the previous down. */
> -        gdprintk(XENLOG_INFO, "stopping tracking VRAM %lx - %lx\n", dirty_vram->begin_pfn, dirty_vram->end_pfn);
> -        xfree(dirty_vram->sl1ma);
> -        xfree(dirty_vram->dirty_bitmap);
> -        xfree(dirty_vram);
> -        dirty_vram = d->arch.hvm_domain.dirty_vram = NULL;
> -    }
> +#define DEBUG_unshadow_sl1ma                  0          
> +#define DEBUG_unshadow_sl1ma_detail           0
> +#define DEBUG_count_initial_mappings          0
>  
> -    if ( !nr )
> +/* smfn is no longer a shadow page.  Remove it from any
> + * dirty vram range mapping. */
> +void
> +dirty_vram_delete_shadow(struct vcpu *v,
> +                         unsigned long gfn,
> +                         unsigned int shadow_type, 
> +                         mfn_t smfn)
> +{
> +    static unsigned int l1_shadow_mask = 
> +          1 << SH_type_l1_32_shadow
> +        | 1 << SH_type_fl1_32_shadow
> +        | 1 << SH_type_l1_pae_shadow
> +        | 1 << SH_type_fl1_pae_shadow
> +        | 1 << SH_type_l1_64_shadow
> +        | 1 << SH_type_fl1_64_shadow
> +        ;
> +    struct domain *d = v->domain;
> +    dv_dirty_vram_t *dirty_vram;
> +    struct list_head *curr, *next;
> +    
> +    ASSERT(paging_locked_by_me(d));
> +    /* Ignore all but level 1 shadows */
> +    
> +    if ( (l1_shadow_mask & (1 << shadow_type)) == 0 )
>      {
> -        rc = 0;
>          goto out;
>      }
>  
> -    /* This should happen seldomly (Video mode change),
> -     * no need to be careful. */
> +    dirty_vram = d->arch.hvm_domain.dirty_vram;
>      if ( !dirty_vram )
>      {
> -        /* Throw away all the shadows rather than walking through them 
> -         * up to nr times getting rid of mappings of each pfn */
> -        shadow_blow_tables(d);
> -
> -        gdprintk(XENLOG_INFO, "tracking VRAM %lx - %lx\n", begin_pfn, end_pfn);
> -
> -        rc = -ENOMEM;
> -        if ( (dirty_vram = xmalloc(struct sh_dirty_vram)) == NULL )
> -            goto out;
> -        dirty_vram->begin_pfn = begin_pfn;
> -        dirty_vram->end_pfn = end_pfn;
> -        d->arch.hvm_domain.dirty_vram = dirty_vram;
> -
> -        if ( (dirty_vram->sl1ma = xmalloc_array(paddr_t, nr)) == NULL )
> -            goto out_dirty_vram;
> -        memset(dirty_vram->sl1ma, ~0, sizeof(paddr_t) * nr);
> -
> -        if ( (dirty_vram->dirty_bitmap = xzalloc_array(uint8_t, dirty_size)) == NULL )
> -            goto out_sl1ma;
> -
> -        dirty_vram->last_dirty = NOW();
> -
> -        /* Tell the caller that this time we could not track dirty bits. */
> -        rc = -ENODATA;
> -    }
> -    else if (dirty_vram->last_dirty == -1)
> -    {
> -        /* still completely clean, just copy our empty bitmap */
> -        rc = -EFAULT;
> -        if ( copy_to_guest(dirty_bitmap, dirty_vram->dirty_bitmap, dirty_size) == 0 )
> -            rc = 0;
> +        goto out;
>      }
> -    else
> +        
> +    list_for_each_safe(curr, next, &dirty_vram->range_head)
>      {
> -        /* Iterate over VRAM to track dirty bits. */
> -        for ( i = 0; i < nr; i++ ) {
> -            mfn_t mfn = get_gfn_query_unlocked(d, begin_pfn + i, &t);
> -            struct page_info *page;
> -            int dirty = 0;
> -            paddr_t sl1ma = dirty_vram->sl1ma[i];
> -
> -            if (mfn_x(mfn) == INVALID_MFN)
> -            {
> -                dirty = 1;
> -            }
> -            else
> +        dv_range_t *range = list_entry(curr, dv_range_t, range_link);
> +        unsigned long i;
> +        int max_mappings = 1, mappings = 0;
> +        int unshadowed = 0;
> +        for ( i = 0; i != range->end_pfn - range->begin_pfn; i++ )
> +        {
> +            dv_paddr_link_t *pl = &range->pl_tab[ i ].mapping;
> +            dv_paddr_link_t **ppl = NULL;
> +            mappings = 0;
> +            
> +            while ( pl != NULL )
>              {
> -                page = mfn_to_page(mfn);
> -                switch (page->u.inuse.type_info & PGT_count_mask)
> -                {
> -                case 0:
> -                    /* No guest reference, nothing to track. */
> -                    break;
> -                case 1:
> -                    /* One guest reference. */
> -                    if ( sl1ma == INVALID_PADDR )
> -                    {
> -                        /* We don't know which sl1e points to this, too bad. */
> -                        dirty = 1;
> -                        /* TODO: Heuristics for finding the single mapping of
> -                         * this gmfn */
> -                        flush_tlb |= sh_remove_all_mappings(d->vcpu[0], mfn);
> -                    }
> -                    else
> -                    {
> -                        /* Hopefully the most common case: only one mapping,
> -                         * whose dirty bit we can use. */
> -                        l1_pgentry_t *sl1e = maddr_to_virt(sl1ma);
> -
> -                        if ( l1e_get_flags(*sl1e) & _PAGE_DIRTY )
> -                        {
> -                            dirty = 1;
> -                            /* Note: this is atomic, so we may clear a
> -                             * _PAGE_ACCESSED set by another processor. */
> -                            l1e_remove_flags(*sl1e, _PAGE_DIRTY);
> -                            flush_tlb = 1;
> -                        }
> -                    }
> -                    break;
> -                default:
> -                    /* More than one guest reference,
> -                     * we don't afford tracking that. */
> -                    dirty = 1;
> +                paddr_t sl1ma = pl->sl1ma;
> +                unsigned long sl1mn;
> +                
> +                if ( sl1ma == INVALID_PADDR )
>                      break;
> +                
> +                sl1mn = sl1ma >> PAGE_SHIFT;
> +                if ( sl1mn == mfn_x(smfn) ) {
> +#if DEBUG_unshadow_sl1ma_detail
> +                    gdprintk(XENLOG_DEBUG,
> +                             "[%lx] gfn[%lx] unshadow sl1ma:%lx\n",
> +                             mfn_x(smfn),
> +                             range->begin_pfn + i,
> +                             sl1ma);
> +#endif
> +                    unshadowed++;
> +                    pl = free_paddr_link(d, ppl, pl);
> +                    --range->nr_mappings;
> +                }
> +                else
> +                {
> +                    ppl = &pl->pl_next;
> +                    pl = *ppl;
> +                    mappings++;
>                  }
>              }
> -
> -            if ( dirty )
> +        }
> +        if ( mappings > max_mappings )
> +            max_mappings = mappings;
> +        
> +        if ( unshadowed ) {
> +#if DEBUG_unshadow_sl1ma
> +            gdprintk(XENLOG_DEBUG,
> +                     "[%lx] gfn[%05lx:%05lx] unshadowed:%d mappings:0x%x "
> +                     "max_mappings:%d\n",
> +                     mfn_x(smfn),
> +                     range->begin_pfn, range->end_pfn,
> +                     unshadowed, range->nr_mappings, max_mappings);
> +#endif
> +            if ( range->nr_mappings == 0 )
>              {
> -                dirty_vram->dirty_bitmap[i / 8] |= 1 << (i % 8);
> -                dirty_vram->last_dirty = NOW();
> +                dirty_vram_range_free(d, range);                    
>              }
>          }
> +    }
> + out:
> +    return;
> +}
> +
>  
> -        rc = -EFAULT;
> -        if ( copy_to_guest(dirty_bitmap, dirty_vram->dirty_bitmap, dirty_size) == 0 ) {
> -            memset(dirty_vram->dirty_bitmap, 0, dirty_size);
> -            if (dirty_vram->last_dirty + SECONDS(2) < NOW())
> +typedef int (*hash_pfn_callback_t)(struct vcpu *v,
> +                                   mfn_t smfn,
> +                                   unsigned long begin_pfn,
> +                                   unsigned long end_pfn,
> +                                   int *removed);
> +
> +static int hash_pfn_foreach(struct vcpu *v, 
> +                            unsigned int callback_mask, 
> +                            hash_pfn_callback_t callbacks[], 
> +                            unsigned long begin_pfn,
> +                            unsigned long end_pfn)
> +/* Walk the hash table looking at the types of the entries and 
> + * calling the appropriate callback function for each entry. 
> + * The mask determines which shadow types we call back for, and the array
> + * of callbacks tells us which function to call.
> + * Any callback may return non-zero to let us skip the rest of the scan. 
> + *
> + * WARNING: Callbacks MUST NOT add or remove hash entries unless they 
> + * then return non-zero to terminate the scan. */
> +{
> +    int i, done = 0, removed = 0;
> +    struct domain *d = v->domain;
> +    struct page_info *x;
> +
> +    /* Say we're here, to stop hash-lookups reordering the chains */
> +    ASSERT(paging_locked_by_me(d));
> +    ASSERT(d->arch.paging.shadow.hash_walking == 0);
> +    d->arch.paging.shadow.hash_walking = 1;
> +
> +    for ( i = 0; i < SHADOW_HASH_BUCKETS; i++ ) 
> +    {
> +        /* WARNING: This is not safe against changes to the hash table.
> +         * The callback *must* return non-zero if it has inserted or
> +         * deleted anything from the hash (lookups are OK, though). */
> +        for ( x = d->arch.paging.shadow.hash_table[i];
> +              x;
> +              x = next_shadow(x) )
> +        {
> +            if ( callback_mask & (1 << x->u.sh.type) )
>              {
> -                /* was clean for more than two seconds, try to disable guest
> -                 * write access */
> -                for ( i = begin_pfn; i < end_pfn; i++ ) {
> -                    mfn_t mfn = get_gfn_query_unlocked(d, i, &t);
> -                    if (mfn_x(mfn) != INVALID_MFN)
> -                        flush_tlb |= sh_remove_write_access(d->vcpu[0], mfn, 1, 0);
> -                }
> -                dirty_vram->last_dirty = -1;
> +                ASSERT(x->u.sh.type <= 15);
> +                ASSERT(callbacks[x->u.sh.type] != NULL);
> +                done = callbacks[x->u.sh.type](v, page_to_mfn(x), 
> +                                               begin_pfn, end_pfn,
> +                                               &removed);
> +                if ( done ) break;
>              }
> -            rc = 0;
>          }
> +        if ( done ) break; 
>      }
> -    if ( flush_tlb )
> -        flush_tlb_mask(d->domain_dirty_cpumask);
> -    goto out;
> +    d->arch.paging.shadow.hash_walking = 0;
> +    return removed;
> +}
>  
> -out_sl1ma:
> -    xfree(dirty_vram->sl1ma);
> -out_dirty_vram:
> -    xfree(dirty_vram);
> -    dirty_vram = d->arch.hvm_domain.dirty_vram = NULL;
> +void sh_find_all_vram_mappings(struct vcpu *v,
> +                               dv_range_t *range)
> +{
> +    /* Dispatch table for getting per-type functions */
> +    static hash_pfn_callback_t callbacks[SH_type_unused] = {
> +        NULL, /* none    */
> +        SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, 2), /* l1_32   */
> +        SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, 2), /* fl1_32  */
> +        NULL, /* l2_32   */
> +        SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, 3), /* l1_pae  */
> +        SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, 3), /* fl1_pae */
> +        NULL, /* l2_pae  */
> +        NULL, /* l2h_pae */
> +#if CONFIG_PAGING_LEVELS >= 4
> +        SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, 4), /* l1_64   */
> +        SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, 4), /* fl1_64  */
> +#else
> +        NULL, /* l1_64   */
> +        NULL, /* fl1_64  */
> +#endif
> +        NULL, /* l2_64   */
> +        NULL, /* l2h_64  */
> +        NULL, /* l3_64   */
> +        NULL, /* l4_64   */
> +        NULL, /* p2m     */
> +        NULL  /* unused  */
> +    };
>  
> -out:
> -    paging_unlock(d);
> -    p2m_unlock(p2m_get_hostp2m(d));
> -    return rc;
> +    static unsigned int callback_mask = 
> +          1 << SH_type_l1_32_shadow
> +        | 1 << SH_type_fl1_32_shadow
> +        | 1 << SH_type_l1_pae_shadow
> +        | 1 << SH_type_fl1_pae_shadow
> +        | 1 << SH_type_l1_64_shadow
> +        | 1 << SH_type_fl1_64_shadow
> +        ;
> +
> +    perfc_incr(shadow_mappings);
> +
> +    hash_pfn_foreach(v, callback_mask, callbacks,
> +                     range->begin_pfn,
> +                     range->end_pfn);
> +
> +#if DEBUG_count_initial_mappings
> +    gdprintk(XENLOG_DEBUG, "[%05lx:%05lx] count of initial mappings:%d\n",
> +             range->begin_pfn, range->end_pfn,
> +             range->nr_mappings);
> +#endif
>  }
>  
> +
>  /**************************************************************************/
>  /* Shadow-control XEN_DOMCTL dispatcher */
>  
> diff --git a/xen/arch/x86/mm/shadow/multi.c b/xen/arch/x86/mm/shadow/multi.c
> index 4967da1..bb983bc 100644
> --- a/xen/arch/x86/mm/shadow/multi.c
> +++ b/xen/arch/x86/mm/shadow/multi.c
> @@ -35,6 +35,7 @@
>  #include <asm/flushtlb.h>
>  #include <asm/hvm/hvm.h>
>  #include <asm/hvm/cacheattr.h>
> +#include <asm/dirty_vram.h>
>  #include <asm/mtrr.h>
>  #include <asm/guest_pt.h>
>  #include <public/sched.h>
> @@ -149,6 +150,10 @@ delete_fl1_shadow_status(struct vcpu *v, gfn_t gfn, mfn_t smfn)
>      SHADOW_PRINTK("gfn=%"SH_PRI_gfn", type=%08x, smfn=%05lx\n",
>                     gfn_x(gfn), SH_type_fl1_shadow, mfn_x(smfn));
>      ASSERT(mfn_to_page(smfn)->u.sh.head);
> +
> +    /* Removing any dv_paddr_links to the erstwhile shadow page */
> +    dirty_vram_delete_shadow(v, gfn_x(gfn), SH_type_fl1_shadow, smfn);
> +    
>      shadow_hash_delete(v, gfn_x(gfn), SH_type_fl1_shadow, smfn);
>  }
>  
> @@ -160,6 +165,10 @@ delete_shadow_status(struct vcpu *v, mfn_t gmfn, u32 shadow_type, mfn_t smfn)
>                     v->domain->domain_id, v->vcpu_id,
>                     mfn_x(gmfn), shadow_type, mfn_x(smfn));
>      ASSERT(mfn_to_page(smfn)->u.sh.head);
> +    
> +    /* Removing any dv_paddr_links to the erstwhile shadow page */
> +    dirty_vram_delete_shadow(v, mfn_x(gmfn), shadow_type, smfn);
> +    
>      shadow_hash_delete(v, mfn_x(gmfn), shadow_type, smfn);
>      /* 32-on-64 PV guests don't own their l4 pages; see set_shadow_status */
>      if ( !is_pv_32on64_vcpu(v) || shadow_type != SH_type_l4_64_shadow )
> @@ -516,7 +525,6 @@ _sh_propagate(struct vcpu *v,
>      guest_l1e_t guest_entry = { guest_intpte };
>      shadow_l1e_t *sp = shadow_entry_ptr;
>      struct domain *d = v->domain;
> -    struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
>      gfn_t target_gfn = guest_l1e_get_gfn(guest_entry);
>      u32 pass_thru_flags;
>      u32 gflags, sflags;
> @@ -663,17 +671,6 @@ _sh_propagate(struct vcpu *v,
>          }
>      }
>  
> -    if ( unlikely((level == 1) && dirty_vram
> -            && dirty_vram->last_dirty == -1
> -            && gfn_x(target_gfn) >= dirty_vram->begin_pfn
> -            && gfn_x(target_gfn) < dirty_vram->end_pfn) )
> -    {
> -        if ( ft & FETCH_TYPE_WRITE )
> -            dirty_vram->last_dirty = NOW();
> -        else
> -            sflags &= ~_PAGE_RW;
> -    }
> -
>      /* Read-only memory */
>      if ( p2m_is_readonly(p2mt) ||
>           (p2mt == p2m_mmio_direct &&
> @@ -1072,101 +1069,60 @@ static int shadow_set_l2e(struct vcpu *v,
>      return flags;
>  }
>  
> -static inline void shadow_vram_get_l1e(shadow_l1e_t new_sl1e,
> +/* shadow_vram_fix_l1e()
> + *
> + * Tests L1PTEs as they are modified, looking for when they start to (or
> + * cease to) point to frame buffer pages.  If the old and new gfns differ,
> + * it calls dirty_vram_range_update() to update the dirty_vram structures.
> + */
> +static inline void shadow_vram_fix_l1e(shadow_l1e_t old_sl1e,
> +                                       shadow_l1e_t new_sl1e,
>                                         shadow_l1e_t *sl1e,
>                                         mfn_t sl1mfn,
>                                         struct domain *d)
>  { 
> -    mfn_t mfn = shadow_l1e_get_mfn(new_sl1e);
> -    int flags = shadow_l1e_get_flags(new_sl1e);
> -    unsigned long gfn;
> -    struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
> +    mfn_t new_mfn, old_mfn;
> +    unsigned long new_gfn = INVALID_M2P_ENTRY, old_gfn = INVALID_M2P_ENTRY;
> +    paddr_t sl1ma;
> +    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
>  
> -    if ( !dirty_vram         /* tracking disabled? */
> -         || !(flags & _PAGE_RW) /* read-only mapping? */
> -         || !mfn_valid(mfn) )   /* mfn can be invalid in mmio_direct */
> +    if ( !dirty_vram )
>          return;
>  
> -    gfn = mfn_to_gfn(d, mfn);
> -    /* Page sharing not supported on shadow PTs */
> -    BUG_ON(SHARED_M2P(gfn));
> +    sl1ma = pfn_to_paddr(mfn_x(sl1mfn)) | ((unsigned long)sl1e & ~PAGE_MASK);
>  
> -    if ( (gfn >= dirty_vram->begin_pfn) && (gfn < dirty_vram->end_pfn) )
> +    old_mfn = shadow_l1e_get_mfn(old_sl1e);
> +
> +    if ( !sh_l1e_is_magic(old_sl1e) &&
> +         (l1e_get_flags(old_sl1e) & _PAGE_PRESENT) &&
> +         mfn_valid(old_mfn))
>      {
> -        unsigned long i = gfn - dirty_vram->begin_pfn;
> -        struct page_info *page = mfn_to_page(mfn);
> -        
> -        if ( (page->u.inuse.type_info & PGT_count_mask) == 1 )
> -            /* Initial guest reference, record it */
> -            dirty_vram->sl1ma[i] = pfn_to_paddr(mfn_x(sl1mfn))
> -                | ((unsigned long)sl1e & ~PAGE_MASK);
> +        old_gfn = mfn_to_gfn(d, old_mfn);
>      }
> -}
> -
> -static inline void shadow_vram_put_l1e(shadow_l1e_t old_sl1e,
> -                                       shadow_l1e_t *sl1e,
> -                                       mfn_t sl1mfn,
> -                                       struct domain *d)
> -{
> -    mfn_t mfn = shadow_l1e_get_mfn(old_sl1e);
> -    int flags = shadow_l1e_get_flags(old_sl1e);
> -    unsigned long gfn;
> -    struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
> -
> -    if ( !dirty_vram         /* tracking disabled? */
> -         || !(flags & _PAGE_RW) /* read-only mapping? */
> -         || !mfn_valid(mfn) )   /* mfn can be invalid in mmio_direct */
> -        return;
> -
> -    gfn = mfn_to_gfn(d, mfn);
> -    /* Page sharing not supported on shadow PTs */
> -    BUG_ON(SHARED_M2P(gfn));
> -
> -    if ( (gfn >= dirty_vram->begin_pfn) && (gfn < dirty_vram->end_pfn) )
> +    
> +    new_mfn = shadow_l1e_get_mfn(new_sl1e);
> +    if ( !sh_l1e_is_magic(new_sl1e) &&
> +         (l1e_get_flags(new_sl1e) & _PAGE_PRESENT) &&
> +         mfn_valid(new_mfn))
>      {
> -        unsigned long i = gfn - dirty_vram->begin_pfn;
> -        struct page_info *page = mfn_to_page(mfn);
> -        int dirty = 0;
> -        paddr_t sl1ma = pfn_to_paddr(mfn_x(sl1mfn))
> -            | ((unsigned long)sl1e & ~PAGE_MASK);
> +        new_gfn = mfn_to_gfn(d, new_mfn);
> +    }
>  
> -        if ( (page->u.inuse.type_info & PGT_count_mask) == 1 )
> -        {
> -            /* Last reference */
> -            if ( dirty_vram->sl1ma[i] == INVALID_PADDR ) {
> -                /* We didn't know it was that one, let's say it is dirty */
> -                dirty = 1;
> -            }
> -            else
> -            {
> -                ASSERT(dirty_vram->sl1ma[i] == sl1ma);
> -                dirty_vram->sl1ma[i] = INVALID_PADDR;
> -                if ( flags & _PAGE_DIRTY )
> -                    dirty = 1;
> -            }
> -        }
> -        else
> +    if ( old_gfn == new_gfn ) return;
> +
> +    if ( VALID_M2P(old_gfn) )
> +        if ( dirty_vram_range_update(d, old_gfn, sl1ma, 0/*clear*/) )
>          {
> -            /* We had more than one reference, just consider the page dirty. */
> -            dirty = 1;
> -            /* Check that it's not the one we recorded. */
> -            if ( dirty_vram->sl1ma[i] == sl1ma )
> -            {
> -                /* Too bad, we remembered the wrong one... */
> -                dirty_vram->sl1ma[i] = INVALID_PADDR;
> -            }
> -            else
> -            {
> -                /* Ok, our recorded sl1e is still pointing to this page, let's
> -                 * just hope it will remain. */
> -            }
> +            SHADOW_PRINTK("gfn %lx (mfn %lx) cleared vram pte\n",
> +                          old_gfn, mfn_x(old_mfn));
>          }
> -        if ( dirty )
> +
> +    if ( VALID_M2P(new_gfn) )
> +        if ( dirty_vram_range_update(d, new_gfn, sl1ma, 1/*set*/) )
>          {
> -            dirty_vram->dirty_bitmap[i / 8] |= 1 << (i % 8);
> -            dirty_vram->last_dirty = NOW();
> +            SHADOW_PRINTK("gfn %lx (mfn %lx) set vram pte\n",
> +                          new_gfn, mfn_x(new_mfn));
>          }
> -    }
>  }
>  
>  static int shadow_set_l1e(struct vcpu *v, 
> @@ -1211,12 +1167,13 @@ static int shadow_set_l1e(struct vcpu *v,
>                  shadow_l1e_remove_flags(new_sl1e, _PAGE_RW);
>                  /* fall through */
>              case 0:
> -                shadow_vram_get_l1e(new_sl1e, sl1e, sl1mfn, d);
>                  break;
>              }
>          }
>      } 
>  
> +    shadow_vram_fix_l1e(old_sl1e, new_sl1e, sl1e, sl1mfn, d);
> +
>      /* Write the new entry */
>      shadow_write_entries(sl1e, &new_sl1e, 1, sl1mfn);
>      flags |= SHADOW_SET_CHANGED;
> @@ -1231,7 +1188,6 @@ static int shadow_set_l1e(struct vcpu *v,
>           * trigger a flush later. */
>          if ( shadow_mode_refcounts(d) ) 
>          {
> -            shadow_vram_put_l1e(old_sl1e, sl1e, sl1mfn, d);
>              shadow_put_page_from_l1e(old_sl1e, d);
>              TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_SHADOW_L1_PUT_REF);
>          } 
> @@ -2018,7 +1974,6 @@ void sh_destroy_l1_shadow(struct vcpu *v, mfn_t smfn)
>          SHADOW_FOREACH_L1E(sl1mfn, sl1e, 0, 0, {
>              if ( (shadow_l1e_get_flags(*sl1e) & _PAGE_PRESENT)
>                   && !sh_l1e_is_magic(*sl1e) ) {
> -                shadow_vram_put_l1e(*sl1e, sl1e, sl1mfn, d);
>                  shadow_put_page_from_l1e(*sl1e, d);
>              }
>          });
> @@ -4336,6 +4291,37 @@ int sh_rm_mappings_from_l1(struct vcpu *v, mfn_t sl1mfn, mfn_t target_mfn)
>      return done;
>  }
>  
> +
> +int sh_find_vram_mappings_in_l1(struct vcpu *v,
> +                                mfn_t sl1mfn,
> +                                unsigned long begin_pfn,
> +                                unsigned long end_pfn,
> +                                int *removed)
> +/* Find all VRAM mappings in this shadow l1 table */
> +{
> +    struct domain *d = v->domain;
> +    shadow_l1e_t *sl1e;
> +    int done = 0;
> +
> +    /* only returns _PAGE_PRESENT entries */
> +    SHADOW_FOREACH_L1E(sl1mfn, sl1e, 0, done, 
> +    {
> +        unsigned long gfn;
> +        mfn_t gmfn = shadow_l1e_get_mfn(*sl1e);
> +        if ( !mfn_valid(gmfn) )
> +            continue;
> +        gfn = mfn_to_gfn(d, gmfn);
> +        if ( VALID_M2P(gfn) && (begin_pfn <= gfn) && (gfn < end_pfn) ) 
> +        {
> +            paddr_t sl1ma =
> +                pfn_to_paddr(mfn_x(sl1mfn)) |
> +                ( (unsigned long)sl1e & ~PAGE_MASK );
> +            dirty_vram_range_update(v->domain, gfn, sl1ma, 1/*set*/);
> +        }
> +    });
> +    return 0;
> +}
> +
>  /**************************************************************************/
>  /* Functions to excise all pointers to shadows from higher-level shadows. */
>  
> diff --git a/xen/arch/x86/mm/shadow/multi.h b/xen/arch/x86/mm/shadow/multi.h
> index 835121e..436a4ac 100644
> --- a/xen/arch/x86/mm/shadow/multi.h
> +++ b/xen/arch/x86/mm/shadow/multi.h
> @@ -66,7 +66,12 @@ SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1, GUEST_LEVELS)
>  extern int
>  SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1, GUEST_LEVELS)
>      (struct vcpu *v, mfn_t sl1mfn, mfn_t target_mfn);
> -
> +extern int
> +SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, GUEST_LEVELS)
> +     (struct vcpu *v, mfn_t sl1mfn, 
> +      unsigned long begin_pfn,
> +      unsigned long end_pfn,
> +      int *removed);
>  extern void
>  SHADOW_INTERNAL_NAME(sh_clear_shadow_entry, GUEST_LEVELS)
>      (struct vcpu *v, void *ep, mfn_t smfn);
> diff --git a/xen/arch/x86/mm/shadow/types.h b/xen/arch/x86/mm/shadow/types.h
> index 43ce1db..5b0f9f7 100644
> --- a/xen/arch/x86/mm/shadow/types.h
> +++ b/xen/arch/x86/mm/shadow/types.h
> @@ -229,6 +229,7 @@ static inline shadow_l4e_t shadow_l4e_from_mfn(mfn_t mfn, u32 flags)
>  #define sh_update_cr3              INTERNAL_NAME(sh_update_cr3)
>  #define sh_rm_write_access_from_l1 INTERNAL_NAME(sh_rm_write_access_from_l1)
>  #define sh_rm_mappings_from_l1     INTERNAL_NAME(sh_rm_mappings_from_l1)
> +#define sh_find_vram_mappings_in_l1 INTERNAL_NAME(sh_find_vram_mappings_in_l1)
>  #define sh_remove_l1_shadow        INTERNAL_NAME(sh_remove_l1_shadow)
>  #define sh_remove_l2_shadow        INTERNAL_NAME(sh_remove_l2_shadow)
>  #define sh_remove_l3_shadow        INTERNAL_NAME(sh_remove_l3_shadow)
> diff --git a/xen/include/asm-x86/dirty_vram.h b/xen/include/asm-x86/dirty_vram.h
> new file mode 100644
> index 0000000..53be66e
> --- /dev/null
> +++ b/xen/include/asm-x86/dirty_vram.h
> @@ -0,0 +1,202 @@
> +/****************************************************************************
> + * include/asm-x86/dirty_vram.h
> + *
> + * Interface for tracking dirty VRAM pages
> + *
> + * Copyright (c) 2012 Citrix Systems, Inc. (Robert Phillips)
> + * Parts of this code are Copyright (c) 2007 Advanced Micro Devices (Wei Huang)
> + * Parts of this code are Copyright (c) 2006 by XenSource Inc.
> + * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
> + * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
> + */
> +
> +#ifndef _DIRTY_VRAM_H
> +#define _DIRTY_VRAM_H
> +
> +/*
> + * In shadow mode we need to bookkeep all the L1 page table entries that
> + * map a frame buffer page.  Struct dv_paddr_link does this by
> + * recording the address of an L1 page table entry for some frame buffer page.
> + * It also has a link to additional pl entries if the frame buffer page
> + * has multiple mappings.
> + * In practice very few pages have multiple mappings.
> + * But to rule out pathological situations, we limit the number of
> + * mappings we're willing to bookkeep.
> + */
> +
> +#define DV_ADDR_LINK_LIST_LIMIT 64
> +
> +typedef struct dv_paddr_link {
> +    paddr_t sl1ma;
> +    struct dv_paddr_link *pl_next;
> +} dv_paddr_link_t;
> +
> +typedef struct dv_pl_entry {
> +    dv_paddr_link_t mapping;
> +    bool_t stuck_dirty;
> +} dv_pl_entry_t;
> +
> +/*
> + * This defines an extension page of pl entries for FB pages with multiple
> + * mappings. All such pages (of a domain) are linked together.
> + */
> +typedef struct dv_paddr_link_ext {
> +    struct list_head ext_link;
> +    dv_paddr_link_t entries[ ( PAGE_SIZE - sizeof( struct list_head ) ) /
> +                             sizeof( dv_paddr_link_t ) ];
> +} dv_paddr_link_ext_t;
> +
> +/*
> + * This defines a single frame buffer range.  It bookkeeps all the
> + * level 1 PTEs that map guest pages within that range.
> + * All such ranges (of a domain) are linked together.
> + */
> +typedef struct dv_range {
> +    struct list_head range_link; /* the several ranges form a linked list */
> +    unsigned long begin_pfn;
> +    unsigned long end_pfn;
> +    dv_pl_entry_t *pl_tab; /* table has 1 pl entry per pfn in range */
> +    int nr_mappings;  /* total number of mappings in this range */
> +    int mappings_hwm; /* high water mark of max mapping count */
> +    unsigned int dirty_count;
> +} dv_range_t;
> +
> +/*
> + * This contains all the data structures required by a domain to
> + * bookkeep the dirty pages within its frame buffers.
> + */
> +typedef struct dv_dirty_vram {
> +    struct list_head range_head; /* head of the linked list of ranges */
> +    struct list_head ext_head; /* head of list of extension pages */
> +    dv_paddr_link_t *pl_free; /* free list of pl's within extension pages */
> +    int nr_ranges; /* bookkeeps number of ranges */
> +    int ranges_hwm; /* high water mark of max number of ranges */
> +} dv_dirty_vram_t;
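
(Illustrative sketch, not part of the patch: in shadow mode, where pl_tab is
non-NULL, the per-pfn mapping chains defined above can be walked as below.
The helper name is made up; it assumes the typedefs above plus Xen's
INVALID_PADDR.)

static unsigned int count_vram_mappings(dv_range_t *range, unsigned long pfn)
{
    /* Caller must ensure range->begin_pfn <= pfn < range->end_pfn. */
    dv_paddr_link_t *pl = &range->pl_tab[pfn - range->begin_pfn].mapping;
    unsigned int n = 0;

    /* The chain ends at a NULL link or an unused slot (INVALID_PADDR). */
    while ( pl != NULL && pl->sl1ma != INVALID_PADDR )
    {
        n++;
        pl = pl->pl_next;
    }
    return n;
}
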
> +
> +/* Allocates domain's dirty_vram structure */
> +dv_dirty_vram_t *
> +dirty_vram_alloc(struct domain *d);
> +
> +/*
> + * Returns domain's dirty_vram structure,
> + * allocating it if necessary
> + */
> +dv_dirty_vram_t *
> +dirty_vram_find_or_alloc(struct domain *d);
> +
> +/* Frees domain's dirty_vram structure */
> +void dirty_vram_free(struct domain *d);
> +
> +/* Returns dirty vram range containing gfn, NULL if none */
> +struct dv_range *
> +dirty_vram_range_find_gfn(struct domain *d,
> +                          unsigned long gfn);
> +
> +/*
> + * Returns dirty vram range matching [ begin_pfn .. begin_pfn+nr ),
> + * NULL if none
> + */
> +dv_range_t *
> +dirty_vram_range_find(struct domain *d,
> +                      unsigned long begin_pfn,
> +                      unsigned long nr);
> +
> +/*
> + * Allocate dirty vram range containing [ begin_pfn .. begin_pfn+nr ),
> + * freeing any existing range that overlaps the new range.
> + */
> +dv_range_t *
> +dirty_vram_range_alloc(struct domain *d,
> +                       unsigned long begin_pfn,
> +                       unsigned long nr);
> +
> +/*
> + * Returns dirty vram range matching [ begin_pfn .. begin_pfn+nr ),
> + * creating a range if none already exists and
> + * freeing any existing range that overlaps the new range.
> + */
> +dv_range_t *
> +dirty_vram_range_find_or_alloc(struct domain *d,
> +                               unsigned long begin_pfn,
> +                               unsigned long nr);
> +
> +void dirty_vram_range_free(struct domain *d,
> +                           dv_range_t *range);
> +
> +/* Bookkeep PTE address of a frame buffer page */
> +int dirty_vram_range_update(struct domain *d,
> +                            unsigned long gfn,
> +                            paddr_t sl1ma,
> +                            int set);
> +
> +/*
> + * smfn is no longer a shadow page.  Remove it from any
> + * dirty vram range mapping.
> + */
> +void
> +dirty_vram_delete_shadow(struct vcpu *v,
> +                         unsigned long gfn,
> +                         unsigned int shadow_type,
> +                         mfn_t smfn);
> +
> +
> +/*
> + * Scan all the L1 tables looking for VRAM mappings.
> + * Record them in the domain's dv_dirty_vram structure
> + */
> +void sh_find_all_vram_mappings(struct vcpu *v,
> +                               dv_range_t *range);
> +
> +/*
> + * Free a paddr_link struct, given address of its
> + * predecessor in singly-linked list
> + */
> +dv_paddr_link_t *
> +free_paddr_link(struct domain *d,
> +                dv_paddr_link_t **ppl,
> +                dv_paddr_link_t *pl);
> +
> +
> +/* Enable VRAM dirty tracking. */
> +int
> +shadow_track_dirty_vram(struct domain *d,
> +                        unsigned long first_pfn,
> +                        unsigned long nr,
> +                        XEN_GUEST_HANDLE_64(uint8) dirty_bitmap);
> +
> +int
> +hap_track_dirty_vram(struct domain *d,
> +                     unsigned long begin_pfn,
> +                     unsigned long nr,
> +                     XEN_GUEST_HANDLE_64(uint8) dirty_bitmap);
> +
> +void
> +hap_clean_vram_tracking_range(struct domain *d,
> +                              unsigned long begin_pfn,
> +                              unsigned long nr,
> +                              uint8_t *dirty_bitmap);
> +
> +#endif /* _DIRTY_VRAM_H */
> +
> +/*
> + * Local variables:
> + * mode: C
> + * c-file-style: "BSD"
> + * c-basic-offset: 4
> + * indent-tabs-mode: nil
> + * End:
> + */
> diff --git a/xen/include/asm-x86/hap.h b/xen/include/asm-x86/hap.h
> index 916a35b..3e3a1f5 100644
> --- a/xen/include/asm-x86/hap.h
> +++ b/xen/include/asm-x86/hap.h
> @@ -57,10 +57,6 @@ void  hap_final_teardown(struct domain *d);
>  void  hap_teardown(struct domain *d);
>  void  hap_vcpu_init(struct vcpu *v);
>  void  hap_logdirty_init(struct domain *d);
> -int   hap_track_dirty_vram(struct domain *d,
> -                           unsigned long begin_pfn,
> -                           unsigned long nr,
> -                           XEN_GUEST_HANDLE_64(uint8) dirty_bitmap);
>  
>  extern const struct paging_mode *hap_paging_get_mode(struct vcpu *);
>  
> diff --git a/xen/include/asm-x86/hvm/domain.h b/xen/include/asm-x86/hvm/domain.h
> index 27b3de5..0cc7b05 100644
> --- a/xen/include/asm-x86/hvm/domain.h
> +++ b/xen/include/asm-x86/hvm/domain.h
> @@ -74,7 +74,7 @@ struct hvm_domain {
>      struct list_head       pinned_cacheattr_ranges;
>  
>      /* VRAM dirty support. */
> -    struct sh_dirty_vram *dirty_vram;
> +    struct dv_dirty_vram  *dirty_vram;
>  
>      /* If one of vcpus of this domain is in no_fill_mode or
>       * mtrr/pat between vcpus is not the same, set is_in_uc_mode
> diff --git a/xen/include/asm-x86/paging.h b/xen/include/asm-x86/paging.h
> index c3a8848..e22df38 100644
> --- a/xen/include/asm-x86/paging.h
> +++ b/xen/include/asm-x86/paging.h
> @@ -154,9 +154,13 @@ void paging_log_dirty_init(struct domain *d,
>                             int  (*disable_log_dirty)(struct domain *d),
>                             void (*clean_dirty_bitmap)(struct domain *d));
>  
> -/* mark a page as dirty */
> +/* mark a gmfn as dirty, a wrapper around marking a gpfn as dirty */
>  void paging_mark_dirty(struct domain *d, unsigned long guest_mfn);
>  
> +/* mark a gpfn as dirty */
> +void paging_mark_dirty_gpfn(struct domain *d, unsigned long gpfn);
> +
> +
>  /* is this guest page dirty? 
>   * This is called from inside paging code, with the paging lock held. */
>  int paging_mfn_is_dirty(struct domain *d, mfn_t gmfn);
> @@ -183,15 +187,6 @@ int paging_mfn_is_dirty(struct domain *d, mfn_t gmfn);
>  #define L4_LOGDIRTY_IDX(pfn) 0
>  #endif
>  
> -/* VRAM dirty tracking support */
> -struct sh_dirty_vram {
> -    unsigned long begin_pfn;
> -    unsigned long end_pfn;
> -    paddr_t *sl1ma;
> -    uint8_t *dirty_bitmap;
> -    s_time_t last_dirty;
> -};
> -
>  /*****************************************************************************
>   * Entry points into the paging-assistance code */
>  
> diff --git a/xen/include/asm-x86/shadow.h b/xen/include/asm-x86/shadow.h
> index 2eb6efc..940d7fd 100644
> --- a/xen/include/asm-x86/shadow.h
> +++ b/xen/include/asm-x86/shadow.h
> @@ -62,12 +62,6 @@ void shadow_vcpu_init(struct vcpu *v);
>  /* Enable an arbitrary shadow mode.  Call once at domain creation. */
>  int shadow_enable(struct domain *d, u32 mode);
>  
> -/* Enable VRAM dirty bit tracking. */
> -int shadow_track_dirty_vram(struct domain *d,
> -                            unsigned long first_pfn,
> -                            unsigned long nr,
> -                            XEN_GUEST_HANDLE_64(uint8) dirty_bitmap);
> -
>  /* Handler for shadow control ops: operations from user-space to enable
>   * and disable ephemeral shadow modes (test mode and log-dirty mode) and
>   * manipulate the log-dirty bitmap. */
> -- 
> 1.7.9.5
> 
> 


* [PATCH] Provide support for multiple frame buffers in Xen
@ 2013-01-21 19:28 Robert Phillips
  2013-01-22  7:31 ` Pasi Kärkkäinen
  2013-01-24 11:25 ` Tim Deegan
  0 siblings, 2 replies; 35+ messages in thread
From: Robert Phillips @ 2013-01-21 19:28 UTC (permalink / raw)
  To: xen-devel; +Cc: Robert Phillips

Support is provided for both shadow and hardware assisted paging (HAP)
modes. This code bookkeeps the set of video frame buffers (vram),
detects when the guest has modified any of those buffers and, upon request,
returns a bitmap of the modified pages.
This lets other software components re-paint the portions of the monitor
(or monitors) that have changed.
Each monitor has a frame buffer of some size at some position
in guest physical memory.
The set of frame buffers being tracked can change over time as monitors
are plugged and unplugged.
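For example (numbers are illustrative only): a guest driving two 1920x1080
monitors at 32 bits per pixel exposes two frame buffers of 1920 * 1080 * 4 =
8294400 bytes each, i.e. 2025 4-KiB pages, and each buffer is tracked as its
own range; unplugging one monitor simply drops that range.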

Signed-Off-By: Robert Phillips <robert.phillips@citrix.com>
---
 tools/libxc/xenctrl.h            |   20 +-
 xen/arch/x86/hvm/hvm.c           |    9 +-
 xen/arch/x86/mm/Makefile         |    1 +
 xen/arch/x86/mm/dirty_vram.c     |  866 ++++++++++++++++++++++++++++++++++++++
 xen/arch/x86/mm/hap/hap.c        |  111 -----
 xen/arch/x86/mm/p2m.c            |   11 +-
 xen/arch/x86/mm/paging.c         |   57 ++-
 xen/arch/x86/mm/shadow/common.c  |  335 ++++++++-------
 xen/arch/x86/mm/shadow/multi.c   |  174 ++++----
 xen/arch/x86/mm/shadow/multi.h   |    7 +-
 xen/arch/x86/mm/shadow/types.h   |    1 +
 xen/include/asm-x86/dirty_vram.h |  202 +++++++++
 xen/include/asm-x86/hap.h        |    4 -
 xen/include/asm-x86/hvm/domain.h |    2 +-
 xen/include/asm-x86/paging.h     |   15 +-
 xen/include/asm-x86/shadow.h     |    6 -
 16 files changed, 1424 insertions(+), 397 deletions(-)
 create mode 100644 xen/arch/x86/mm/dirty_vram.c
 create mode 100644 xen/include/asm-x86/dirty_vram.h

diff --git a/tools/libxc/xenctrl.h b/tools/libxc/xenctrl.h
index 32122fd..cd4e1ef 100644
--- a/tools/libxc/xenctrl.h
+++ b/tools/libxc/xenctrl.h
@@ -1563,15 +1563,23 @@ int xc_hvm_inject_msi(
     xc_interface *xch, domid_t dom, uint64_t addr, uint32_t data);
 
 /*
- * Track dirty bit changes in the VRAM area
+ * Track dirty bit changes in a VRAM region defined by
+ * [ first_pfn : first_pfn + nr - 1 ]
  *
  * All of this is done atomically:
- * - get the dirty bitmap since the last call
- * - set up dirty tracking area for period up to the next call
- * - clear the dirty tracking area.
+ * - gets the dirty bitmap since the last call, all zeroes for
+ *   the first call with some new region
+ * - sets up a dirty tracking region for period up to the next call
+ * - clears the specified dirty tracking region.
  *
- * Returns -ENODATA and does not fill bitmap if the area has changed since the
- * last call.
+ * Creating a new region causes any existing regions that it overlaps
+ * to be discarded.
+ *
+ * Specifying nr == 0 causes all regions to be discarded and
+ * disables dirty bit tracking.
+ *
+ * If nr is not a multiple of 64, only the first nr bits of bitmap
+ * are well defined.
  */
 int xc_hvm_track_dirty_vram(
     xc_interface *xch, domid_t dom,
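
A minimal caller-side sketch of the semantics described in the comment above
(illustrative only, not part of the patch).  The hunk does not show the full
xc_hvm_track_dirty_vram() prototype, so the first_pfn/nr/dirty-bitmap
parameters below, and the poll_fb_dirty() helper itself, are assumptions:

#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <xenctrl.h>

static int poll_fb_dirty(xc_interface *xch, domid_t dom,
                         uint64_t first_pfn, uint64_t nr)
{
    const size_t lb = 8 * sizeof(unsigned long);      /* bits per word */
    unsigned long *bitmap = calloc((nr + lb - 1) / lb, sizeof(unsigned long));
    uint64_t i;
    int rc;

    if ( !bitmap )
        return -1;

    /* Atomically fetch and clear the dirty bits for this region. */
    rc = xc_hvm_track_dirty_vram(xch, dom, first_pfn, nr, bitmap);
    if ( rc == 0 )
        for ( i = 0; i < nr; i++ )
            if ( bitmap[i / lb] & (1UL << (i % lb)) )
                printf("gfn %#" PRIx64 " is dirty\n", first_pfn + i);

    free(bitmap);
    return rc;
}

Per the comment above, a call with nr == 0 discards all tracked regions and
disables dirty bit tracking.
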
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 40c1ab2..8ddb91d 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -57,6 +57,7 @@
 #include <asm/hvm/cacheattr.h>
 #include <asm/hvm/trace.h>
 #include <asm/hvm/nestedhvm.h>
+#include <asm/dirty_vram.h>
 #include <asm/mtrr.h>
 #include <asm/apic.h>
 #include <public/sched.h>
@@ -66,6 +67,7 @@
 #include <asm/mem_event.h>
 #include <asm/mem_access.h>
 #include <public/mem_event.h>
+#include "../mm/mm-locks.h"
 
 bool_t __read_mostly hvm_enabled;
 
@@ -1433,8 +1435,11 @@ int hvm_hap_nested_page_fault(paddr_t gpa,
          */
         if ( access_w )
         {
-            paging_mark_dirty(v->domain, mfn_x(mfn));
-            p2m_change_type(v->domain, gfn, p2m_ram_logdirty, p2m_ram_rw);
+            if ( p2m_change_type(v->domain, gfn, p2m_ram_logdirty,
+                                 p2m_ram_rw) == p2m_ram_logdirty )
+            {
+                paging_mark_dirty_gpfn(v->domain, gfn);
+            }
         }
         rc = 1;
         goto out_put_gfn;
diff --git a/xen/arch/x86/mm/Makefile b/xen/arch/x86/mm/Makefile
index 73dcdf4..becd0c9 100644
--- a/xen/arch/x86/mm/Makefile
+++ b/xen/arch/x86/mm/Makefile
@@ -5,6 +5,7 @@ obj-y += paging.o
 obj-y += p2m.o p2m-pt.o p2m-ept.o p2m-pod.o
 obj-y += guest_walk_2.o
 obj-y += guest_walk_3.o
+obj-y += dirty_vram.o
 obj-$(x86_64) += guest_walk_4.o
 obj-$(x86_64) += mem_event.o
 obj-$(x86_64) += mem_paging.o
diff --git a/xen/arch/x86/mm/dirty_vram.c b/xen/arch/x86/mm/dirty_vram.c
new file mode 100644
index 0000000..75465ca
--- /dev/null
+++ b/xen/arch/x86/mm/dirty_vram.c
@@ -0,0 +1,866 @@
+/*
+ * arch/x86/mm/dirty_vram.c: Bookkeep/query dirty VRAM pages
+ * with support for multiple frame buffers.
+ *
+ * Copyright (c) 2012, Citrix Systems, Inc. (Robert Phillips)
+ * Parts of this code are Copyright (c) 2007 Advanced Micro Devices (Wei Huang)
+ * Parts of this code are Copyright (c) 2007 XenSource Inc.
+ * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
+ * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+
+#include <xen/types.h>
+#include <xen/sched.h>
+#include <xen/guest_access.h>
+#include <asm/shadow.h>
+#include <asm/dirty_vram.h>
+#include <asm/hap.h>
+#include <asm/config.h>
+#include "mm-locks.h"
+
+#define DEBUG_stop_tracking_all_vram          0
+#define DEBUG_allocating_dirty_vram_range     0
+#define DEBUG_high_water_mark_for_vram_ranges 0
+#define DEBUG_freeing_dirty_vram_range        0
+#define DEBUG_allocate_paddr_links_page       0
+#define DEBUG_update_vram_mapping             0
+#define DEBUG_alloc_paddr_inject_fault        0
+#define DEBUG_link_limit_exceeded             0
+
+/* Allocates domain's dirty_vram structure */
+dv_dirty_vram_t *
+dirty_vram_alloc(struct domain *d)
+{
+    dv_dirty_vram_t *dirty_vram;
+    ASSERT( paging_locked_by_me(d) );
+    dirty_vram = d->arch.hvm_domain.dirty_vram = xzalloc(dv_dirty_vram_t);
+    if ( dirty_vram )
+    {
+        INIT_LIST_HEAD(&dirty_vram->range_head);
+        INIT_LIST_HEAD(&dirty_vram->ext_head);
+    }
+    return dirty_vram;
+}
+
+/*
+ * Returns domain's dirty_vram structure,
+ * allocating it if necessary
+ */
+dv_dirty_vram_t *
+dirty_vram_find_or_alloc(struct domain *d)
+{
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    ASSERT( paging_locked_by_me(d) );
+    if ( !dirty_vram )
+        dirty_vram = dirty_vram_alloc(d);
+    return dirty_vram;
+}
+
+
+/* Free domain's dirty_vram structure */
+void dirty_vram_free(struct domain *d)
+{
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    ASSERT( paging_locked_by_me(d) );
+    if ( dirty_vram )
+    {
+        struct list_head *curr, *next;
+        /* Free all the ranges */
+        list_for_each_safe(curr, next, &dirty_vram->range_head)
+        {
+            dv_range_t *range = list_entry(curr, dv_range_t, range_link);
+#if DEBUG_stop_tracking_all_vram
+            gdprintk(XENLOG_DEBUG, "[%05lx:%05lx] stop tracking all vram\n",
+                     range->begin_pfn, range->end_pfn);
+#endif
+            xfree(range->pl_tab);
+            xfree(range);
+        }
+        /* Free all the extension pages */
+        list_for_each_safe(curr, next, &dirty_vram->ext_head)
+        {
+            struct dv_paddr_link_ext *ext =
+                container_of(
+                    curr, struct dv_paddr_link_ext, ext_link);
+            struct page_info *pg = __virt_to_page(ext);
+            unmap_domain_page(ext);
+            d->arch.paging.free_page(d, pg);
+        }
+
+        xfree(dirty_vram);
+        d->arch.hvm_domain.dirty_vram = NULL;
+    }
+}
+
+/* Returns dirty vram range containing gfn, NULL if none */
+struct dv_range *
+dirty_vram_range_find_gfn(struct domain *d,
+                          unsigned long gfn)
+{
+    struct dv_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    ASSERT( paging_locked_by_me(d) );
+    if ( dirty_vram )
+    {
+        struct list_head *curr;
+        list_for_each(curr, &dirty_vram->range_head)
+        {
+            dv_range_t *range = list_entry(curr, dv_range_t, range_link);
+            if ( gfn >= range->begin_pfn &&
+                 gfn <  range->end_pfn )
+                return range;
+        }
+    }
+    return NULL;
+}
+
+/*
+ * Returns pointer to dirty vram range matching [begin_pfn .. end_pfn),
+ * NULL if none.
+ */
+dv_range_t *
+dirty_vram_range_find(struct domain *d,
+                      unsigned long begin_pfn,
+                      unsigned long nr)
+{
+    unsigned long end_pfn = begin_pfn + nr;
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    ASSERT( paging_locked_by_me(d) );
+    if ( dirty_vram )
+    {
+        struct list_head *curr;
+        list_for_each(curr, &dirty_vram->range_head)
+        {
+            dv_range_t *range = list_entry(curr, dv_range_t, range_link);
+            if ( begin_pfn == range->begin_pfn &&
+                 end_pfn   == range->end_pfn )
+                return range;
+        }
+    }
+    return NULL;
+}
+
+/* Allocate specified dirty_vram range */
+static dv_range_t *
+_dirty_vram_range_alloc(struct domain *d,
+                        unsigned long begin_pfn,
+                        unsigned long nr)
+{
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    dv_range_t *range = NULL;
+    unsigned long end_pfn = begin_pfn + nr;
+    dv_pl_entry_t *pl_tab = NULL;
+    int i;
+
+    ASSERT( paging_locked_by_me(d) );
+    ASSERT( dirty_vram != NULL );
+
+#if DEBUG_allocating_dirty_vram_range
+    gdprintk(XENLOG_DEBUG,
+             "[%05lx:%05lx] Allocating dirty vram range hap:%d\n",
+             begin_pfn, end_pfn,
+             d->arch.hvm_domain.hap_enabled);
+#endif
+
+    range = xzalloc(dv_range_t);
+    if ( range == NULL )
+        goto err_out;
+
+    INIT_LIST_HEAD(&range->range_link);
+
+    range->begin_pfn = begin_pfn;
+    range->end_pfn = end_pfn;
+
+    if ( !hap_enabled(d) )
+    {
+        if ( (pl_tab = xzalloc_array(dv_pl_entry_t, nr)) == NULL )
+            goto err_out;
+
+        for ( i = 0; i != nr; i++ )
+        {
+            pl_tab[i].mapping.sl1ma = INVALID_PADDR;
+        }
+    }
+
+    range->pl_tab = pl_tab;
+    range->mappings_hwm = 1;
+
+    list_add(&range->range_link, &dirty_vram->range_head);
+    if ( ++dirty_vram->nr_ranges > dirty_vram->ranges_hwm )
+    {
+        dirty_vram->ranges_hwm = dirty_vram->nr_ranges;
+#if DEBUG_high_water_mark_for_vram_ranges
+        gdprintk(XENLOG_DEBUG,
+                 "High water mark for number of vram ranges is now:%d\n",
+                 dirty_vram->ranges_hwm);
+#endif
+    }
+    return range;
+
+ err_out:
+    xfree(pl_tab);
+    xfree(range);
+    return NULL;
+}
+
+
+/* Frees specified dirty_vram range */
+void dirty_vram_range_free(struct domain *d,
+                           dv_range_t *range)
+{
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    ASSERT( paging_locked_by_me(d) );
+    if ( dirty_vram )
+    {
+        int i, nr = range->end_pfn - range->begin_pfn;
+
+#if DEBUG_freeing_dirty_vram_range
+        gdprintk(XENLOG_DEBUG,
+                 "[%05lx:%05lx] Freeing dirty vram range\n",
+                 range->begin_pfn, range->end_pfn);
+#endif
+
+        if ( range->pl_tab )
+        {
+            for ( i = 0; i != nr; i++ )
+            {
+                dv_paddr_link_t *plx;
+                plx = range->pl_tab[i].mapping.pl_next;
+                /* Does current FB page have multiple mappings? */
+                if ( plx ) /* yes */
+                {
+                    /* Find the last element in singly-linked list */
+                    while ( plx->pl_next != NULL )
+                        plx = plx->pl_next;
+                    
+                    /* Prepend whole list to the free list */
+                    plx->pl_next = dirty_vram->pl_free;
+                    dirty_vram->pl_free = range->pl_tab[i].mapping.pl_next;
+                }
+            }
+            xfree(range->pl_tab);
+            range->pl_tab = NULL;
+        }
+
+        /* Remove range from the linked list, free it, and adjust count */
+        list_del(&range->range_link);
+        xfree(range);
+        dirty_vram->nr_ranges--;
+    }
+}
+
+/*
+ * dirty_vram_range_alloc()
+ * This function ensures that the new range does not overlap any existing
+ * ranges -- deleting them if necessary -- and then calls
+ * _dirty_vram_range_alloc to actually allocate the new range.
+ */
+dv_range_t *
+dirty_vram_range_alloc(struct domain *d,
+                        unsigned long begin_pfn,
+                        unsigned long nr)
+{
+    unsigned long end_pfn = begin_pfn + nr;
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    dv_range_t *range;
+    struct list_head *curr, *next;
+
+    ASSERT( paging_locked_by_me(d) );
+    ASSERT( dirty_vram != NULL );
+
+    /*
+     * Ranges cannot overlap so
+     * free any range that overlaps [ begin_pfn .. end_pfn )
+     */
+    list_for_each_safe(curr, next, &dirty_vram->range_head)
+    {
+        dv_range_t *rng = list_entry(curr, dv_range_t, range_link);
+        if ( ( ( rng->begin_pfn <= begin_pfn ) &&
+               ( begin_pfn <  rng->end_pfn   )
+                 ) ||
+             ( ( begin_pfn <= rng->begin_pfn ) &&
+               ( rng->begin_pfn < end_pfn    )
+                 ) )
+        {
+            /* Different tracking, tear the previous down. */
+            dirty_vram_range_free(d, rng);
+        }
+    }
+
+    range = _dirty_vram_range_alloc(d, begin_pfn, nr);
+
+    return range;
+}
+
+/*
+ * dirty_vram_range_find_or_alloc()
+ * Find the range for [begin_pfn .. begin_pfn + nr).
+ * If it doesn't exist, create it.
+ */
+dv_range_t *
+dirty_vram_range_find_or_alloc(struct domain *d,
+                                unsigned long begin_pfn,
+                                unsigned long nr)
+{
+    dv_range_t *range;
+    ASSERT( paging_locked_by_me(d) );
+    range = dirty_vram_range_find(d, begin_pfn, nr);
+    if ( !range )
+        range = dirty_vram_range_alloc(d, begin_pfn, nr);
+    
+    return range;
+}
+
+
+
+/* Allocate a dv_paddr_link struct */
+static
+dv_paddr_link_t *
+alloc_paddr_link(struct domain *d)
+{
+    dv_paddr_link_t * pl = NULL;
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    dv_paddr_link_ext_t *ext = NULL;
+    
+
+    ASSERT( paging_locked_by_me(d) );
+    BUILD_BUG_ON(sizeof(dv_paddr_link_ext_t) > PAGE_SIZE);
+    /* Is the list of free pl's empty? */
+    if ( dirty_vram->pl_free == NULL ) /* yes */
+    {
+        /*
+         * Allocate another page of pl's.
+         * Link them all together and point the free list head at them
+         */
+        int i;
+        struct page_info *pg = d->arch.paging.alloc_page(d);
+
+        if ( pg == NULL )
+            goto out;
+
+        ext = __map_domain_page(pg);
+        /* Unmapped in dirty_vram_free() */
+
+#if DEBUG_allocate_paddr_links_page
+        gdprintk(XENLOG_DEBUG, "Allocated another page of paddr_links\n");
+#endif
+        list_add(&ext->ext_link, &dirty_vram->ext_head);
+
+        /* initialize and link together the new pl entries */
+        for ( i = 0; i != ARRAY_SIZE(ext->entries); i++ )
+        {
+            ext->entries[i].sl1ma = INVALID_PADDR;
+            ext->entries[i].pl_next = &ext->entries[i+1];
+        }
+        ext->entries[ARRAY_SIZE(ext->entries) - 1].pl_next = NULL;
+        dirty_vram->pl_free = &ext->entries[0];
+    }
+    pl = dirty_vram->pl_free;
+    dirty_vram->pl_free = pl->pl_next;
+
+    pl->sl1ma = INVALID_PADDR;
+    pl->pl_next = NULL;
+ out:
+    return pl;
+}
+
+
+/*
+ * Free a paddr_link struct.
+ *
+ * The caller has walked the singly-linked list of elements
+ * that have, as their head, an element in a pl_tab cell.
+ * The list walk has reached the element to be freed.
+ * (Each element is a dv_paddr_link_t struct.)
+ *
+ * @pl points to the element to be freed.
+ * @ppl points to its predecessor element's next member.
+ *
+ * After linking the predecessor to the element's successor,
+ * we can free @pl by prepending it to the list of free
+ * elements.
+ *
+ * As a boundary case (which happens to be the common case),
+ * @pl points to a cell in the pl_tab rather than to some
+ * extension element dangling from that cell.
+ * We recognize this case because @ppl is NULL.
+ * In that case we promote the first extension element by copying
+ * it into the pl_tab cell and then free that extension element.
+ */
+
+dv_paddr_link_t *
+free_paddr_link(struct domain *d,
+                dv_paddr_link_t **ppl,
+                dv_paddr_link_t *pl)
+{
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    dv_paddr_link_t *npl; /* next pl */
+
+    ASSERT( paging_locked_by_me(d) );
+    /* extension mapping? */
+    if ( ppl ) /* yes. free it */
+    {
+        ASSERT(pl == (*ppl));
+        (*ppl) = npl = pl->pl_next;
+    }
+    else  /* main table */
+    {
+        /*
+         * move 2nd mapping to main table.
+         * and free 2nd mapping
+         */
+        dv_paddr_link_t * spl;
+        spl = pl->pl_next;
+        if ( spl == NULL )
+        {
+            pl->sl1ma = INVALID_PADDR;
+            return pl;
+        }
+        pl->sl1ma = spl->sl1ma;
+        pl->pl_next = spl->pl_next;
+        npl = pl; /* reprocess main table entry again */
+        pl = spl;
+    }
+    pl->sl1ma = INVALID_PADDR;
+    pl->pl_next = dirty_vram->pl_free;
+    dirty_vram->pl_free = pl;
+    return npl;
+}
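
For reference, the ppl/pl walk that feeds this function is the standard
"pointer to the predecessor's next field" unlink idiom; free_paddr_link()
only adds the promote-into-the-table-cell twist described above. A minimal,
self-contained sketch of the plain idiom (hypothetical struct node, not part
of this patch):

    /* Illustrative only: unlink the first node holding value v. */
    struct node { int v; struct node *next; };

    static void unlink_value(struct node **head, int v)
    {
        struct node **pp = head;      /* address of predecessor's next field */

        while ( *pp && (*pp)->v != v )
            pp = &(*pp)->next;        /* advance to the next link field */
        if ( *pp )
            *pp = (*pp)->next;        /* splice the matching node out */
    }
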
+
+
+/*
+ * dirty_vram_range_update()
+ *
+ * This is called whenever a level 1 page table entry is modified.
+ * If the L1PTE is being cleared, the function removes any paddr_links
+ * that refer to it.
+ * If the L1PTE is being set to a frame buffer page, a paddr_link is
+ * created for that page's entry in pl_tab.
+ * Returns 1 iff entry found and set or cleared.
+ */
+int dirty_vram_range_update(struct domain *d,
+                            unsigned long gfn,
+                            paddr_t sl1ma,
+                            int set)
+{
+    int effective = 0;
+    dv_range_t *range;
+    unsigned long i;
+    dv_paddr_link_t *pl;
+    dv_paddr_link_t **ppl;
+    int len = 0;
+
+    ASSERT(paging_locked_by_me(d));
+    range = dirty_vram_range_find_gfn(d, gfn);
+    if ( !range )
+        return effective;
+
+    
+    i = gfn - range->begin_pfn;
+    pl = &range->pl_tab[ i ].mapping;
+    ppl = NULL;
+
+    /*
+     * find matching entry (pl), if any, and its predecessor
+     * in linked list (ppl)
+     */
+    while ( pl != NULL )
+    {
+        if ( pl->sl1ma == sl1ma || pl->sl1ma == INVALID_PADDR )
+            break;
+            
+        ppl = &pl->pl_next;
+        pl = *ppl;
+        len++;
+    }
+
+    if ( set )
+    {
+        /* Did we find sl1ma in either the main table or the linked list? */
+        if ( pl == NULL ) /* no, so we'll need to alloc a link */
+        {
+            ASSERT(ppl != NULL);
+            
+#if DEBUG_alloc_paddr_inject_fault
+            {
+                static int counter;
+                
+                /* Test stuck_dirty logic for some cases */
+                if ( (++counter) % 4 == 0 )
+                {
+                    /* Simply mark the frame buffer page as always dirty */
+                    range->pl_tab[ i ].stuck_dirty = 1;
+                    gdprintk(XENLOG_DEBUG,
+                             "[%lx] inject stuck dirty fault\n",
+                             gfn );
+                    goto out;
+                }
+            }
+#endif
+            /*
+             * Have we reached the limit of mappings we're willing
+             * to bookkeep?
+             */
+            if ( len > DV_ADDR_LINK_LIST_LIMIT ) /* yes */
+            {
+#if DEBUG_link_limit_exceeded
+                if ( !range->pl_tab[ i ].stuck_dirty )
+                    gdprintk(XENLOG_DEBUG,
+                             "[%lx] link limit exceeded\n",
+                             gfn );
+#endif            
+                /* Simply mark the frame buffer page as always dirty */
+                range->pl_tab[ i ].stuck_dirty = 1;
+                goto out;
+            }
+
+            /* alloc link and append it to list */
+            (*ppl) = pl = alloc_paddr_link(d);
+            /* Were we able to allocate a link? */
+            if ( pl == NULL ) /* no */
+            {
+                /* Simply mark the frame buffer page as always dirty */
+                range->pl_tab[ i ].stuck_dirty = 1;
+                
+                gdprintk(XENLOG_DEBUG,
+                         "[%lx] alloc failure\n",
+                         gfn );
+                
+                goto out;
+            }
+        }
+        if ( pl->sl1ma != sl1ma )
+        {
+            ASSERT(pl->sl1ma == INVALID_PADDR);
+            pl->sl1ma = sl1ma;
+            range->nr_mappings++;
+        }
+        effective = 1;
+        if ( len > range->mappings_hwm )
+        {
+            range->mappings_hwm = len;
+#if DEBUG_update_vram_mapping
+            gdprintk(XENLOG_DEBUG,
+                     "[%lx] set      sl1ma:%lx hwm:%d mappings:%d "
+                     "freepages:%d\n",
+                     gfn, sl1ma,
+                     range->mappings_hwm,
+                     range->nr_mappings,
+                     d->arch.paging.shadow.free_pages);
+#endif
+        }
+    }
+    else /* clear */
+    {
+        if ( pl && pl->sl1ma == sl1ma )
+        {
+#if DEBUG_update_vram_mapping
+            gdprintk(XENLOG_DEBUG,
+                     "[%lx] clear    sl1ma:%lx mappings:%d\n",
+                     gfn, sl1ma,
+                     range->nr_mappings - 1);
+#endif
+            free_paddr_link(d, ppl, pl);
+            --range->nr_mappings;
+            effective = 1;
+        }
+    }
+ out:
+    return effective;
+}
+
+
+/*
+ * shadow_scan_dirty_flags()
+ * This produces a dirty bitmap for the range by examining every
+ * L1PTE referenced by some dv_paddr_link in the range's pl_tab table.
+ * It tests and clears each such L1PTE's dirty flag.
+ */
+static
+int shadow_scan_dirty_flags(struct domain *d,
+                            dv_range_t *range,
+                            uint8_t *dirty_bitmap)
+{
+    int flush_tlb = 0;
+    unsigned long i;
+    unsigned long nr = range->end_pfn - range->begin_pfn;
+    l1_pgentry_t *sl1e = NULL;
+
+    ASSERT( paging_locked_by_me(d) );
+    /* Iterate over VRAM to track dirty bits. */
+    for ( i = 0; i < nr; i++ )
+    {
+        int dirty = 0, len = 1;
+        dv_paddr_link_t *pl;
+        /* Does the frame buffer have an incomplete set of mappings? */
+        if ( unlikely(range->pl_tab[i].stuck_dirty) ) /* yes */
+            dirty = 1;
+        else /* The frame buffer's set of mappings is complete.  Scan it. */
+            for ( pl = &range->pl_tab[i].mapping;
+                  pl;
+                  pl = pl->pl_next, len++ )
+            {
+                paddr_t sl1ma = pl->sl1ma;
+                if ( sl1ma == INVALID_PADDR ) /* FB page is unmapped */
+                    continue;
+
+                if ( sl1e ) /* cleanup from previous iteration */
+                    unmap_domain_page(sl1e);
+
+                sl1e = map_domain_page(sl1ma >> PAGE_SHIFT);
+                sl1e += (sl1ma & (PAGE_SIZE - 1)) / sizeof(*sl1e);
+                if ( l1e_get_flags(*sl1e) & _PAGE_DIRTY )
+                {
+                    dirty = 1;
+                    /* Clear dirty so we can detect if page gets re-dirtied.
+                     * Note: this is atomic, so we may clear a
+                     * _PAGE_ACCESSED set by another processor.
+                     */
+                    l1e_remove_flags(*sl1e, _PAGE_DIRTY);
+                    flush_tlb = 1;
+                }
+            } /* for */
+        
+        if ( dirty )
+            dirty_bitmap[i >> 3] |= (1 << (i & 7));
+
+    }
+    
+    if ( sl1e )
+        unmap_domain_page(sl1e);
+
+    return flush_tlb;
+}
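
The bitmap produced here uses one bit per pfn in the range, LSB-first within
each byte, so bit i corresponds to begin_pfn + i. A consumer-side sketch of
that layout (hypothetical callback, not part of this patch):

    /* Walk a dirty bitmap as produced above and invoke fn() on each
     * dirty pfn.  Purely illustrative. */
    static void for_each_dirty_pfn(const uint8_t *bitmap,
                                   unsigned long begin_pfn,
                                   unsigned long nr,
                                   void (*fn)(unsigned long pfn))
    {
        unsigned long i;

        for ( i = 0; i < nr; i++ )
            if ( bitmap[i >> 3] & (1 << (i & 7)) )
                fn(begin_pfn + i);
    }
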
+
+
+/*
+ * shadow_track_dirty_vram()
+ * This is the API, reached via the HVMOP_track_dirty_vram hypercall, used to
+ * determine which pages in [begin_pfn .. begin_pfn + nr) have been dirtied
+ * since the last call.
+ * It creates the domain's dv_dirty_vram on demand.
+ * It creates ranges on demand when some [begin_pfn .. begin_pfn + nr) is
+ * first encountered.
+ * To collect the dirty bitmask it calls shadow_scan_dirty_flags().
+ * It copies the dirty bitmask into guest storage.
+ */
+int shadow_track_dirty_vram(struct domain *d,
+                            unsigned long begin_pfn,
+                            unsigned long nr,
+                            XEN_GUEST_HANDLE_64(uint8) guest_dirty_bitmap)
+{
+    int rc = 0;
+    unsigned long end_pfn = begin_pfn + nr;
+    int flush_tlb = 0;
+    dv_range_t *range;
+    struct p2m_domain *p2m = p2m_get_hostp2m(d);
+    uint8_t *dirty_bitmap = NULL;
+
+    /*
+     * This range test is tricky.
+     *
+     * The range [begin_pfn..end_pfn) is an open interval, so end_pfn
+     * is a pfn beyond the end of the range.
+     *
+     * p2m->max_mapped_pfn is a valid PFN so p2m->max_mapped_pfn + 1 is an
+     * invalid PFN.
+     *
+     * If end_pfn is beyond *that* then the range is invalid.
+     */
+    if ( end_pfn < begin_pfn
+         || begin_pfn > p2m->max_mapped_pfn
+         || end_pfn > p2m->max_mapped_pfn + 1 )
+        return -EINVAL;
+
+    paging_lock(d);
+
+    if ( !nr )
+    {
+        dirty_vram_free(d);
+        goto out;
+    }
+
+    if ( guest_handle_is_null(guest_dirty_bitmap) )
+        goto out;
+
+    if ( !dirty_vram_find_or_alloc(d) )
+    {
+        rc = -ENOMEM;
+        goto out;
+    }
+
+    range = dirty_vram_range_find(d, begin_pfn, nr);
+    if ( !range )
+    {
+        range = dirty_vram_range_alloc(d, begin_pfn, nr);
+        if ( range )
+            sh_find_all_vram_mappings(d->vcpu[0], range);
+    }
+    if ( range )
+    {
+        int size = ( nr + BITS_PER_BYTE - 1 ) / BITS_PER_BYTE;
+        
+        rc = -ENOMEM;
+        dirty_bitmap = xzalloc_bytes( size );
+        if ( !dirty_bitmap )
+            goto out;
+
+        flush_tlb |= shadow_scan_dirty_flags(d, range, dirty_bitmap);
+
+        rc = -EFAULT;
+        if ( copy_to_guest(guest_dirty_bitmap,
+                           dirty_bitmap,
+                           size) == 0 )
+            rc = 0;
+    }
+    
+    if ( flush_tlb )
+        flush_tlb_mask(d->domain_dirty_cpumask);
+
+out:
+    paging_unlock(d);
+    
+    if ( dirty_bitmap )
+        xfree(dirty_bitmap);
+    return rc;
+}
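
For context, this entry point and hap_track_dirty_vram() below are both
reached from the HVMOP_track_dirty_vram hypercall, which the device model
issues periodically for each frame buffer. A rough toolstack-side sketch,
assuming the existing libxc wrapper xc_hvm_track_dirty_vram() (check its
exact prototype in your tree) and a 64-bit unsigned long:

    #include <xenctrl.h>

    /* Sketch only: report how many pages of a tracked range were
     * dirtied since the previous call. */
    static long count_dirty(xc_interface *xch, domid_t dom,
                            uint64_t first_pfn, uint64_t nr)
    {
        unsigned long bitmap[(nr + 63) / 64];  /* 1 bit per page */
        long dirty = 0;
        uint64_t i;

        if ( xc_hvm_track_dirty_vram(xch, dom, first_pfn, nr, bitmap) < 0 )
            return -1;

        for ( i = 0; i < nr; i++ )
            if ( bitmap[i / 64] & (1UL << (i % 64)) )
                dirty++;           /* page first_pfn + i needs repainting */

        return dirty;
    }
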
+
+
+/************************************************/
+/*          HAP VRAM TRACKING SUPPORT           */
+/************************************************/
+
+/*
+ * hap_track_dirty_vram()
+ * Create the domain's dv_dirty_vram struct on demand.
+ * Create a dirty vram range on demand when some [begin_pfn .. begin_pfn + nr)
+ * is first encountered.
+ * Collect the guest_dirty bitmask, a bit mask of the dirty vram pages, by
+ * calling paging_log_dirty_range(), which interrogates each vram
+ * page's p2m type looking for pages that have been made writable.
+ */
+int hap_track_dirty_vram(struct domain *d,
+                         unsigned long begin_pfn,
+                         unsigned long nr,
+                         XEN_GUEST_HANDLE_64(uint8) guest_dirty_bitmap)
+{
+    long rc = 0;
+    dv_dirty_vram_t *dirty_vram;
+    uint8_t *dirty_bitmap = NULL;
+
+    if ( nr )
+    {
+        dv_range_t *range = NULL;
+        int size = ( nr + BITS_PER_BYTE - 1 ) / BITS_PER_BYTE;
+        
+        if ( !paging_mode_log_dirty(d) )
+        {
+            hap_logdirty_init(d);
+            rc = paging_log_dirty_enable(d);
+            if ( rc )
+                goto out;
+        }
+
+        rc = -ENOMEM;
+        dirty_bitmap = xzalloc_bytes( size );
+        if ( !dirty_bitmap )
+            goto out;
+        
+        paging_lock(d);
+        
+        dirty_vram = d->arch.hvm_domain.dirty_vram;
+        if ( !dirty_vram ) 
+        {
+            rc = -ENOMEM;
+            if ( !(dirty_vram = dirty_vram_alloc(d)) )
+            {
+                paging_unlock(d);
+                goto out;
+            }
+        }
+        
+        range = dirty_vram_range_find(d, begin_pfn, nr);
+        if ( !range )
+        {
+            rc = -ENOMEM;
+            if ( !(range = dirty_vram_range_alloc(d, begin_pfn, nr)) )
+            {
+                paging_unlock(d);
+                goto out;
+            }
+            
+            paging_unlock(d);
+            
+            /* set l1e entries of range within P2M table to be read-only. */
+            p2m_change_type_range(d, begin_pfn, begin_pfn + nr,
+                                  p2m_ram_rw, p2m_ram_logdirty);
+            
+            flush_tlb_mask(d->domain_dirty_cpumask);
+            
+            memset(dirty_bitmap, 0xff, size); /* consider all pages dirty */
+        }
+        else
+        {
+            paging_unlock(d);
+            
+            domain_pause(d);
+            
+            /* get the bitmap */
+            paging_log_dirty_range(d, begin_pfn, nr, dirty_bitmap);
+            
+            domain_unpause(d);
+        }
+        
+        
+        rc = -EFAULT;
+        if ( copy_to_guest(guest_dirty_bitmap,
+                           dirty_bitmap,
+                           size) == 0 )
+        {
+            rc = 0;
+        }
+    }
+    else
+    {
+        paging_lock(d);
+        
+        dirty_vram = d->arch.hvm_domain.dirty_vram;
+        if ( dirty_vram )
+        {
+            /*
+             * If zero pages specified while tracking dirty vram
+             * then stop tracking
+             */
+            dirty_vram_free(d);
+        }
+        
+        paging_unlock(d);
+    }
+out:
+    if ( dirty_bitmap )
+        xfree(dirty_bitmap);
+    
+    return rc;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c
index a95ccbf..f7d979b 100644
--- a/xen/arch/x86/mm/hap/hap.c
+++ b/xen/arch/x86/mm/hap/hap.c
@@ -53,117 +53,6 @@
 #define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg))
 
 /************************************************/
-/*          HAP VRAM TRACKING SUPPORT           */
-/************************************************/
-
-/*
- * hap_track_dirty_vram()
- * Create the domain's dv_dirty_vram struct on demand.
- * Create a dirty vram range on demand when some [begin_pfn:begin_pfn+nr] is
- * first encountered.
- * Collect the guest_dirty bitmask, a bit mask of the dirty vram pages, by
- * calling paging_log_dirty_range(), which interrogates each vram
- * page's p2m type looking for pages that have been made writable.
- */
-
-int hap_track_dirty_vram(struct domain *d,
-                         unsigned long begin_pfn,
-                         unsigned long nr,
-                         XEN_GUEST_HANDLE_64(uint8) guest_dirty_bitmap)
-{
-    long rc = 0;
-    struct sh_dirty_vram *dirty_vram;
-    uint8_t *dirty_bitmap = NULL;
-
-    if ( nr )
-    {
-        int size = (nr + BITS_PER_BYTE - 1) / BITS_PER_BYTE;
-
-        if ( !paging_mode_log_dirty(d) )
-        {
-            hap_logdirty_init(d);
-            rc = paging_log_dirty_enable(d);
-            if ( rc )
-                goto out;
-        }
-
-        rc = -ENOMEM;
-        dirty_bitmap = xzalloc_bytes(size);
-        if ( !dirty_bitmap )
-            goto out;
-
-        paging_lock(d);
-
-        dirty_vram = d->arch.hvm_domain.dirty_vram;
-        if ( !dirty_vram )
-        {
-            rc = -ENOMEM;
-            if ( (dirty_vram = xzalloc(struct sh_dirty_vram)) == NULL )
-            {
-                paging_unlock(d);
-                goto out;
-            }
-
-            d->arch.hvm_domain.dirty_vram = dirty_vram;
-        }
-
-        if ( begin_pfn != dirty_vram->begin_pfn ||
-             begin_pfn + nr != dirty_vram->end_pfn )
-        {
-            dirty_vram->begin_pfn = begin_pfn;
-            dirty_vram->end_pfn = begin_pfn + nr;
-
-            paging_unlock(d);
-
-            /* set l1e entries of range within P2M table to be read-only. */
-            p2m_change_type_range(d, begin_pfn, begin_pfn + nr,
-                                  p2m_ram_rw, p2m_ram_logdirty);
-
-            flush_tlb_mask(d->domain_dirty_cpumask);
-
-            memset(dirty_bitmap, 0xff, size); /* consider all pages dirty */
-        }
-        else
-        {
-            paging_unlock(d);
-
-            domain_pause(d);
-
-            /* get the bitmap */
-            paging_log_dirty_range(d, begin_pfn, nr, dirty_bitmap);
-
-            domain_unpause(d);
-        }
-
-        rc = -EFAULT;
-        if ( copy_to_guest(guest_dirty_bitmap, dirty_bitmap, size) == 0 )
-            rc = 0;
-    }
-    else
-    {
-        paging_lock(d);
-
-        dirty_vram = d->arch.hvm_domain.dirty_vram;
-        if ( dirty_vram )
-        {
-            /*
-             * If zero pages specified while tracking dirty vram
-             * then stop tracking
-             */
-            xfree(dirty_vram);
-            d->arch.hvm_domain.dirty_vram = NULL;
-        }
-
-        paging_unlock(d);
-    }
-out:
-    if ( dirty_bitmap )
-        xfree(dirty_bitmap);
-
-    return rc;
-}
-
-/************************************************/
 /*            HAP LOG DIRTY SUPPORT             */
 /************************************************/
 
diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
index 258f46e..41d0fe3 100644
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -690,20 +690,23 @@ void p2m_change_type_range(struct domain *d,
     struct p2m_domain *p2m = p2m_get_hostp2m(d);
 
     BUG_ON(p2m_is_grant(ot) || p2m_is_grant(nt));
-
     p2m_lock(p2m);
-    p2m->defer_nested_flush = 1;
 
+    p2m->defer_nested_flush = 1;
+    
     for ( gfn = start; gfn < end; gfn++ )
     {
         mfn = p2m->get_entry(p2m, gfn, &pt, &a, 0, NULL);
         if ( pt == ot )
-            set_p2m_entry(p2m, gfn, mfn, PAGE_ORDER_4K, nt, p2m->default_access);
+            set_p2m_entry(p2m, gfn, mfn, PAGE_ORDER_4K, nt,
+                          p2m->default_access);
     }
-
+    
     p2m->defer_nested_flush = 0;
+
     if ( nestedhvm_enabled(d) )
         p2m_flush_nestedp2m(d);
+
     p2m_unlock(p2m);
 }
 
diff --git a/xen/arch/x86/mm/paging.c b/xen/arch/x86/mm/paging.c
index a5cdbd1..cd44f6e 100644
--- a/xen/arch/x86/mm/paging.c
+++ b/xen/arch/x86/mm/paging.c
@@ -27,6 +27,7 @@
 #include <asm/p2m.h>
 #include <asm/hap.h>
 #include <asm/hvm/nestedhvm.h>
+#include <asm/dirty_vram.h>
 #include <xen/numa.h>
 #include <xsm/xsm.h>
 
@@ -192,15 +193,11 @@ int paging_log_dirty_disable(struct domain *d)
     return ret;
 }
 
-/* Mark a page as dirty */
+/* Given a guest mfn, mark a page as dirty */
 void paging_mark_dirty(struct domain *d, unsigned long guest_mfn)
 {
     unsigned long pfn;
     mfn_t gmfn;
-    int changed;
-    mfn_t mfn, *l4, *l3, *l2;
-    unsigned long *l1;
-    int i1, i2, i3, i4;
 
     gmfn = _mfn(guest_mfn);
 
@@ -210,6 +207,19 @@ void paging_mark_dirty(struct domain *d, unsigned long guest_mfn)
 
     /* We /really/ mean PFN here, even for non-translated guests. */
     pfn = get_gpfn_from_mfn(mfn_x(gmfn));
+    paging_mark_dirty_gpfn(d, pfn);
+}
+
+
+/* Given a guest pfn, mark a page as dirty */
+void paging_mark_dirty_gpfn(struct domain *d, unsigned long pfn)
+{
+    int changed;
+    mfn_t mfn, *l4, *l3, *l2;
+    unsigned long *l1;
+    int i1, i2, i3, i4;
+    dv_range_t *range;
+    
     /* Shared MFNs should NEVER be marked dirty */
     BUG_ON(SHARED_M2P(pfn));
 
@@ -229,6 +239,11 @@ void paging_mark_dirty(struct domain *d, unsigned long guest_mfn)
     /* Recursive: this is called from inside the shadow code */
     paging_lock_recursive(d);
 
+    d->arch.paging.log_dirty.dirty_count++;
+    range = dirty_vram_range_find_gfn(d, pfn);
+    if ( range )
+        range->dirty_count++;
+
     if ( unlikely(!mfn_valid(d->arch.paging.log_dirty.top)) ) 
     {
          d->arch.paging.log_dirty.top = paging_new_log_dirty_node(d);
@@ -445,7 +460,31 @@ void paging_log_dirty_range(struct domain *d,
     struct p2m_domain *p2m = p2m_get_hostp2m(d);
     int i;
     unsigned long pfn;
+    dv_range_t *range;
+    unsigned int range_dirty_count;
 
+    paging_lock(d);
+    range = dirty_vram_range_find_gfn(d, begin_pfn);
+    if ( !range )
+    {
+        paging_unlock(d);
+        goto out;
+    }
+    
+    range_dirty_count = range->dirty_count;
+    range->dirty_count = 0;
+
+    paging_unlock(d);
+    
+    if ( !range_dirty_count )
+        goto out;
+
+    PAGING_DEBUG(LOGDIRTY,
+                 "log-dirty-range: dom %u [%05lx:%05lx] range_dirty=%u\n",
+                 d->domain_id,
+                 begin_pfn,
+                 begin_pfn + nr,
+                 range_dirty_count);
     /*
      * Set l1e entries of P2M table to be read-only.
      *
@@ -460,15 +499,17 @@ void paging_log_dirty_range(struct domain *d,
 
     for ( i = 0, pfn = begin_pfn; pfn < begin_pfn + nr; i++, pfn++ )
     {
-        p2m_type_t pt;
-        pt = p2m_change_type(d, pfn, p2m_ram_rw, p2m_ram_logdirty);
-        if ( pt == p2m_ram_rw )
+        if ( p2m_change_type(d, pfn, p2m_ram_rw, p2m_ram_logdirty) ==
+             p2m_ram_rw )
             dirty_bitmap[i >> 3] |= (1 << (i & 7));
     }
 
     p2m_unlock(p2m);
 
     flush_tlb_mask(d->domain_dirty_cpumask);
+
+ out:
+    return;
 }
 
 /* Note that this function takes three function pointers. Callers must supply
diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c
index ce79131..1e4b880 100644
--- a/xen/arch/x86/mm/shadow/common.c
+++ b/xen/arch/x86/mm/shadow/common.c
@@ -36,6 +36,7 @@
 #include <asm/current.h>
 #include <asm/flushtlb.h>
 #include <asm/shadow.h>
+#include <asm/dirty_vram.h>
 #include <xen/numa.h>
 #include "private.h"
 
@@ -3121,12 +3122,7 @@ void shadow_teardown(struct domain *d)
      * calls now that we've torn down the bitmap */
     d->arch.paging.mode &= ~PG_log_dirty;
 
-    if (d->arch.hvm_domain.dirty_vram) {
-        xfree(d->arch.hvm_domain.dirty_vram->sl1ma);
-        xfree(d->arch.hvm_domain.dirty_vram->dirty_bitmap);
-        xfree(d->arch.hvm_domain.dirty_vram);
-        d->arch.hvm_domain.dirty_vram = NULL;
-    }
+    dirty_vram_free(d);
 
     paging_unlock(d);
 
@@ -3464,178 +3460,217 @@ void shadow_clean_dirty_bitmap(struct domain *d)
 
 /**************************************************************************/
 /* VRAM dirty tracking support */
-int shadow_track_dirty_vram(struct domain *d,
-                            unsigned long begin_pfn,
-                            unsigned long nr,
-                            XEN_GUEST_HANDLE_64(uint8) dirty_bitmap)
-{
-    int rc;
-    unsigned long end_pfn = begin_pfn + nr;
-    unsigned long dirty_size = (nr + 7) / 8;
-    int flush_tlb = 0;
-    unsigned long i;
-    p2m_type_t t;
-    struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
-    struct p2m_domain *p2m = p2m_get_hostp2m(d);
 
-    if (end_pfn < begin_pfn
-            || begin_pfn > p2m->max_mapped_pfn
-            || end_pfn >= p2m->max_mapped_pfn)
-        return -EINVAL;
 
-    /* We perform p2m lookups, so lock the p2m upfront to avoid deadlock */
-    p2m_lock(p2m_get_hostp2m(d));
-    paging_lock(d);
+/* Support functions for shadow-based dirty VRAM code */
 
-    if ( dirty_vram && (!nr ||
-             ( begin_pfn != dirty_vram->begin_pfn
-            || end_pfn   != dirty_vram->end_pfn )) )
-    {
-        /* Different tracking, tear the previous down. */
-        gdprintk(XENLOG_INFO, "stopping tracking VRAM %lx - %lx\n", dirty_vram->begin_pfn, dirty_vram->end_pfn);
-        xfree(dirty_vram->sl1ma);
-        xfree(dirty_vram->dirty_bitmap);
-        xfree(dirty_vram);
-        dirty_vram = d->arch.hvm_domain.dirty_vram = NULL;
-    }
+#define DEBUG_unshadow_sl1ma                  0          
+#define DEBUG_unshadow_sl1ma_detail           0
+#define DEBUG_count_initial_mappings          0
 
-    if ( !nr )
+/* smfn is no longer a shadow page.  Remove it from any
+ * dirty vram range mapping. */
+void
+dirty_vram_delete_shadow(struct vcpu *v,
+                         unsigned long gfn,
+                         unsigned int shadow_type, 
+                         mfn_t smfn)
+{
+    static unsigned int l1_shadow_mask = 
+          1 << SH_type_l1_32_shadow
+        | 1 << SH_type_fl1_32_shadow
+        | 1 << SH_type_l1_pae_shadow
+        | 1 << SH_type_fl1_pae_shadow
+        | 1 << SH_type_l1_64_shadow
+        | 1 << SH_type_fl1_64_shadow
+        ;
+    struct domain *d = v->domain;
+    dv_dirty_vram_t *dirty_vram;
+    struct list_head *curr, *next;
+    
+    ASSERT(paging_locked_by_me(d));
+    /* Ignore all but level 1 shadows */
+    
+    if ( (l1_shadow_mask & (1 << shadow_type)) == 0 )
     {
-        rc = 0;
         goto out;
     }
 
-    /* This should happen seldomly (Video mode change),
-     * no need to be careful. */
+    dirty_vram = d->arch.hvm_domain.dirty_vram;
     if ( !dirty_vram )
     {
-        /* Throw away all the shadows rather than walking through them 
-         * up to nr times getting rid of mappings of each pfn */
-        shadow_blow_tables(d);
-
-        gdprintk(XENLOG_INFO, "tracking VRAM %lx - %lx\n", begin_pfn, end_pfn);
-
-        rc = -ENOMEM;
-        if ( (dirty_vram = xmalloc(struct sh_dirty_vram)) == NULL )
-            goto out;
-        dirty_vram->begin_pfn = begin_pfn;
-        dirty_vram->end_pfn = end_pfn;
-        d->arch.hvm_domain.dirty_vram = dirty_vram;
-
-        if ( (dirty_vram->sl1ma = xmalloc_array(paddr_t, nr)) == NULL )
-            goto out_dirty_vram;
-        memset(dirty_vram->sl1ma, ~0, sizeof(paddr_t) * nr);
-
-        if ( (dirty_vram->dirty_bitmap = xzalloc_array(uint8_t, dirty_size)) == NULL )
-            goto out_sl1ma;
-
-        dirty_vram->last_dirty = NOW();
-
-        /* Tell the caller that this time we could not track dirty bits. */
-        rc = -ENODATA;
-    }
-    else if (dirty_vram->last_dirty == -1)
-    {
-        /* still completely clean, just copy our empty bitmap */
-        rc = -EFAULT;
-        if ( copy_to_guest(dirty_bitmap, dirty_vram->dirty_bitmap, dirty_size) == 0 )
-            rc = 0;
+        goto out;
     }
-    else
+        
+    list_for_each_safe(curr, next, &dirty_vram->range_head)
     {
-        /* Iterate over VRAM to track dirty bits. */
-        for ( i = 0; i < nr; i++ ) {
-            mfn_t mfn = get_gfn_query_unlocked(d, begin_pfn + i, &t);
-            struct page_info *page;
-            int dirty = 0;
-            paddr_t sl1ma = dirty_vram->sl1ma[i];
-
-            if (mfn_x(mfn) == INVALID_MFN)
-            {
-                dirty = 1;
-            }
-            else
+        dv_range_t *range = list_entry(curr, dv_range_t, range_link);
+        unsigned long i;
+        int max_mappings = 1, mappings = 0;
+        int unshadowed = 0;
+        for ( i = 0; i != range->end_pfn - range->begin_pfn; i++ )
+        {
+            dv_paddr_link_t *pl = &range->pl_tab[ i ].mapping;
+            dv_paddr_link_t **ppl = NULL;
+            mappings = 0;
+            
+            while ( pl != NULL )
             {
-                page = mfn_to_page(mfn);
-                switch (page->u.inuse.type_info & PGT_count_mask)
-                {
-                case 0:
-                    /* No guest reference, nothing to track. */
-                    break;
-                case 1:
-                    /* One guest reference. */
-                    if ( sl1ma == INVALID_PADDR )
-                    {
-                        /* We don't know which sl1e points to this, too bad. */
-                        dirty = 1;
-                        /* TODO: Heuristics for finding the single mapping of
-                         * this gmfn */
-                        flush_tlb |= sh_remove_all_mappings(d->vcpu[0], mfn);
-                    }
-                    else
-                    {
-                        /* Hopefully the most common case: only one mapping,
-                         * whose dirty bit we can use. */
-                        l1_pgentry_t *sl1e = maddr_to_virt(sl1ma);
-
-                        if ( l1e_get_flags(*sl1e) & _PAGE_DIRTY )
-                        {
-                            dirty = 1;
-                            /* Note: this is atomic, so we may clear a
-                             * _PAGE_ACCESSED set by another processor. */
-                            l1e_remove_flags(*sl1e, _PAGE_DIRTY);
-                            flush_tlb = 1;
-                        }
-                    }
-                    break;
-                default:
-                    /* More than one guest reference,
-                     * we don't afford tracking that. */
-                    dirty = 1;
+                paddr_t sl1ma = pl->sl1ma;
+                unsigned long sl1mn;
+                
+                if ( sl1ma == INVALID_PADDR )
                     break;
+                
+                sl1mn = sl1ma >> PAGE_SHIFT;
+                if ( sl1mn == mfn_x(smfn) ) {
+#if DEBUG_unshadow_sl1ma_detail
+                    gdprintk(XENLOG_DEBUG,
+                             "[%lx] gfn[%lx] unshadow sl1ma:%lx\n",
+                             mfn_x(smfn),
+                             range->begin_pfn + i,
+                             sl1ma);
+#endif
+                    unshadowed++;
+                    pl = free_paddr_link(d, ppl, pl);
+                    --range->nr_mappings;
+                }
+                else
+                {
+                    ppl = &pl->pl_next;
+                    pl = *ppl;
+                    mappings++;
                 }
             }
-
-            if ( dirty )
+        }
+        if ( mappings > max_mappings )
+            max_mappings = mappings;
+        
+        if ( unshadowed ) {
+#if DEBUG_unshadow_sl1ma
+            gdprintk(XENLOG_DEBUG,
+                     "[%lx] gfn[%05lx:%05lx] unshadowed:%d mappings:0x%x "
+                     "max_mappings:%d\n",
+                     mfn_x(smfn),
+                     range->begin_pfn, range->end_pfn,
+                     unshadowed, range->nr_mappings, max_mappings);
+#endif
+            if ( range->nr_mappings == 0 )
             {
-                dirty_vram->dirty_bitmap[i / 8] |= 1 << (i % 8);
-                dirty_vram->last_dirty = NOW();
+                dirty_vram_range_free(d, range);                    
             }
         }
+    }
+ out:
+    return;
+}
+
 
-        rc = -EFAULT;
-        if ( copy_to_guest(dirty_bitmap, dirty_vram->dirty_bitmap, dirty_size) == 0 ) {
-            memset(dirty_vram->dirty_bitmap, 0, dirty_size);
-            if (dirty_vram->last_dirty + SECONDS(2) < NOW())
+typedef int (*hash_pfn_callback_t)(struct vcpu *v,
+                                   mfn_t smfn,
+                                   unsigned long begin_pfn,
+                                   unsigned long end_pfn,
+                                   int *removed);
+
+static int hash_pfn_foreach(struct vcpu *v, 
+                            unsigned int callback_mask, 
+                            hash_pfn_callback_t callbacks[], 
+                            unsigned long begin_pfn,
+                            unsigned long end_pfn)
+/* Walk the hash table looking at the types of the entries and 
+ * calling the appropriate callback function for each entry. 
+ * The mask determines which shadow types we call back for, and the array
+ * of callbacks tells us which function to call.
+ * Any callback may return non-zero to let us skip the rest of the scan. 
+ *
+ * WARNING: Callbacks MUST NOT add or remove hash entries unless they 
+ * then return non-zero to terminate the scan. */
+{
+    int i, done = 0, removed = 0;
+    struct domain *d = v->domain;
+    struct page_info *x;
+
+    /* Say we're here, to stop hash-lookups reordering the chains */
+    ASSERT(paging_locked_by_me(d));
+    ASSERT(d->arch.paging.shadow.hash_walking == 0);
+    d->arch.paging.shadow.hash_walking = 1;
+
+    for ( i = 0; i < SHADOW_HASH_BUCKETS; i++ ) 
+    {
+        /* WARNING: This is not safe against changes to the hash table.
+         * The callback *must* return non-zero if it has inserted or
+         * deleted anything from the hash (lookups are OK, though). */
+        for ( x = d->arch.paging.shadow.hash_table[i];
+              x;
+              x = next_shadow(x) )
+        {
+            if ( callback_mask & (1 << x->u.sh.type) )
             {
-                /* was clean for more than two seconds, try to disable guest
-                 * write access */
-                for ( i = begin_pfn; i < end_pfn; i++ ) {
-                    mfn_t mfn = get_gfn_query_unlocked(d, i, &t);
-                    if (mfn_x(mfn) != INVALID_MFN)
-                        flush_tlb |= sh_remove_write_access(d->vcpu[0], mfn, 1, 0);
-                }
-                dirty_vram->last_dirty = -1;
+                ASSERT(x->u.sh.type <= 15);
+                ASSERT(callbacks[x->u.sh.type] != NULL);
+                done = callbacks[x->u.sh.type](v, page_to_mfn(x), 
+                                               begin_pfn, end_pfn,
+                                               &removed);
+                if ( done ) break;
             }
-            rc = 0;
         }
+        if ( done ) break; 
     }
-    if ( flush_tlb )
-        flush_tlb_mask(d->domain_dirty_cpumask);
-    goto out;
+    d->arch.paging.shadow.hash_walking = 0;
+    return removed;
+}
 
-out_sl1ma:
-    xfree(dirty_vram->sl1ma);
-out_dirty_vram:
-    xfree(dirty_vram);
-    dirty_vram = d->arch.hvm_domain.dirty_vram = NULL;
+void sh_find_all_vram_mappings(struct vcpu *v,
+                               dv_range_t *range)
+{
+    /* Dispatch table for getting per-type functions */
+    static hash_pfn_callback_t callbacks[SH_type_unused] = {
+        NULL, /* none    */
+        SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, 2), /* l1_32   */
+        SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, 2), /* fl1_32  */
+        NULL, /* l2_32   */
+        SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, 3), /* l1_pae  */
+        SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, 3), /* fl1_pae */
+        NULL, /* l2_pae  */
+        NULL, /* l2h_pae */
+#if CONFIG_PAGING_LEVELS >= 4
+        SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, 4), /* l1_64   */
+        SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, 4), /* fl1_64  */
+#else
+        NULL, /* l1_64   */
+        NULL, /* fl1_64  */
+#endif
+        NULL, /* l2_64   */
+        NULL, /* l2h_64  */
+        NULL, /* l3_64   */
+        NULL, /* l4_64   */
+        NULL, /* p2m     */
+        NULL  /* unused  */
+    };
 
-out:
-    paging_unlock(d);
-    p2m_unlock(p2m_get_hostp2m(d));
-    return rc;
+    static unsigned int callback_mask = 
+          1 << SH_type_l1_32_shadow
+        | 1 << SH_type_fl1_32_shadow
+        | 1 << SH_type_l1_pae_shadow
+        | 1 << SH_type_fl1_pae_shadow
+        | 1 << SH_type_l1_64_shadow
+        | 1 << SH_type_fl1_64_shadow
+        ;
+
+    perfc_incr(shadow_mappings);
+
+    hash_pfn_foreach(v, callback_mask, callbacks,
+                     range->begin_pfn,
+                     range->end_pfn);
+
+#if DEBUG_count_initial_mappings
+    gdprintk(XENLOG_DEBUG, "[%05lx:%05lx] count of initial mappings:%d\n",
+             range->begin_pfn, range->end_pfn,
+             range->nr_mappings);
+#endif
 }
 
+
 /**************************************************************************/
 /* Shadow-control XEN_DOMCTL dispatcher */
 
diff --git a/xen/arch/x86/mm/shadow/multi.c b/xen/arch/x86/mm/shadow/multi.c
index 4967da1..bb983bc 100644
--- a/xen/arch/x86/mm/shadow/multi.c
+++ b/xen/arch/x86/mm/shadow/multi.c
@@ -35,6 +35,7 @@
 #include <asm/flushtlb.h>
 #include <asm/hvm/hvm.h>
 #include <asm/hvm/cacheattr.h>
+#include <asm/dirty_vram.h>
 #include <asm/mtrr.h>
 #include <asm/guest_pt.h>
 #include <public/sched.h>
@@ -149,6 +150,10 @@ delete_fl1_shadow_status(struct vcpu *v, gfn_t gfn, mfn_t smfn)
     SHADOW_PRINTK("gfn=%"SH_PRI_gfn", type=%08x, smfn=%05lx\n",
                    gfn_x(gfn), SH_type_fl1_shadow, mfn_x(smfn));
     ASSERT(mfn_to_page(smfn)->u.sh.head);
+
+    /* Remove any dv_paddr_links that refer to the erstwhile shadow page */
+    dirty_vram_delete_shadow(v, gfn_x(gfn), SH_type_fl1_shadow, smfn);
+    
     shadow_hash_delete(v, gfn_x(gfn), SH_type_fl1_shadow, smfn);
 }
 
@@ -160,6 +165,10 @@ delete_shadow_status(struct vcpu *v, mfn_t gmfn, u32 shadow_type, mfn_t smfn)
                    v->domain->domain_id, v->vcpu_id,
                    mfn_x(gmfn), shadow_type, mfn_x(smfn));
     ASSERT(mfn_to_page(smfn)->u.sh.head);
+    
+    /* Remove any dv_paddr_links that refer to the erstwhile shadow page */
+    dirty_vram_delete_shadow(v, mfn_x(gmfn), shadow_type, smfn);
+    
     shadow_hash_delete(v, mfn_x(gmfn), shadow_type, smfn);
     /* 32-on-64 PV guests don't own their l4 pages; see set_shadow_status */
     if ( !is_pv_32on64_vcpu(v) || shadow_type != SH_type_l4_64_shadow )
@@ -516,7 +525,6 @@ _sh_propagate(struct vcpu *v,
     guest_l1e_t guest_entry = { guest_intpte };
     shadow_l1e_t *sp = shadow_entry_ptr;
     struct domain *d = v->domain;
-    struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
     gfn_t target_gfn = guest_l1e_get_gfn(guest_entry);
     u32 pass_thru_flags;
     u32 gflags, sflags;
@@ -663,17 +671,6 @@ _sh_propagate(struct vcpu *v,
         }
     }
 
-    if ( unlikely((level == 1) && dirty_vram
-            && dirty_vram->last_dirty == -1
-            && gfn_x(target_gfn) >= dirty_vram->begin_pfn
-            && gfn_x(target_gfn) < dirty_vram->end_pfn) )
-    {
-        if ( ft & FETCH_TYPE_WRITE )
-            dirty_vram->last_dirty = NOW();
-        else
-            sflags &= ~_PAGE_RW;
-    }
-
     /* Read-only memory */
     if ( p2m_is_readonly(p2mt) ||
          (p2mt == p2m_mmio_direct &&
@@ -1072,101 +1069,60 @@ static int shadow_set_l2e(struct vcpu *v,
     return flags;
 }
 
-static inline void shadow_vram_get_l1e(shadow_l1e_t new_sl1e,
+/* shadow_vram_fix_l1e()
+ *
+ * Tests L1PTEs as they are modified, looking for when they start to (or
+ * cease to) point to frame buffer pages.  If the old and new gfns differ,
+ * calls dirty_vram_range_update() to update the dirty_vram structures.
+ */
+static inline void shadow_vram_fix_l1e(shadow_l1e_t old_sl1e,
+                                       shadow_l1e_t new_sl1e,
                                        shadow_l1e_t *sl1e,
                                        mfn_t sl1mfn,
                                        struct domain *d)
 { 
-    mfn_t mfn = shadow_l1e_get_mfn(new_sl1e);
-    int flags = shadow_l1e_get_flags(new_sl1e);
-    unsigned long gfn;
-    struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    mfn_t new_mfn, old_mfn;
+    unsigned long new_gfn = INVALID_M2P_ENTRY, old_gfn = INVALID_M2P_ENTRY;
+    paddr_t sl1ma;
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
 
-    if ( !dirty_vram         /* tracking disabled? */
-         || !(flags & _PAGE_RW) /* read-only mapping? */
-         || !mfn_valid(mfn) )   /* mfn can be invalid in mmio_direct */
+    if ( !dirty_vram )
         return;
 
-    gfn = mfn_to_gfn(d, mfn);
-    /* Page sharing not supported on shadow PTs */
-    BUG_ON(SHARED_M2P(gfn));
+    sl1ma = pfn_to_paddr(mfn_x(sl1mfn)) | ((unsigned long)sl1e & ~PAGE_MASK);
 
-    if ( (gfn >= dirty_vram->begin_pfn) && (gfn < dirty_vram->end_pfn) )
+    old_mfn = shadow_l1e_get_mfn(old_sl1e);
+
+    if ( !sh_l1e_is_magic(old_sl1e) &&
+         (l1e_get_flags(old_sl1e) & _PAGE_PRESENT) &&
+         mfn_valid(old_mfn))
     {
-        unsigned long i = gfn - dirty_vram->begin_pfn;
-        struct page_info *page = mfn_to_page(mfn);
-        
-        if ( (page->u.inuse.type_info & PGT_count_mask) == 1 )
-            /* Initial guest reference, record it */
-            dirty_vram->sl1ma[i] = pfn_to_paddr(mfn_x(sl1mfn))
-                | ((unsigned long)sl1e & ~PAGE_MASK);
+        old_gfn = mfn_to_gfn(d, old_mfn);
     }
-}
-
-static inline void shadow_vram_put_l1e(shadow_l1e_t old_sl1e,
-                                       shadow_l1e_t *sl1e,
-                                       mfn_t sl1mfn,
-                                       struct domain *d)
-{
-    mfn_t mfn = shadow_l1e_get_mfn(old_sl1e);
-    int flags = shadow_l1e_get_flags(old_sl1e);
-    unsigned long gfn;
-    struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
-
-    if ( !dirty_vram         /* tracking disabled? */
-         || !(flags & _PAGE_RW) /* read-only mapping? */
-         || !mfn_valid(mfn) )   /* mfn can be invalid in mmio_direct */
-        return;
-
-    gfn = mfn_to_gfn(d, mfn);
-    /* Page sharing not supported on shadow PTs */
-    BUG_ON(SHARED_M2P(gfn));
-
-    if ( (gfn >= dirty_vram->begin_pfn) && (gfn < dirty_vram->end_pfn) )
+    
+    new_mfn = shadow_l1e_get_mfn(new_sl1e);
+    if ( !sh_l1e_is_magic(new_sl1e) &&
+         (l1e_get_flags(new_sl1e) & _PAGE_PRESENT) &&
+         mfn_valid(new_mfn))
     {
-        unsigned long i = gfn - dirty_vram->begin_pfn;
-        struct page_info *page = mfn_to_page(mfn);
-        int dirty = 0;
-        paddr_t sl1ma = pfn_to_paddr(mfn_x(sl1mfn))
-            | ((unsigned long)sl1e & ~PAGE_MASK);
+        new_gfn = mfn_to_gfn(d, new_mfn);
+    }
 
-        if ( (page->u.inuse.type_info & PGT_count_mask) == 1 )
-        {
-            /* Last reference */
-            if ( dirty_vram->sl1ma[i] == INVALID_PADDR ) {
-                /* We didn't know it was that one, let's say it is dirty */
-                dirty = 1;
-            }
-            else
-            {
-                ASSERT(dirty_vram->sl1ma[i] == sl1ma);
-                dirty_vram->sl1ma[i] = INVALID_PADDR;
-                if ( flags & _PAGE_DIRTY )
-                    dirty = 1;
-            }
-        }
-        else
+    if ( old_gfn == new_gfn ) return;
+
+    if ( VALID_M2P(old_gfn) )
+        if ( dirty_vram_range_update(d, old_gfn, sl1ma, 0/*clear*/) )
         {
-            /* We had more than one reference, just consider the page dirty. */
-            dirty = 1;
-            /* Check that it's not the one we recorded. */
-            if ( dirty_vram->sl1ma[i] == sl1ma )
-            {
-                /* Too bad, we remembered the wrong one... */
-                dirty_vram->sl1ma[i] = INVALID_PADDR;
-            }
-            else
-            {
-                /* Ok, our recorded sl1e is still pointing to this page, let's
-                 * just hope it will remain. */
-            }
+            SHADOW_PRINTK("gfn %lx (mfn %lx) cleared vram pte\n",
+                          old_gfn, mfn_x(old_mfn));
         }
-        if ( dirty )
+
+    if ( VALID_M2P(new_gfn) )
+        if ( dirty_vram_range_update(d, new_gfn, sl1ma, 1/*set*/) )
         {
-            dirty_vram->dirty_bitmap[i / 8] |= 1 << (i % 8);
-            dirty_vram->last_dirty = NOW();
+            SHADOW_PRINTK("gfn %lx (mfn %lx) set vram pte\n",
+                          new_gfn, mfn_x(new_mfn));
         }
-    }
 }
 
 static int shadow_set_l1e(struct vcpu *v, 
@@ -1211,12 +1167,13 @@ static int shadow_set_l1e(struct vcpu *v,
                 shadow_l1e_remove_flags(new_sl1e, _PAGE_RW);
                 /* fall through */
             case 0:
-                shadow_vram_get_l1e(new_sl1e, sl1e, sl1mfn, d);
                 break;
             }
         }
     } 
 
+    shadow_vram_fix_l1e(old_sl1e, new_sl1e, sl1e, sl1mfn, d);
+
     /* Write the new entry */
     shadow_write_entries(sl1e, &new_sl1e, 1, sl1mfn);
     flags |= SHADOW_SET_CHANGED;
@@ -1231,7 +1188,6 @@ static int shadow_set_l1e(struct vcpu *v,
          * trigger a flush later. */
         if ( shadow_mode_refcounts(d) ) 
         {
-            shadow_vram_put_l1e(old_sl1e, sl1e, sl1mfn, d);
             shadow_put_page_from_l1e(old_sl1e, d);
             TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_SHADOW_L1_PUT_REF);
         } 
@@ -2018,7 +1974,6 @@ void sh_destroy_l1_shadow(struct vcpu *v, mfn_t smfn)
         SHADOW_FOREACH_L1E(sl1mfn, sl1e, 0, 0, {
             if ( (shadow_l1e_get_flags(*sl1e) & _PAGE_PRESENT)
                  && !sh_l1e_is_magic(*sl1e) ) {
-                shadow_vram_put_l1e(*sl1e, sl1e, sl1mfn, d);
                 shadow_put_page_from_l1e(*sl1e, d);
             }
         });
@@ -4336,6 +4291,37 @@ int sh_rm_mappings_from_l1(struct vcpu *v, mfn_t sl1mfn, mfn_t target_mfn)
     return done;
 }
 
+
+int sh_find_vram_mappings_in_l1(struct vcpu *v,
+                                mfn_t sl1mfn,
+                                unsigned long begin_pfn,
+                                unsigned long end_pfn,
+                                int *removed)
+/* Find all VRAM mappings in this shadow l1 table */
+{
+    struct domain *d = v->domain;
+    shadow_l1e_t *sl1e;
+    int done = 0;
+
+    /* only returns _PAGE_PRESENT entries */
+    SHADOW_FOREACH_L1E(sl1mfn, sl1e, 0, done, 
+    {
+        unsigned long gfn;
+        mfn_t gmfn = shadow_l1e_get_mfn(*sl1e);
+        if ( !mfn_valid(gmfn) )
+            continue;
+        gfn = mfn_to_gfn(d, gmfn);
+        if ( VALID_M2P(gfn) && (begin_pfn <= gfn) && (gfn < end_pfn) ) 
+        {
+            paddr_t sl1ma =
+                pfn_to_paddr(mfn_x(sl1mfn)) |
+                ( (unsigned long)sl1e & ~PAGE_MASK );
+            dirty_vram_range_update(v->domain, gfn, sl1ma, 1/*set*/);
+        }
+    });
+    return 0;
+}
+
 /**************************************************************************/
 /* Functions to excise all pointers to shadows from higher-level shadows. */
 
diff --git a/xen/arch/x86/mm/shadow/multi.h b/xen/arch/x86/mm/shadow/multi.h
index 835121e..436a4ac 100644
--- a/xen/arch/x86/mm/shadow/multi.h
+++ b/xen/arch/x86/mm/shadow/multi.h
@@ -66,7 +66,12 @@ SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1, GUEST_LEVELS)
 extern int
 SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1, GUEST_LEVELS)
     (struct vcpu *v, mfn_t sl1mfn, mfn_t target_mfn);
-
+extern int
+SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, GUEST_LEVELS)
+     (struct vcpu *v, mfn_t sl1mfn, 
+      unsigned long begin_pfn,
+      unsigned long end_pfn,
+      int *removed);
 extern void
 SHADOW_INTERNAL_NAME(sh_clear_shadow_entry, GUEST_LEVELS)
     (struct vcpu *v, void *ep, mfn_t smfn);
diff --git a/xen/arch/x86/mm/shadow/types.h b/xen/arch/x86/mm/shadow/types.h
index 43ce1db..5b0f9f7 100644
--- a/xen/arch/x86/mm/shadow/types.h
+++ b/xen/arch/x86/mm/shadow/types.h
@@ -229,6 +229,7 @@ static inline shadow_l4e_t shadow_l4e_from_mfn(mfn_t mfn, u32 flags)
 #define sh_update_cr3              INTERNAL_NAME(sh_update_cr3)
 #define sh_rm_write_access_from_l1 INTERNAL_NAME(sh_rm_write_access_from_l1)
 #define sh_rm_mappings_from_l1     INTERNAL_NAME(sh_rm_mappings_from_l1)
+#define sh_find_vram_mappings_in_l1 INTERNAL_NAME(sh_find_vram_mappings_in_l1)
 #define sh_remove_l1_shadow        INTERNAL_NAME(sh_remove_l1_shadow)
 #define sh_remove_l2_shadow        INTERNAL_NAME(sh_remove_l2_shadow)
 #define sh_remove_l3_shadow        INTERNAL_NAME(sh_remove_l3_shadow)
diff --git a/xen/include/asm-x86/dirty_vram.h b/xen/include/asm-x86/dirty_vram.h
new file mode 100644
index 0000000..53be66e
--- /dev/null
+++ b/xen/include/asm-x86/dirty_vram.h
@@ -0,0 +1,202 @@
+/****************************************************************************
+ * include/asm-x86/dirty_vram.h
+ *
+ * Interface for tracking dirty VRAM pages
+ *
+ * Copyright (c) 2012 Citrix Systems, Inc. (Robert Phillips)
+ * Parts of this code are Copyright (c) 2007 Advanced Micro Devices (Wei Huang)
+ * Parts of this code are Copyright (c) 2006 by XenSource Inc.
+ * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
+ * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#ifndef _DIRTY_VRAM_H
+#define _DIRTY_VRAM_H
+
+/*
+ * In shadow mode we need to bookkeep all the L1 page table entries that
+ * map a frame buffer page.  Struct dv_paddr_link does this by
+ * recording the address of an L1 page table entry for some frame buffer page.
+ * It also has a link to additional pl entries if the frame buffer page
+ * has multiple mappings.
+ * In practice very few pages have multiple mappings,
+ * but to rule out pathological situations we limit the number of
+ * mappings we're willing to bookkeep.
+ */
+
+#define DV_ADDR_LINK_LIST_LIMIT 64
+
+typedef struct dv_paddr_link {
+    paddr_t sl1ma;
+    struct dv_paddr_link *pl_next;
+} dv_paddr_link_t;
+
+typedef struct dv_pl_entry {
+    dv_paddr_link_t mapping;
+    bool_t stuck_dirty;
+} dv_pl_entry_t;
+
+/*
+ * This defines an extension page of pl entries for FB pages with multiple
+ * mappings. All such pages (of a domain) are linked together.
+ */
+typedef struct dv_paddr_link_ext {
+    struct list_head ext_link;
+    dv_paddr_link_t entries[ ( PAGE_SIZE - sizeof( struct list_head ) ) /
+                             sizeof( dv_paddr_link_t ) ];
+} dv_paddr_link_ext_t;
+
+/*
+ * This defines a single frame buffer range.  It bookkeeps all the
+ * level 1 PTEs that map guest pages within that range.
+ * All such ranges (of a domain) are linked together.
+ */
+typedef struct dv_range {
+    struct list_head range_link; /* the several ranges form a linked list */
+    unsigned long begin_pfn;
+    unsigned long end_pfn;
+    dv_pl_entry_t *pl_tab; /* table has 1 pl entry per pfn in range */
+    int nr_mappings;  /* total number of mappings in this range */
+    int mappings_hwm; /* high water mark of max mapping count */
+    unsigned int dirty_count;
+} dv_range_t;
+
+/*
+ * This contains all the data structures required by a domain to
+ * bookkeep the dirty pages within its frame buffers.
+ */
+typedef struct dv_dirty_vram {
+    struct list_head range_head; /* head of the linked list of ranges */
+    struct list_head ext_head; /* head of list of extension pages */
+    dv_paddr_link_t *pl_free; /* free list of pl's within extension pages */
+    int nr_ranges; /* bookkeeps number of ranges */
+    int ranges_hwm; /* high water mark of max number of ranges */
+} dv_dirty_vram_t;
+
+/* Allocates domain's dirty_vram structure */
+dv_dirty_vram_t *
+dirty_vram_alloc(struct domain *d);
+
+/*
+ * Returns domain's dirty_vram structure,
+ * allocating it if necessary
+ */
+dv_dirty_vram_t *
+dirty_vram_find_or_alloc(struct domain *d);
+
+/* Frees domain's dirty_vram structure */
+void dirty_vram_free(struct domain *d);
+
+/* Returns dirty vram range containing gfn, NULL if none */
+struct dv_range *
+dirty_vram_range_find_gfn(struct domain *d,
+                          unsigned long gfn);
+
+/*
+ * Returns dirty vram range matching [ begin_pfn .. begin_pfn+nr ),
+ * NULL if none
+ */
+dv_range_t *
+dirty_vram_range_find(struct domain *d,
+                      unsigned long begin_pfn,
+                      unsigned long nr);
+
+/*
+ * Allocate dirty vram range containing [ begin_pfn .. begin_pfn+nr ),
+ * freeing any existing range that overlaps the new range.
+ */
+dv_range_t *
+dirty_vram_range_alloc(struct domain *d,
+                       unsigned long begin_pfn,
+                       unsigned long nr);
+
+/*
+ * Returns dirty vram range matching [ begin_pfn .. begin_pfn+nr ),
+ * creating a range if none already exists and
+ * freeing any existing range that overlaps the new range.
+ */
+dv_range_t *
+dirty_vram_range_find_or_alloc(struct domain *d,
+                               unsigned long begin_pfn,
+                               unsigned long nr);
+
+void dirty_vram_range_free(struct domain *d,
+                           dv_range_t *range);
+
+/* Bookkeep PTE address of a frame buffer page */
+int dirty_vram_range_update(struct domain *d,
+                            unsigned long gfn,
+                            paddr_t sl1ma,
+                            int set);
+
+/*
+ * smfn is no longer a shadow page.  Remove it from any
+ * dirty vram range mapping.
+ */
+void
+dirty_vram_delete_shadow(struct vcpu *v,
+                         unsigned long gfn,
+                         unsigned int shadow_type,
+                         mfn_t smfn);
+
+
+/*
+ * Scan all the L1 tables looking for VRAM mappings.
+ * Record them in the domain's dv_dirty_vram structure
+ */
+void sh_find_all_vram_mappings(struct vcpu *v,
+                               dv_range_t *range);
+
+/*
+ * Free a paddr_link struct, given the address of its
+ * predecessor's link in the singly-linked list (NULL for the head)
+ */
+dv_paddr_link_t *
+free_paddr_link(struct domain *d,
+                dv_paddr_link_t **ppl,
+                dv_paddr_link_t *pl);
+
+
+/* Enable VRAM dirty tracking. */
+int
+shadow_track_dirty_vram(struct domain *d,
+                        unsigned long first_pfn,
+                        unsigned long nr,
+                        XEN_GUEST_HANDLE_64(uint8) dirty_bitmap);
+
+int
+hap_track_dirty_vram(struct domain *d,
+                     unsigned long begin_pfn,
+                     unsigned long nr,
+                     XEN_GUEST_HANDLE_64(uint8) dirty_bitmap);
+
+void
+hap_clean_vram_tracking_range(struct domain *d,
+                              unsigned long begin_pfn,
+                              unsigned long nr,
+                              uint8_t *dirty_bitmap);
+
+#endif /* _DIRTY_VRAM_H */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
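
An illustrative aside, not part of the patch: with the structures above, a
64-bit build packs (PAGE_SIZE - sizeof(struct list_head)) /
sizeof(dv_paddr_link_t) = (4096 - 16) / 16 = 255 links into each extension
page.  A minimal, hypothetical walker over these declarations (shadow mode
only, since pl_tab is NULL for HAP domains) might look like:

static int count_range_mappings(const dv_range_t *range)
{
    unsigned long i, nr = range->end_pfn - range->begin_pfn;
    int total = 0;

    if ( range->pl_tab == NULL )   /* HAP: no per-PTE bookkeeping */
        return 0;

    for ( i = 0; i < nr; i++ )
    {
        const dv_paddr_link_t *pl;

        /* The head entry lives in the pl_tab cell; extra mappings
         * are chained through extension pages. */
        for ( pl = &range->pl_tab[i].mapping; pl; pl = pl->pl_next )
            if ( pl->sl1ma != INVALID_PADDR )
                total++;
    }
    return total;
}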
diff --git a/xen/include/asm-x86/hap.h b/xen/include/asm-x86/hap.h
index 916a35b..3e3a1f5 100644
--- a/xen/include/asm-x86/hap.h
+++ b/xen/include/asm-x86/hap.h
@@ -57,10 +57,6 @@ void  hap_final_teardown(struct domain *d);
 void  hap_teardown(struct domain *d);
 void  hap_vcpu_init(struct vcpu *v);
 void  hap_logdirty_init(struct domain *d);
-int   hap_track_dirty_vram(struct domain *d,
-                           unsigned long begin_pfn,
-                           unsigned long nr,
-                           XEN_GUEST_HANDLE_64(uint8) dirty_bitmap);
 
 extern const struct paging_mode *hap_paging_get_mode(struct vcpu *);
 
diff --git a/xen/include/asm-x86/hvm/domain.h b/xen/include/asm-x86/hvm/domain.h
index 27b3de5..0cc7b05 100644
--- a/xen/include/asm-x86/hvm/domain.h
+++ b/xen/include/asm-x86/hvm/domain.h
@@ -74,7 +74,7 @@ struct hvm_domain {
     struct list_head       pinned_cacheattr_ranges;
 
     /* VRAM dirty support. */
-    struct sh_dirty_vram *dirty_vram;
+    struct dv_dirty_vram  *dirty_vram;
 
     /* If one of vcpus of this domain is in no_fill_mode or
      * mtrr/pat between vcpus is not the same, set is_in_uc_mode
diff --git a/xen/include/asm-x86/paging.h b/xen/include/asm-x86/paging.h
index c3a8848..e22df38 100644
--- a/xen/include/asm-x86/paging.h
+++ b/xen/include/asm-x86/paging.h
@@ -154,9 +154,13 @@ void paging_log_dirty_init(struct domain *d,
                            int  (*disable_log_dirty)(struct domain *d),
                            void (*clean_dirty_bitmap)(struct domain *d));
 
-/* mark a page as dirty */
+/* mark a gmfn as dirty, a wrapper around marking a gpfn as dirty */
 void paging_mark_dirty(struct domain *d, unsigned long guest_mfn);
 
+/* mark a gpfn as dirty */
+void paging_mark_dirty_gpfn(struct domain *d, unsigned long gpfn);
+
+
 /* is this guest page dirty? 
  * This is called from inside paging code, with the paging lock held. */
 int paging_mfn_is_dirty(struct domain *d, mfn_t gmfn);
@@ -183,15 +187,6 @@ int paging_mfn_is_dirty(struct domain *d, mfn_t gmfn);
 #define L4_LOGDIRTY_IDX(pfn) 0
 #endif
 
-/* VRAM dirty tracking support */
-struct sh_dirty_vram {
-    unsigned long begin_pfn;
-    unsigned long end_pfn;
-    paddr_t *sl1ma;
-    uint8_t *dirty_bitmap;
-    s_time_t last_dirty;
-};
-
 /*****************************************************************************
  * Entry points into the paging-assistance code */
 
diff --git a/xen/include/asm-x86/shadow.h b/xen/include/asm-x86/shadow.h
index 2eb6efc..940d7fd 100644
--- a/xen/include/asm-x86/shadow.h
+++ b/xen/include/asm-x86/shadow.h
@@ -62,12 +62,6 @@ void shadow_vcpu_init(struct vcpu *v);
 /* Enable an arbitrary shadow mode.  Call once at domain creation. */
 int shadow_enable(struct domain *d, u32 mode);
 
-/* Enable VRAM dirty bit tracking. */
-int shadow_track_dirty_vram(struct domain *d,
-                            unsigned long first_pfn,
-                            unsigned long nr,
-                            XEN_GUEST_HANDLE_64(uint8) dirty_bitmap);
-
 /* Handler for shadow control ops: operations from user-space to enable
  * and disable ephemeral shadow modes (test mode and log-dirty mode) and
  * manipulate the log-dirty bitmap. */
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 35+ messages in thread

* Re: [PATCH] Provide support for multiple frame buffers in Xen
  2013-01-16 15:11 Robert Phillips
@ 2013-01-17 11:54 ` Tim Deegan
  0 siblings, 0 replies; 35+ messages in thread
From: Tim Deegan @ 2013-01-17 11:54 UTC (permalink / raw)
  To: Robert Phillips; +Cc: xen-devel

At 10:11 -0500 on 16 Jan (1358331063), Robert Phillips wrote:
> Support is provided for both shadow and hardware assisted paging (HAP)
> modes. This code bookkeeps the set of video frame buffers (vram),
> detects when the guest has modified any of those buffers and, upon request,
> returns a bitmap of the modified pages.
> This lets other software components re-paint the portions of the monitor
> (or monitors) that have changed.
> Each monitor has a frame buffer of some size at some position
> in guest physical memory.
> The set of frame buffers being tracked can change over time as monitors
> are plugged and unplugged.
> 
> Signed-Off-By: Robert Phillips <robert.phillips@citrix.com>

Applied, thanks. 

Tim.

^ permalink raw reply	[flat|nested] 35+ messages in thread

* [PATCH] Provide support for multiple frame buffers in Xen
@ 2013-01-16 15:11 Robert Phillips
  2013-01-17 11:54 ` Tim Deegan
  0 siblings, 1 reply; 35+ messages in thread
From: Robert Phillips @ 2013-01-16 15:11 UTC (permalink / raw)
  To: xen-devel; +Cc: Robert Phillips

Support is provided for both shadow and hardware assisted paging (HAP)
modes. This code bookkeeps the set of video frame buffers (vram),
detects when the guest has modified any of those buffers and, upon request,
returns a bitmap of the modified pages.
This lets other software components re-paint the portions of the monitor
(or monitors) that have changed.
Each monitor has a frame buffer of some size at some position
in guest physical memory.
The set of frame buffers being tracked can change over time as monitors
are plugged and unplugged.

Signed-Off-By: Robert Phillips <robert.phillips@citrix.com>
---
 tools/libxc/xenctrl.h            |   20 +-
 xen/arch/x86/hvm/hvm.c           |    9 +-
 xen/arch/x86/mm/Makefile         |    1 +
 xen/arch/x86/mm/dirty_vram.c     |  864 ++++++++++++++++++++++++++++++++++++++
 xen/arch/x86/mm/hap/hap.c        |  111 -----
 xen/arch/x86/mm/p2m.c            |   11 +-
 xen/arch/x86/mm/paging.c         |   57 ++-
 xen/arch/x86/mm/shadow/common.c  |  335 ++++++++-------
 xen/arch/x86/mm/shadow/multi.c   |  174 ++++----
 xen/arch/x86/mm/shadow/multi.h   |    7 +-
 xen/arch/x86/mm/shadow/types.h   |    1 +
 xen/include/asm-x86/dirty_vram.h |  202 +++++++++
 xen/include/asm-x86/hap.h        |    4 -
 xen/include/asm-x86/hvm/domain.h |    2 +-
 xen/include/asm-x86/paging.h     |   15 +-
 xen/include/asm-x86/shadow.h     |    6 -
 16 files changed, 1422 insertions(+), 397 deletions(-)
 create mode 100644 xen/arch/x86/mm/dirty_vram.c
 create mode 100644 xen/include/asm-x86/dirty_vram.h

diff --git a/tools/libxc/xenctrl.h b/tools/libxc/xenctrl.h
index 32122fd..cd4e1ef 100644
--- a/tools/libxc/xenctrl.h
+++ b/tools/libxc/xenctrl.h
@@ -1563,15 +1563,23 @@ int xc_hvm_inject_msi(
     xc_interface *xch, domid_t dom, uint64_t addr, uint32_t data);
 
 /*
- * Track dirty bit changes in the VRAM area
+ * Track dirty bit changes in a VRAM region defined by
+ * [ first_pfn : first_pfn + nr - 1 ]
  *
  * All of this is done atomically:
- * - get the dirty bitmap since the last call
- * - set up dirty tracking area for period up to the next call
- * - clear the dirty tracking area.
+ * - gets the dirty bitmap since the last call, all zeroes for
+ *   the first call with some new region
+ * - sets up a dirty tracking region for period up to the next call
+ * - clears the specified dirty tracking region.
  *
- * Returns -ENODATA and does not fill bitmap if the area has changed since the
- * last call.
+ * Creating a new region causes any existing regions that it overlaps
+ * to be discarded.
+ *
+ * Specifying nr == 0 causes all regions to be discarded and
+ * disables dirty bit tracking.
+ *
+ * If nr is not a multiple of 64, only the first nr bits of bitmap
+ * are well defined.
  */
 int xc_hvm_track_dirty_vram(
     xc_interface *xch, domid_t dom,
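
An illustrative aside, not part of the patch: a sketch of how a display
front end might poll one region with this call, assuming the remaining
parameters are the pfn range and a dirty-bitmap buffer as described above.
repaint_page() is a hypothetical stand-in for the caller's repaint hook.

#include <stdint.h>
#include <xenctrl.h>

extern void repaint_page(uint64_t pfn);   /* hypothetical repaint hook */

static void poll_vram_dirty(xc_interface *xch, domid_t dom,
                            uint64_t fb_pfn, uint64_t fb_pages)
{
    /* The bitmap is byte-granular: bit i covers page fb_pfn + i. */
    unsigned long bitmap[(fb_pages + 8 * sizeof(unsigned long) - 1) /
                         (8 * sizeof(unsigned long))];
    const uint8_t *bytes = (const uint8_t *)bitmap;
    uint64_t i;

    if ( xc_hvm_track_dirty_vram(xch, dom, fb_pfn, fb_pages, bitmap) < 0 )
        return;               /* tracking unavailable: repaint everything */

    for ( i = 0; i < fb_pages; i++ )
        if ( bytes[i >> 3] & (1u << (i & 7)) )
            repaint_page(fb_pfn + i);     /* changed since the last call */
}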
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 40c1ab2..8ddb91d 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -57,6 +57,7 @@
 #include <asm/hvm/cacheattr.h>
 #include <asm/hvm/trace.h>
 #include <asm/hvm/nestedhvm.h>
+#include <asm/dirty_vram.h>
 #include <asm/mtrr.h>
 #include <asm/apic.h>
 #include <public/sched.h>
@@ -66,6 +67,7 @@
 #include <asm/mem_event.h>
 #include <asm/mem_access.h>
 #include <public/mem_event.h>
+#include "../mm/mm-locks.h"
 
 bool_t __read_mostly hvm_enabled;
 
@@ -1433,8 +1435,11 @@ int hvm_hap_nested_page_fault(paddr_t gpa,
          */
         if ( access_w )
         {
-            paging_mark_dirty(v->domain, mfn_x(mfn));
-            p2m_change_type(v->domain, gfn, p2m_ram_logdirty, p2m_ram_rw);
+            if ( p2m_change_type(v->domain, gfn, p2m_ram_logdirty,
+                                 p2m_ram_rw) == p2m_ram_logdirty )
+            {
+                paging_mark_dirty_gpfn(v->domain, gfn);
+            }
         }
         rc = 1;
         goto out_put_gfn;
diff --git a/xen/arch/x86/mm/Makefile b/xen/arch/x86/mm/Makefile
index 73dcdf4..becd0c9 100644
--- a/xen/arch/x86/mm/Makefile
+++ b/xen/arch/x86/mm/Makefile
@@ -5,6 +5,7 @@ obj-y += paging.o
 obj-y += p2m.o p2m-pt.o p2m-ept.o p2m-pod.o
 obj-y += guest_walk_2.o
 obj-y += guest_walk_3.o
+obj-y += dirty_vram.o
 obj-$(x86_64) += guest_walk_4.o
 obj-$(x86_64) += mem_event.o
 obj-$(x86_64) += mem_paging.o
diff --git a/xen/arch/x86/mm/dirty_vram.c b/xen/arch/x86/mm/dirty_vram.c
new file mode 100644
index 0000000..d659bd1
--- /dev/null
+++ b/xen/arch/x86/mm/dirty_vram.c
@@ -0,0 +1,864 @@
+/*
+ * arch/x86/mm/dirty_vram.c: Bookkeep/query dirty VRAM pages
+ * with support for multiple frame buffers.
+ *
+ * Copyright (c) 2012, Citrix Systems, Inc. (Robert Phillips)
+ * Parts of this code are Copyright (c) 2007 Advanced Micro Devices (Wei Huang)
+ * Parts of this code are Copyright (c) 2007 XenSource Inc.
+ * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
+ * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+
+#include <xen/types.h>
+#include <xen/sched.h>
+#include <xen/guest_access.h>
+#include <asm/shadow.h>
+#include <asm/dirty_vram.h>
+#include <asm/hap.h>
+#include <asm/config.h>
+#include "mm-locks.h"
+
+#define DEBUG_stop_tracking_all_vram          0
+#define DEBUG_allocating_dirty_vram_range     0
+#define DEBUG_high_water_mark_for_vram_ranges 0
+#define DEBUG_freeing_dirty_vram_range        0
+#define DEBUG_allocate_paddr_links_page       0
+#define DEBUG_update_vram_mapping             0
+#define DEBUG_alloc_paddr_inject_fault        0
+#define DEBUG_link_limit_exceeded             0
+
+/* Allocates domain's dirty_vram structure */
+dv_dirty_vram_t *
+dirty_vram_alloc(struct domain *d)
+{
+    dv_dirty_vram_t *dirty_vram;
+    ASSERT( paging_locked_by_me(d) );
+    dirty_vram = d->arch.hvm_domain.dirty_vram = xzalloc(dv_dirty_vram_t);
+    if ( dirty_vram )
+    {
+        INIT_LIST_HEAD(&dirty_vram->range_head);
+        INIT_LIST_HEAD(&dirty_vram->ext_head);
+    }
+    return dirty_vram;
+}
+
+/*
+ * Returns domain's dirty_vram structure,
+ * allocating it if necessary
+ */
+dv_dirty_vram_t *
+dirty_vram_find_or_alloc(struct domain *d)
+{
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    ASSERT( paging_locked_by_me(d) );
+    if ( !dirty_vram )
+        dirty_vram = dirty_vram_alloc(d);
+    return dirty_vram;
+}
+
+
+/* Free domain's dirty_vram structure */
+void dirty_vram_free(struct domain *d)
+{
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    ASSERT( paging_locked_by_me(d) );
+    if ( dirty_vram )
+    {
+        struct list_head *curr, *next;
+        /* Free all the ranges */
+        list_for_each_safe(curr, next, &dirty_vram->range_head)
+        {
+            dv_range_t *range = list_entry(curr, dv_range_t, range_link);
+#if DEBUG_stop_tracking_all_vram
+            gdprintk(XENLOG_DEBUG, "[%05lx:%05lx] stop tracking all vram\n",
+                     range->begin_pfn, range->end_pfn);
+#endif
+            xfree(range->pl_tab);
+            xfree(range);
+        }
+        /* Free all the extension pages */
+        list_for_each_safe(curr, next, &dirty_vram->ext_head)
+        {
+            struct dv_paddr_link_ext *ext =
+                container_of(
+                    curr, struct dv_paddr_link_ext, ext_link);
+            struct page_info *pg = __virt_to_page(ext);
+            d->arch.paging.free_page(d, pg);
+        }
+
+        xfree(dirty_vram);
+        d->arch.hvm_domain.dirty_vram = NULL;
+    }
+}
+
+/* Returns dirty vram range containing gfn, NULL if none */
+struct dv_range *
+dirty_vram_range_find_gfn(struct domain *d,
+                          unsigned long gfn)
+{
+    struct dv_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    ASSERT( paging_locked_by_me(d) );
+    if ( dirty_vram )
+    {
+        struct list_head *curr;
+        list_for_each(curr, &dirty_vram->range_head)
+        {
+            dv_range_t *range = list_entry(curr, dv_range_t, range_link);
+            if ( gfn >= range->begin_pfn &&
+                 gfn <  range->end_pfn )
+                return range;
+        }
+    }
+    return NULL;
+}
+
+/*
+ * Returns pointer to dirty vram range matching [ begin_pfn .. begin_pfn+nr ),
+ * NULL if none.
+ */
+dv_range_t *
+dirty_vram_range_find(struct domain *d,
+                      unsigned long begin_pfn,
+                      unsigned long nr)
+{
+    unsigned long end_pfn = begin_pfn + nr;
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    ASSERT( paging_locked_by_me(d) );
+    if ( dirty_vram )
+    {
+        struct list_head *curr;
+        list_for_each(curr, &dirty_vram->range_head)
+        {
+            dv_range_t *range = list_entry(curr, dv_range_t, range_link);
+            if ( begin_pfn == range->begin_pfn &&
+                 end_pfn   == range->end_pfn )
+                return range;
+        }
+    }
+    return NULL;
+}
+
+/* Allocate specified dirty_vram range */
+static dv_range_t *
+_dirty_vram_range_alloc(struct domain *d,
+                        unsigned long begin_pfn,
+                        unsigned long nr)
+{
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    dv_range_t *range = NULL;
+    unsigned long end_pfn = begin_pfn + nr;
+    dv_pl_entry_t *pl_tab = NULL;
+    int i;
+
+    ASSERT( paging_locked_by_me(d) );
+    ASSERT( dirty_vram != NULL );
+
+#if DEBUG_allocating_dirty_vram_range
+    gdprintk(XENLOG_DEBUG,
+             "[%05lx:%05lx] Allocating dirty vram range hap:%d\n",
+             begin_pfn, end_pfn,
+             d->arch.hvm_domain.hap_enabled);
+#endif
+
+    range = xzalloc(dv_range_t);
+    if ( range == NULL )
+        goto err_out;
+
+    INIT_LIST_HEAD(&range->range_link);
+
+    range->begin_pfn = begin_pfn;
+    range->end_pfn = end_pfn;
+
+    if ( !hap_enabled(d) )
+    {
+        if ( (pl_tab = xzalloc_array(dv_pl_entry_t, nr)) == NULL )
+            goto err_out;
+
+        for ( i = 0; i != nr; i++ )
+        {
+            pl_tab[i].mapping.sl1ma = INVALID_PADDR;
+        }
+    }
+
+    range->pl_tab = pl_tab;
+    range->mappings_hwm = 1;
+
+    list_add(&range->range_link, &dirty_vram->range_head);
+    if ( ++dirty_vram->nr_ranges > dirty_vram->ranges_hwm )
+    {
+        dirty_vram->ranges_hwm = dirty_vram->nr_ranges;
+#if DEBUG_high_water_mark_for_vram_ranges
+        gdprintk(XENLOG_DEBUG,
+                 "High water mark for number of vram ranges is now:%d\n",
+                 dirty_vram->ranges_hwm);
+#endif
+    }
+    return range;
+
+ err_out:
+    xfree(pl_tab);
+    xfree(range);
+    return NULL;
+}
+
+
+/* Frees specified dirty_vram range */
+void dirty_vram_range_free(struct domain *d,
+                           dv_range_t *range)
+{
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    ASSERT( paging_locked_by_me(d) );
+    if ( dirty_vram )
+    {
+        int i, nr = range->end_pfn - range->begin_pfn;
+
+#if DEBUG_freeing_dirty_vram_range
+        gdprintk(XENLOG_DEBUG,
+                 "[%05lx:%05lx] Freeing dirty vram range\n",
+                 range->begin_pfn, range->end_pfn);
+#endif
+
+        if ( range->pl_tab )
+        {
+            for ( i = 0; i != nr; i++ )
+            {
+                dv_paddr_link_t *plx;
+                plx = range->pl_tab[i].mapping.pl_next;
+                /* Does current FB page have multiple mappings? */
+                if ( plx ) /* yes */
+                {
+                    /* Find the last element in singly-linked list */
+                    while ( plx->pl_next != NULL )
+                        plx = plx->pl_next;
+                    
+                    /* Prepend whole list to the free list */
+                    plx->pl_next = dirty_vram->pl_free;
+                    dirty_vram->pl_free = range->pl_tab[i].mapping.pl_next;
+                }
+            }
+            xfree(range->pl_tab);
+            range->pl_tab = NULL;
+        }
+
+        /* Remove range from the linked list, free it, and adjust count */
+        list_del(&range->range_link);
+        xfree(range);
+        dirty_vram->nr_ranges--;
+    }
+}
+
+/*
+ * dirty_vram_range_alloc()
+ * This function ensures that the new range does not overlap any existing
+ * ranges -- deleting them if necessary -- and then calls
+ * _dirty_vram_range_alloc to actually allocate the new range.
+ */
+dv_range_t *
+dirty_vram_range_alloc(struct domain *d,
+                        unsigned long begin_pfn,
+                        unsigned long nr)
+{
+    unsigned long end_pfn = begin_pfn + nr;
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    dv_range_t *range;
+    struct list_head *curr, *next;
+
+    ASSERT( paging_locked_by_me(d) );
+    ASSERT( dirty_vram != NULL );
+
+    /*
+     * Ranges cannot overlap so
+     * free any range that overlaps [ begin_pfn .. end_pfn )
+     */
+    list_for_each_safe(curr, next, &dirty_vram->range_head)
+    {
+        dv_range_t *rng = list_entry(curr, dv_range_t, range_link);
+        if ( ( ( rng->begin_pfn <= begin_pfn ) &&
+               ( begin_pfn <  rng->end_pfn   )
+                 ) ||
+             ( ( begin_pfn <= rng->begin_pfn ) &&
+               ( rng->begin_pfn < end_pfn    )
+                 ) )
+        {
+            /* Different tracking, tear the previous down. */
+            dirty_vram_range_free(d, rng);
+        }
+    }
+
+    range = _dirty_vram_range_alloc(d, begin_pfn, nr);
+    if ( !range )
+        goto out;
+
+ out:
+    return range;
+}
+
+/*
+ * dirty_vram_range_find_or_alloc()
+ * Find the range for [begin_pfn:begin_pfn+nr).
+ * If it doesn't exist, create it.
+ */
+dv_range_t *
+dirty_vram_range_find_or_alloc(struct domain *d,
+                                unsigned long begin_pfn,
+                                unsigned long nr)
+{
+    dv_range_t *range;
+    ASSERT( paging_locked_by_me(d) );
+    range = dirty_vram_range_find(d, begin_pfn, nr);
+    if ( !range )
+        range = dirty_vram_range_alloc(d, begin_pfn, nr);
+    
+    return range;
+}
+
+
+
+/* Allocate a dv_paddr_link struct */
+static dv_paddr_link_t *
+alloc_paddr_link(struct domain *d)
+{
+    dv_paddr_link_t * pl = NULL;
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    dv_paddr_link_ext_t *ext = NULL;
+    
+
+    ASSERT( paging_locked_by_me(d) );
+    BUILD_BUG_ON(sizeof(dv_paddr_link_ext_t) > PAGE_SIZE);
+    /* Is the list of free pl's empty? */
+    if ( dirty_vram->pl_free == NULL ) /* yes */
+    {
+        /*
+         * Allocate another page of pl's.
+         * Link them all together and point the free list head at them
+         */
+        int i;
+        struct page_info *pg = d->arch.paging.alloc_page(d);
+
+        ext = map_domain_page(pg);
+        if ( ext == NULL )
+            goto out;
+
+#if DEBUG_allocate_paddr_links_page
+        gdprintk(XENLOG_DEBUG, "Allocated another page of paddr_links\n");
+#endif
+        list_add(&ext->ext_link, &dirty_vram->ext_head);
+
+        /* initialize and link together the new pl entries */
+        for ( i = 0; i != ARRAY_SIZE(ext->entries); i++ )
+        {
+            ext->entries[i].sl1ma = INVALID_PADDR;
+            ext->entries[i].pl_next = &ext->entries[i+1];
+        }
+        ext->entries[ARRAY_SIZE(ext->entries) - 1].pl_next = NULL;
+        dirty_vram->pl_free = &ext->entries[0];
+    }
+    pl = dirty_vram->pl_free;
+    dirty_vram->pl_free = pl->pl_next;
+
+    pl->sl1ma = INVALID_PADDR;
+    pl->pl_next = NULL;
+ out:
+    if ( ext )
+        unmap_domain_page(ext);
+    
+    return pl;
+}
+
+
+/*
+ * Free a paddr_link struct.
+ *
+ * The caller has walked the singly-linked list of elements
+ * that have, as their head, an element in a pl_tab cell.
+ * The list walk has reached the element to be freed.
+ * (Each element is a dv_paddr_link_t struct.)
+ *
+ * @pl points to the element to be freed.
+ * @ppl points to its predecessor element's next member.
+ *
+ * After linking the predecessor to the element's successor,
+ * we can free @pl by prepending it to the list of free
+ * elements.
+ *
+ * As a boundary case (which happens to be the common case),
+ * @pl points to a cell in the pl_tab rather than to some
+ * extension element dangling from that cell.
+ * We recognize this case because @ppl is NULL.
+ * In that case we promote the first extension element by
+ * copying it into the pl_tab cell and then freeing the extension element.
+ */
+
+dv_paddr_link_t *
+free_paddr_link(struct domain *d,
+                dv_paddr_link_t **ppl,
+                dv_paddr_link_t *pl)
+{
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    dv_paddr_link_t *npl; /* next pl */
+
+    ASSERT( paging_locked_by_me(d) );
+    /* extension mapping? */
+    if ( ppl ) /* yes. free it */
+    {
+        ASSERT(pl == (*ppl));
+        (*ppl) = npl = pl->pl_next;
+    }
+    else  /* main table */
+    {
+        /*
+         * Move the 2nd mapping into the main table entry,
+         * then free the 2nd mapping's element.
+         */
+        dv_paddr_link_t * spl;
+        spl = pl->pl_next;
+        if ( spl == NULL )
+        {
+            pl->sl1ma = INVALID_PADDR;
+            return pl;
+        }
+        pl->sl1ma = spl->sl1ma;
+        pl->pl_next = spl->pl_next;
+        npl = pl; /* reprocess main table entry again */
+        pl = spl;
+    }
+    pl->sl1ma = INVALID_PADDR;
+    pl->pl_next = dirty_vram->pl_free;
+    dirty_vram->pl_free = pl;
+    return npl;
+}
+
+
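
Purely illustrative, not part of the patch: the caller contract above in
miniature.  drop_all_links() is hypothetical; the real callers are
dirty_vram_range_update() below and dirty_vram_delete_shadow() in
shadow/common.c, which also keep range->nr_mappings in step (omitted here).

static void drop_all_links(struct domain *d, dv_pl_entry_t *cell)
{
    /* Keep freeing the head element: free_paddr_link() promotes the
     * successor into the pl_tab cell each time, so the loop ends with
     * an empty cell (sl1ma == INVALID_PADDR and no chained entries). */
    while ( cell->mapping.pl_next != NULL ||
            cell->mapping.sl1ma != INVALID_PADDR )
        free_paddr_link(d, NULL, &cell->mapping);
}
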
+/*
+ * dirty_vram_range_update()
+ *
+ * This is called whenever a level 1 page table entry is modified.
+ * If the L1PTE is being cleared, the function removes any paddr_links
+ * that refer to it.
+ * If the L1PTE is being set to a frame buffer page, a paddr_link is
+ * created for that page's entry in pl_tab.
+ * Returns 1 iff entry found and set or cleared.
+ */
+int dirty_vram_range_update(struct domain *d,
+                            unsigned long gfn,
+                            paddr_t sl1ma,
+                            int set)
+{
+    int effective = 0;
+    dv_range_t *range;
+    unsigned long i;
+    dv_paddr_link_t *pl;
+    dv_paddr_link_t **ppl;
+    int len = 0;
+
+    ASSERT(paging_locked_by_me(d));
+    range = dirty_vram_range_find_gfn(d, gfn);
+    if ( !range )
+        return effective;
+
+    
+    i = gfn - range->begin_pfn;
+    pl = &range->pl_tab[ i ].mapping;
+    ppl = NULL;
+
+    /*
+     * find matching entry (pl), if any, and its predecessor
+     * in linked list (ppl)
+     */
+    while ( pl != NULL )
+    {
+        if ( pl->sl1ma == sl1ma || pl->sl1ma == INVALID_PADDR )
+            break;
+            
+        ppl = &pl->pl_next;
+        pl = *ppl;
+        len++;
+    }
+
+    if ( set )
+    {
+        /* Did we find sl1ma in either the main table or the linked list? */
+        if ( pl == NULL ) /* no, so we'll need to alloc a link */
+        {
+            ASSERT(ppl != NULL);
+            
+#if DEBUG_alloc_paddr_inject_fault
+            {
+                static int counter;
+                
+                /* Test stuck_dirty logic for some cases */
+                if ( (++counter) % 4 == 0 )
+                {
+                    /* Simply mark the frame buffer page as always dirty */
+                    range->pl_tab[ i ].stuck_dirty = 1;
+                    gdprintk(XENLOG_DEBUG,
+                             "[%lx] inject stuck dirty fault\n",
+                             gfn );
+                    goto out;
+                }
+            }
+#endif
+            /*
+             * Have we reached the limit of mappings we're willing
+             * to bookkeep?
+             */
+            if ( len > DV_ADDR_LINK_LIST_LIMIT ) /* yes */
+            {
+#if DEBUG_link_limit_exceeded
+                if ( !range->pl_tab[ i ].stuck_dirty )
+                    gdprintk(XENLOG_DEBUG,
+                             "[%lx] link limit exceeded\n",
+                             gfn );
+#endif            
+                /* Simply mark the frame buffer page as always dirty */
+                range->pl_tab[ i ].stuck_dirty = 1;
+                goto out;
+            }
+
+            /* alloc link and append it to list */
+            (*ppl) = pl = alloc_paddr_link(d);
+            /* Were we able to allocate a link? */
+            if ( pl == NULL ) /* no */
+            {
+                /* Simply mark the frame buffer page as always dirty */
+                range->pl_tab[ i ].stuck_dirty = 1;
+                
+                gdprintk(XENLOG_DEBUG,
+                         "[%lx] alloc failure\n",
+                         gfn );
+                
+                goto out;
+            }
+        }
+        if ( pl->sl1ma != sl1ma )
+        {
+            ASSERT(pl->sl1ma == INVALID_PADDR);
+            pl->sl1ma = sl1ma;
+            range->nr_mappings++;
+        }
+        effective = 1;
+        if ( len > range->mappings_hwm )
+        {
+            range->mappings_hwm = len;
+#if DEBUG_update_vram_mapping
+            gdprintk(XENLOG_DEBUG,
+                     "[%lx] set      sl1ma:%lx hwm:%d mappings:%d "
+                     "freepages:%d\n",
+                     gfn, sl1ma,
+                     range->mappings_hwm,
+                     range->nr_mappings,
+                     d->arch.paging.shadow.free_pages);
+#endif
+        }
+    }
+    else /* clear */
+    {
+        if ( pl && pl->sl1ma == sl1ma )
+        {
+#if DEBUG_update_vram_mapping
+            gdprintk(XENLOG_DEBUG,
+                     "[%lx] clear    sl1ma:%lx mappings:%d\n",
+                     gfn, sl1ma,
+                     range->nr_mappings - 1);
+#endif
+            free_paddr_link(d, ppl, pl);
+            --range->nr_mappings;
+            effective = 1;
+        }
+    }
+ out:
+    return effective;
+}
+
+
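
For illustration only, not part of the patch: the call pattern expected of
the shadow code, mirroring what sh_find_vram_mappings_in_l1() does in
shadow/multi.c.  record_sl1e() is hypothetical, and sl1e is passed as
void * to keep the sketch independent of the shadow type definitions.

static void record_sl1e(struct domain *d, unsigned long gfn,
                        mfn_t sl1mfn, void *sl1e, int installing)
{
    /* Machine address of the shadow L1 PTE: page address of the shadow
     * L1 table plus the entry's offset within that page. */
    paddr_t sl1ma = pfn_to_paddr(mfn_x(sl1mfn)) |
                    ((unsigned long)sl1e & ~PAGE_MASK);

    /* Records (set) or drops (clear) the mapping; returns 1 iff gfn
     * lies in a tracked range and the entry was found/updated. */
    dirty_vram_range_update(d, gfn, sl1ma, installing);
}
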
+/*
+ * shadow_scan_dirty_flags()
+ * This produces a dirty bitmap for the range by examining every
+ * L1PTE referenced by some dv_paddr_link in the range's pl_tab table.
+ * It tests and clears each such L1PTE's dirty flag.
+ */
+static int shadow_scan_dirty_flags(struct domain *d,
+                                   dv_range_t *range,
+                                   uint8_t *dirty_bitmap)
+{
+    int flush_tlb = 0;
+    unsigned long i;
+    unsigned long nr = range->end_pfn - range->begin_pfn;
+    l1_pgentry_t *sl1e = NULL;
+
+    ASSERT( paging_locked_by_me(d) );
+    /* Iterate over VRAM to track dirty bits. */
+    for ( i = 0; i < nr; i++ )
+    {
+        int dirty = 0, len = 1;
+        dv_paddr_link_t *pl;
+        /* Does the frame buffer have an incomplete set of mappings? */
+        if ( unlikely(range->pl_tab[i].stuck_dirty) ) /* yes */
+            dirty = 1;
+        else /* The frame buffer's set of mappings is complete.  Scan it. */
+            for ( pl = &range->pl_tab[i].mapping;
+                  pl;
+                  pl = pl->pl_next, len++ )
+            {
+                paddr_t sl1ma = pl->sl1ma;
+                if ( sl1ma == INVALID_PADDR ) /* FB page is unmapped */
+                    continue;
+
+                if ( sl1e ) /* cleanup from previous iteration */
+                    unmap_domain_page(sl1e);
+
+                sl1e = map_domain_page(sl1ma >> PAGE_SHIFT);
+                if ( l1e_get_flags(*sl1e) & _PAGE_DIRTY )
+                {
+                    dirty = 1;
+                    /* Clear dirty so we can detect if page gets re-dirtied.
+                     * Note: this is atomic, so we may clear a
+                     * _PAGE_ACCESSED set by another processor.
+                     */
+                    l1e_remove_flags(*sl1e, _PAGE_DIRTY);
+                    flush_tlb = 1;
+                }
+            } /* for */
+        
+        if ( dirty )
+            dirty_bitmap[i >> 3] |= (1 << (i & 7));
+
+    }
+    
+    if ( sl1e )
+        unmap_domain_page(sl1e);
+
+    return flush_tlb;
+}
+
+
+/*
+ * shadow_track_dirty_vram()
+ * This is the API called on behalf of the guest to determine which pages in
+ * the range [begin_pfn:begin_pfn+nr) have been dirtied since the last call.
+ * It creates the domain's dv_dirty_vram on demand.
+ * It creates ranges on demand when some [begin_pfn:nr) is first encountered.
+ * To collect the dirty bitmask it calls shadow_scan_dirty_flags().
+ * It copies the dirty bitmask into guest storage.
+ */
+int shadow_track_dirty_vram(struct domain *d,
+                            unsigned long begin_pfn,
+                            unsigned long nr,
+                            XEN_GUEST_HANDLE_64(uint8) guest_dirty_bitmap)
+{
+    int rc = 0;
+    unsigned long end_pfn = begin_pfn + nr;
+    int flush_tlb = 0;
+    dv_range_t *range;
+    struct p2m_domain *p2m = p2m_get_hostp2m(d);
+    uint8_t *dirty_bitmap = NULL;
+
+    /*
+     * This range test is tricky.
+     *
+     * The range [begin_pfn..end_pfn) is an open interval, so end_pfn
+     * is a pfn beyond the end of the range.
+     *
+     * p2m->max_mapped_pfn is a valid PFN so p2m->max_mapped_pfn + 1 is an
+     * invalid PFN.
+     *
+     * If end_pfn is beyond *that* then the range is invalid.
+     */
+    if ( end_pfn < begin_pfn
+         || begin_pfn > p2m->max_mapped_pfn
+         || end_pfn > p2m->max_mapped_pfn + 1 )
+        return -EINVAL;
+
+    paging_lock(d);
+
+    if ( !nr )
+    {
+        dirty_vram_free(d);
+        goto out;
+    }
+
+    if ( guest_handle_is_null(guest_dirty_bitmap) )
+        goto out;
+
+    if ( !dirty_vram_find_or_alloc(d) )
+    {
+        rc = -ENOMEM;
+        goto out;
+    }
+
+    range = dirty_vram_range_find(d, begin_pfn, nr);
+    if ( !range )
+    {
+        range = dirty_vram_range_alloc(d, begin_pfn, nr);
+        if ( range )
+            sh_find_all_vram_mappings(d->vcpu[0], range);
+    }
+    if ( range )
+    {
+        int size = ( nr + BITS_PER_BYTE - 1 ) / BITS_PER_BYTE;
+        
+        rc = -ENOMEM;
+        dirty_bitmap = xzalloc_bytes( size );
+        if ( !dirty_bitmap )
+            goto out;
+
+        flush_tlb |= shadow_scan_dirty_flags(d, range, dirty_bitmap);
+
+        rc = -EFAULT;
+        if ( copy_to_guest(guest_dirty_bitmap,
+                           dirty_bitmap,
+                           size) == 0 )
+            rc = 0;
+    }
+    
+    if ( flush_tlb )
+        flush_tlb_mask(d->domain_dirty_cpumask);
+
+out:
+    paging_unlock(d);
+    
+    if ( dirty_bitmap )
+        xfree(dirty_bitmap);
+    return rc;
+}
+
+
+/************************************************/
+/*          HAP VRAM TRACKING SUPPORT           */
+/************************************************/
+
+/*
+ * hap_track_dirty_vram()
+ * Create the domain's dv_dirty_vram struct on demand.
+ * Create a dirty vram range on demand when some [begin_pfn:begin_pfn+nr) is
+ * first encountered.
+ * Collect the guest_dirty bitmask, a bit mask of the dirty vram pages, by
+ * calling paging_log_dirty_range(), which interrogates each vram
+ * page's p2m type looking for pages that have been made writable.
+ */
+int hap_track_dirty_vram(struct domain *d,
+                         unsigned long begin_pfn,
+                         unsigned long nr,
+                         XEN_GUEST_HANDLE_64(uint8) guest_dirty_bitmap)
+{
+    long rc = 0;
+    dv_dirty_vram_t *dirty_vram;
+    uint8_t *dirty_bitmap = NULL;
+
+    if ( nr )
+    {
+        dv_range_t *range = NULL;
+        int size = ( nr + BITS_PER_BYTE - 1 ) / BITS_PER_BYTE;
+        
+        if ( !paging_mode_log_dirty(d) )
+        {
+            hap_logdirty_init(d);
+            rc = paging_log_dirty_enable(d);
+            if ( rc )
+                goto out;
+        }
+
+        rc = -ENOMEM;
+        dirty_bitmap = xzalloc_bytes( size );
+        if ( !dirty_bitmap )
+            goto out;
+        
+        paging_lock(d);
+        
+        dirty_vram = d->arch.hvm_domain.dirty_vram;
+        if ( !dirty_vram ) 
+        {
+            rc = -ENOMEM;
+            if ( !(dirty_vram = dirty_vram_alloc(d)) )
+            {
+                paging_unlock(d);
+                goto out;
+            }
+        }
+        
+        range = dirty_vram_range_find(d, begin_pfn, nr);
+        if ( !range )
+        {
+            rc = -ENOMEM;
+            if ( !(range = dirty_vram_range_alloc(d, begin_pfn, nr)) )
+            {
+                paging_unlock(d);
+                goto out;
+            }
+            
+            paging_unlock(d);
+            
+            /* set l1e entries of range within P2M table to be read-only. */
+            p2m_change_type_range(d, begin_pfn, begin_pfn + nr,
+                                  p2m_ram_rw, p2m_ram_logdirty);
+            
+            flush_tlb_mask(d->domain_dirty_cpumask);
+            
+            memset(dirty_bitmap, 0xff, size); /* consider all pages dirty */
+        }
+        else
+        {
+            paging_unlock(d);
+            
+            domain_pause(d);
+            
+            /* get the bitmap */
+            paging_log_dirty_range(d, begin_pfn, nr, dirty_bitmap);
+            
+            domain_unpause(d);
+        }
+        
+        
+        rc = -EFAULT;
+        if ( copy_to_guest(guest_dirty_bitmap,
+                           dirty_bitmap,
+                           size) == 0 )
+        {
+            rc = 0;
+        }
+    }
+    else {
+        paging_lock(d);
+        
+        dirty_vram = d->arch.hvm_domain.dirty_vram;
+        if ( dirty_vram )
+        {
+            /*
+             * If zero pages specified while tracking dirty vram
+             * then stop tracking
+             */
+            dirty_vram_free(d);
+        
+        }
+        
+        paging_unlock(d);
+    }
+out:
+    if ( dirty_bitmap )
+        xfree(dirty_bitmap);
+    
+    return rc;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
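
A minimal sketch, not part of the patch: shadow_track_dirty_vram() and
hap_track_dirty_vram() above are alternatives selected by the domain's
paging mode.  The hvm.c dispatch is not shown in this excerpt; a
hypothetical dispatcher would look like:

static int track_dirty_vram(struct domain *d,
                            unsigned long begin_pfn, unsigned long nr,
                            XEN_GUEST_HANDLE_64(uint8) dirty_bitmap)
{
    /* HAP domains use the log-dirty-based path; shadow domains use the
     * per-PTE dirty-bit path. */
    return hap_enabled(d)
        ? hap_track_dirty_vram(d, begin_pfn, nr, dirty_bitmap)
        : shadow_track_dirty_vram(d, begin_pfn, nr, dirty_bitmap);
}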
diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c
index a95ccbf..f7d979b 100644
--- a/xen/arch/x86/mm/hap/hap.c
+++ b/xen/arch/x86/mm/hap/hap.c
@@ -53,117 +53,6 @@
 #define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg))
 
 /************************************************/
-/*          HAP VRAM TRACKING SUPPORT           */
-/************************************************/
-
-/*
- * hap_track_dirty_vram()
- * Create the domain's dv_dirty_vram struct on demand.
- * Create a dirty vram range on demand when some [begin_pfn:begin_pfn+nr] is
- * first encountered.
- * Collect the guest_dirty bitmask, a bit mask of the dirty vram pages, by
- * calling paging_log_dirty_range(), which interrogates each vram
- * page's p2m type looking for pages that have been made writable.
- */
-
-int hap_track_dirty_vram(struct domain *d,
-                         unsigned long begin_pfn,
-                         unsigned long nr,
-                         XEN_GUEST_HANDLE_64(uint8) guest_dirty_bitmap)
-{
-    long rc = 0;
-    struct sh_dirty_vram *dirty_vram;
-    uint8_t *dirty_bitmap = NULL;
-
-    if ( nr )
-    {
-        int size = (nr + BITS_PER_BYTE - 1) / BITS_PER_BYTE;
-
-        if ( !paging_mode_log_dirty(d) )
-        {
-            hap_logdirty_init(d);
-            rc = paging_log_dirty_enable(d);
-            if ( rc )
-                goto out;
-        }
-
-        rc = -ENOMEM;
-        dirty_bitmap = xzalloc_bytes(size);
-        if ( !dirty_bitmap )
-            goto out;
-
-        paging_lock(d);
-
-        dirty_vram = d->arch.hvm_domain.dirty_vram;
-        if ( !dirty_vram )
-        {
-            rc = -ENOMEM;
-            if ( (dirty_vram = xzalloc(struct sh_dirty_vram)) == NULL )
-            {
-                paging_unlock(d);
-                goto out;
-            }
-
-            d->arch.hvm_domain.dirty_vram = dirty_vram;
-        }
-
-        if ( begin_pfn != dirty_vram->begin_pfn ||
-             begin_pfn + nr != dirty_vram->end_pfn )
-        {
-            dirty_vram->begin_pfn = begin_pfn;
-            dirty_vram->end_pfn = begin_pfn + nr;
-
-            paging_unlock(d);
-
-            /* set l1e entries of range within P2M table to be read-only. */
-            p2m_change_type_range(d, begin_pfn, begin_pfn + nr,
-                                  p2m_ram_rw, p2m_ram_logdirty);
-
-            flush_tlb_mask(d->domain_dirty_cpumask);
-
-            memset(dirty_bitmap, 0xff, size); /* consider all pages dirty */
-        }
-        else
-        {
-            paging_unlock(d);
-
-            domain_pause(d);
-
-            /* get the bitmap */
-            paging_log_dirty_range(d, begin_pfn, nr, dirty_bitmap);
-
-            domain_unpause(d);
-        }
-
-        rc = -EFAULT;
-        if ( copy_to_guest(guest_dirty_bitmap, dirty_bitmap, size) == 0 )
-            rc = 0;
-    }
-    else
-    {
-        paging_lock(d);
-
-        dirty_vram = d->arch.hvm_domain.dirty_vram;
-        if ( dirty_vram )
-        {
-            /*
-             * If zero pages specified while tracking dirty vram
-             * then stop tracking
-             */
-            xfree(dirty_vram);
-            d->arch.hvm_domain.dirty_vram = NULL;
-        }
-
-        paging_unlock(d);
-    }
-out:
-    if ( dirty_bitmap )
-        xfree(dirty_bitmap);
-
-    return rc;
-}
-
-/************************************************/
 /*            HAP LOG DIRTY SUPPORT             */
 /************************************************/
 
diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
index 258f46e..41d0fe3 100644
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -690,20 +690,23 @@ void p2m_change_type_range(struct domain *d,
     struct p2m_domain *p2m = p2m_get_hostp2m(d);
 
     BUG_ON(p2m_is_grant(ot) || p2m_is_grant(nt));
-
     p2m_lock(p2m);
-    p2m->defer_nested_flush = 1;
 
+    p2m->defer_nested_flush = 1;
+    
     for ( gfn = start; gfn < end; gfn++ )
     {
         mfn = p2m->get_entry(p2m, gfn, &pt, &a, 0, NULL);
         if ( pt == ot )
-            set_p2m_entry(p2m, gfn, mfn, PAGE_ORDER_4K, nt, p2m->default_access);
+            set_p2m_entry(p2m, gfn, mfn, PAGE_ORDER_4K, nt,
+                          p2m->default_access);
     }
-
+    
     p2m->defer_nested_flush = 0;
+
     if ( nestedhvm_enabled(d) )
         p2m_flush_nestedp2m(d);
+
     p2m_unlock(p2m);
 }
 
diff --git a/xen/arch/x86/mm/paging.c b/xen/arch/x86/mm/paging.c
index a5cdbd1..cd44f6e 100644
--- a/xen/arch/x86/mm/paging.c
+++ b/xen/arch/x86/mm/paging.c
@@ -27,6 +27,7 @@
 #include <asm/p2m.h>
 #include <asm/hap.h>
 #include <asm/hvm/nestedhvm.h>
+#include <asm/dirty_vram.h>
 #include <xen/numa.h>
 #include <xsm/xsm.h>
 
@@ -192,15 +193,11 @@ int paging_log_dirty_disable(struct domain *d)
     return ret;
 }
 
-/* Mark a page as dirty */
+/* Given a guest mfn, mark a page as dirty */
 void paging_mark_dirty(struct domain *d, unsigned long guest_mfn)
 {
     unsigned long pfn;
     mfn_t gmfn;
-    int changed;
-    mfn_t mfn, *l4, *l3, *l2;
-    unsigned long *l1;
-    int i1, i2, i3, i4;
 
     gmfn = _mfn(guest_mfn);
 
@@ -210,6 +207,19 @@ void paging_mark_dirty(struct domain *d, unsigned long guest_mfn)
 
     /* We /really/ mean PFN here, even for non-translated guests. */
     pfn = get_gpfn_from_mfn(mfn_x(gmfn));
+    paging_mark_dirty_gpfn(d, pfn);
+}
+
+
+/* Given a guest pfn, mark a page as dirty */
+void paging_mark_dirty_gpfn(struct domain *d, unsigned long pfn)
+{
+    int changed;
+    mfn_t mfn, *l4, *l3, *l2;
+    unsigned long *l1;
+    int i1, i2, i3, i4;
+    dv_range_t *range;
+    
     /* Shared MFNs should NEVER be marked dirty */
     BUG_ON(SHARED_M2P(pfn));
 
@@ -229,6 +239,11 @@ void paging_mark_dirty(struct domain *d, unsigned long guest_mfn)
     /* Recursive: this is called from inside the shadow code */
     paging_lock_recursive(d);
 
+    d->arch.paging.log_dirty.dirty_count++;
+    range = dirty_vram_range_find_gfn(d, pfn);
+    if ( range )
+        range->dirty_count++;
+
     if ( unlikely(!mfn_valid(d->arch.paging.log_dirty.top)) ) 
     {
          d->arch.paging.log_dirty.top = paging_new_log_dirty_node(d);
@@ -445,7 +460,31 @@ void paging_log_dirty_range(struct domain *d,
     struct p2m_domain *p2m = p2m_get_hostp2m(d);
     int i;
     unsigned long pfn;
+    dv_range_t *range;
+    unsigned int range_dirty_count;
 
+    paging_lock(d);
+    range = dirty_vram_range_find_gfn(d, begin_pfn);
+    if ( !range )
+    {
+        paging_unlock(d);
+        goto out;
+    }
+    
+    range_dirty_count = range->dirty_count;
+    range->dirty_count = 0;
+
+    paging_unlock(d);
+    
+    if ( !range_dirty_count)
+        goto out;
+
+    PAGING_DEBUG(LOGDIRTY,
+                 "log-dirty-range: dom %u [%05lx:%05lx] range_dirty=%u\n",
+                 d->domain_id,
+                 begin_pfn,
+                 begin_pfn + nr,
+                 range_dirty_count);
     /*
      * Set l1e entries of P2M table to be read-only.
      *
@@ -460,15 +499,17 @@ void paging_log_dirty_range(struct domain *d,
 
     for ( i = 0, pfn = begin_pfn; pfn < begin_pfn + nr; i++, pfn++ )
     {
-        p2m_type_t pt;
-        pt = p2m_change_type(d, pfn, p2m_ram_rw, p2m_ram_logdirty);
-        if ( pt == p2m_ram_rw )
+        if ( p2m_change_type(d, pfn, p2m_ram_rw, p2m_ram_logdirty) ==
+             p2m_ram_rw )
             dirty_bitmap[i >> 3] |= (1 << (i & 7));
     }
 
     p2m_unlock(p2m);
 
     flush_tlb_mask(d->domain_dirty_cpumask);
+
+ out:
+    return;
 }
 
 /* Note that this function takes three function pointers. Callers must supply
diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c
index ce79131..1e4b880 100644
--- a/xen/arch/x86/mm/shadow/common.c
+++ b/xen/arch/x86/mm/shadow/common.c
@@ -36,6 +36,7 @@
 #include <asm/current.h>
 #include <asm/flushtlb.h>
 #include <asm/shadow.h>
+#include <asm/dirty_vram.h>
 #include <xen/numa.h>
 #include "private.h"
 
@@ -3121,12 +3122,7 @@ void shadow_teardown(struct domain *d)
      * calls now that we've torn down the bitmap */
     d->arch.paging.mode &= ~PG_log_dirty;
 
-    if (d->arch.hvm_domain.dirty_vram) {
-        xfree(d->arch.hvm_domain.dirty_vram->sl1ma);
-        xfree(d->arch.hvm_domain.dirty_vram->dirty_bitmap);
-        xfree(d->arch.hvm_domain.dirty_vram);
-        d->arch.hvm_domain.dirty_vram = NULL;
-    }
+    dirty_vram_free(d);
 
     paging_unlock(d);
 
@@ -3464,178 +3460,217 @@ void shadow_clean_dirty_bitmap(struct domain *d)
 
 /**************************************************************************/
 /* VRAM dirty tracking support */
-int shadow_track_dirty_vram(struct domain *d,
-                            unsigned long begin_pfn,
-                            unsigned long nr,
-                            XEN_GUEST_HANDLE_64(uint8) dirty_bitmap)
-{
-    int rc;
-    unsigned long end_pfn = begin_pfn + nr;
-    unsigned long dirty_size = (nr + 7) / 8;
-    int flush_tlb = 0;
-    unsigned long i;
-    p2m_type_t t;
-    struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
-    struct p2m_domain *p2m = p2m_get_hostp2m(d);
 
-    if (end_pfn < begin_pfn
-            || begin_pfn > p2m->max_mapped_pfn
-            || end_pfn >= p2m->max_mapped_pfn)
-        return -EINVAL;
 
-    /* We perform p2m lookups, so lock the p2m upfront to avoid deadlock */
-    p2m_lock(p2m_get_hostp2m(d));
-    paging_lock(d);
+/* Support functions for shadow-based dirty VRAM code */
 
-    if ( dirty_vram && (!nr ||
-             ( begin_pfn != dirty_vram->begin_pfn
-            || end_pfn   != dirty_vram->end_pfn )) )
-    {
-        /* Different tracking, tear the previous down. */
-        gdprintk(XENLOG_INFO, "stopping tracking VRAM %lx - %lx\n", dirty_vram->begin_pfn, dirty_vram->end_pfn);
-        xfree(dirty_vram->sl1ma);
-        xfree(dirty_vram->dirty_bitmap);
-        xfree(dirty_vram);
-        dirty_vram = d->arch.hvm_domain.dirty_vram = NULL;
-    }
+#define DEBUG_unshadow_sl1ma                  0          
+#define DEBUG_unshadow_sl1ma_detail           0
+#define DEBUG_count_initial_mappings          0
 
-    if ( !nr )
+/* smfn is no longer a shadow page.  Remove it from any
+ * dirty vram range mapping. */
+void
+dirty_vram_delete_shadow(struct vcpu *v,
+                         unsigned long gfn,
+                         unsigned int shadow_type, 
+                         mfn_t smfn)
+{
+    static unsigned int l1_shadow_mask = 
+          1 << SH_type_l1_32_shadow
+        | 1 << SH_type_fl1_32_shadow
+        | 1 << SH_type_l1_pae_shadow
+        | 1 << SH_type_fl1_pae_shadow
+        | 1 << SH_type_l1_64_shadow
+        | 1 << SH_type_fl1_64_shadow
+        ;
+    struct domain *d = v->domain;
+    dv_dirty_vram_t *dirty_vram;
+    struct list_head *curr, *next;
+    
+    ASSERT(paging_locked_by_me(d));
+    /* Ignore all but level 1 shadows */
+    
+    if ( (l1_shadow_mask & (1 << shadow_type)) == 0 )
     {
-        rc = 0;
         goto out;
     }
 
-    /* This should happen seldomly (Video mode change),
-     * no need to be careful. */
+    dirty_vram = d->arch.hvm_domain.dirty_vram;
     if ( !dirty_vram )
     {
-        /* Throw away all the shadows rather than walking through them 
-         * up to nr times getting rid of mappings of each pfn */
-        shadow_blow_tables(d);
-
-        gdprintk(XENLOG_INFO, "tracking VRAM %lx - %lx\n", begin_pfn, end_pfn);
-
-        rc = -ENOMEM;
-        if ( (dirty_vram = xmalloc(struct sh_dirty_vram)) == NULL )
-            goto out;
-        dirty_vram->begin_pfn = begin_pfn;
-        dirty_vram->end_pfn = end_pfn;
-        d->arch.hvm_domain.dirty_vram = dirty_vram;
-
-        if ( (dirty_vram->sl1ma = xmalloc_array(paddr_t, nr)) == NULL )
-            goto out_dirty_vram;
-        memset(dirty_vram->sl1ma, ~0, sizeof(paddr_t) * nr);
-
-        if ( (dirty_vram->dirty_bitmap = xzalloc_array(uint8_t, dirty_size)) == NULL )
-            goto out_sl1ma;
-
-        dirty_vram->last_dirty = NOW();
-
-        /* Tell the caller that this time we could not track dirty bits. */
-        rc = -ENODATA;
-    }
-    else if (dirty_vram->last_dirty == -1)
-    {
-        /* still completely clean, just copy our empty bitmap */
-        rc = -EFAULT;
-        if ( copy_to_guest(dirty_bitmap, dirty_vram->dirty_bitmap, dirty_size) == 0 )
-            rc = 0;
+        goto out;
     }
-    else
+        
+    list_for_each_safe(curr, next, &dirty_vram->range_head)
     {
-        /* Iterate over VRAM to track dirty bits. */
-        for ( i = 0; i < nr; i++ ) {
-            mfn_t mfn = get_gfn_query_unlocked(d, begin_pfn + i, &t);
-            struct page_info *page;
-            int dirty = 0;
-            paddr_t sl1ma = dirty_vram->sl1ma[i];
-
-            if (mfn_x(mfn) == INVALID_MFN)
-            {
-                dirty = 1;
-            }
-            else
+        dv_range_t *range = list_entry(curr, dv_range_t, range_link);
+        unsigned long i;
+        int max_mappings = 1, mappings = 0;
+        int unshadowed = 0;
+        for ( i = 0; i != range->end_pfn - range->begin_pfn; i++ )
+        {
+            dv_paddr_link_t *pl = &range->pl_tab[ i ].mapping;
+            dv_paddr_link_t **ppl = NULL;
+            mappings = 0;
+            
+            while ( pl != NULL )
             {
-                page = mfn_to_page(mfn);
-                switch (page->u.inuse.type_info & PGT_count_mask)
-                {
-                case 0:
-                    /* No guest reference, nothing to track. */
-                    break;
-                case 1:
-                    /* One guest reference. */
-                    if ( sl1ma == INVALID_PADDR )
-                    {
-                        /* We don't know which sl1e points to this, too bad. */
-                        dirty = 1;
-                        /* TODO: Heuristics for finding the single mapping of
-                         * this gmfn */
-                        flush_tlb |= sh_remove_all_mappings(d->vcpu[0], mfn);
-                    }
-                    else
-                    {
-                        /* Hopefully the most common case: only one mapping,
-                         * whose dirty bit we can use. */
-                        l1_pgentry_t *sl1e = maddr_to_virt(sl1ma);
-
-                        if ( l1e_get_flags(*sl1e) & _PAGE_DIRTY )
-                        {
-                            dirty = 1;
-                            /* Note: this is atomic, so we may clear a
-                             * _PAGE_ACCESSED set by another processor. */
-                            l1e_remove_flags(*sl1e, _PAGE_DIRTY);
-                            flush_tlb = 1;
-                        }
-                    }
-                    break;
-                default:
-                    /* More than one guest reference,
-                     * we don't afford tracking that. */
-                    dirty = 1;
+                paddr_t sl1ma = pl->sl1ma;
+                unsigned long sl1mn;
+                
+                if ( sl1ma == INVALID_PADDR )
                     break;
+                
+                sl1mn = sl1ma >> PAGE_SHIFT;
+                if ( sl1mn == mfn_x(smfn) ) {
+#if DEBUG_unshadow_sl1ma_detail
+                    gdprintk(XENLOG_DEBUG,
+                             "[%lx] gfn[%lx] unshadow sl1ma:%lx\n",
+                             mfn_x(smfn),
+                             range->begin_pfn + i,
+                             sl1ma);
+#endif
+                    unshadowed++;
+                    pl = free_paddr_link(d, ppl, pl);
+                    --range->nr_mappings;
+                }
+                else
+                {
+                    ppl = &pl->pl_next;
+                    pl = *ppl;
+                    mappings++;
                 }
             }
-
-            if ( dirty )
+        }
+        if ( mappings > max_mappings )
+            max_mappings = mappings;
+        
+        if ( unshadowed ) {
+#if DEBUG_unshadow_sl1ma
+            gdprintk(XENLOG_DEBUG,
+                     "[%lx] gfn[%05lx:%05lx] unshadowed:%d mappings:0x%x "
+                     "max_mappings:%d\n",
+                     mfn_x(smfn),
+                     range->begin_pfn, range->end_pfn,
+                     unshadowed, range->nr_mappings, max_mappings);
+#endif
+            if ( range->nr_mappings == 0 )
             {
-                dirty_vram->dirty_bitmap[i / 8] |= 1 << (i % 8);
-                dirty_vram->last_dirty = NOW();
+                dirty_vram_range_free(d, range);                    
             }
         }
+    }
+ out:
+    return;
+}
+
 
-        rc = -EFAULT;
-        if ( copy_to_guest(dirty_bitmap, dirty_vram->dirty_bitmap, dirty_size) == 0 ) {
-            memset(dirty_vram->dirty_bitmap, 0, dirty_size);
-            if (dirty_vram->last_dirty + SECONDS(2) < NOW())
+typedef int (*hash_pfn_callback_t)(struct vcpu *v,
+                                   mfn_t smfn,
+                                   unsigned long begin_pfn,
+                                   unsigned long end_pfn,
+                                   int *removed);
+
+static int hash_pfn_foreach(struct vcpu *v, 
+                            unsigned int callback_mask, 
+                            hash_pfn_callback_t callbacks[], 
+                            unsigned long begin_pfn,
+                            unsigned long end_pfn)
+/* Walk the hash table looking at the types of the entries and 
+ * calling the appropriate callback function for each entry. 
+ * The mask determines which shadow types we call back for, and the array
+ * of callbacks tells us which function to call.
+ * Any callback may return non-zero to let us skip the rest of the scan. 
+ *
+ * WARNING: Callbacks MUST NOT add or remove hash entries unless they 
+ * then return non-zero to terminate the scan. */
+{
+    int i, done = 0, removed = 0;
+    struct domain *d = v->domain;
+    struct page_info *x;
+
+    /* Say we're here, to stop hash-lookups reordering the chains */
+    ASSERT(paging_locked_by_me(d));
+    ASSERT(d->arch.paging.shadow.hash_walking == 0);
+    d->arch.paging.shadow.hash_walking = 1;
+
+    for ( i = 0; i < SHADOW_HASH_BUCKETS; i++ ) 
+    {
+        /* WARNING: This is not safe against changes to the hash table.
+         * The callback *must* return non-zero if it has inserted or
+         * deleted anything from the hash (lookups are OK, though). */
+        for ( x = d->arch.paging.shadow.hash_table[i];
+              x;
+              x = next_shadow(x) )
+        {
+            if ( callback_mask & (1 << x->u.sh.type) )
             {
-                /* was clean for more than two seconds, try to disable guest
-                 * write access */
-                for ( i = begin_pfn; i < end_pfn; i++ ) {
-                    mfn_t mfn = get_gfn_query_unlocked(d, i, &t);
-                    if (mfn_x(mfn) != INVALID_MFN)
-                        flush_tlb |= sh_remove_write_access(d->vcpu[0], mfn, 1, 0);
-                }
-                dirty_vram->last_dirty = -1;
+                ASSERT(x->u.sh.type <= 15);
+                ASSERT(callbacks[x->u.sh.type] != NULL);
+                done = callbacks[x->u.sh.type](v, page_to_mfn(x), 
+                                               begin_pfn, end_pfn,
+                                               &removed);
+                if ( done ) break;
             }
-            rc = 0;
         }
+        if ( done ) break; 
     }
-    if ( flush_tlb )
-        flush_tlb_mask(d->domain_dirty_cpumask);
-    goto out;
+    d->arch.paging.shadow.hash_walking = 0;
+    return removed;
+}
 
-out_sl1ma:
-    xfree(dirty_vram->sl1ma);
-out_dirty_vram:
-    xfree(dirty_vram);
-    dirty_vram = d->arch.hvm_domain.dirty_vram = NULL;
+void sh_find_all_vram_mappings(struct vcpu *v,
+                               dv_range_t *range)
+{
+    /* Dispatch table for getting per-type functions */
+    static hash_pfn_callback_t callbacks[SH_type_unused] = {
+        NULL, /* none    */
+        SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, 2), /* l1_32   */
+        SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, 2), /* fl1_32  */
+        NULL, /* l2_32   */
+        SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, 3), /* l1_pae  */
+        SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, 3), /* fl1_pae */
+        NULL, /* l2_pae  */
+        NULL, /* l2h_pae */
+#if CONFIG_PAGING_LEVELS >= 4
+        SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, 4), /* l1_64   */
+        SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, 4), /* fl1_64  */
+#else
+        NULL, /* l1_64   */
+        NULL, /* fl1_64  */
+#endif
+        NULL, /* l2_64   */
+        NULL, /* l2h_64  */
+        NULL, /* l3_64   */
+        NULL, /* l4_64   */
+        NULL, /* p2m     */
+        NULL  /* unused  */
+    };
 
-out:
-    paging_unlock(d);
-    p2m_unlock(p2m_get_hostp2m(d));
-    return rc;
+    static unsigned int callback_mask = 
+          1 << SH_type_l1_32_shadow
+        | 1 << SH_type_fl1_32_shadow
+        | 1 << SH_type_l1_pae_shadow
+        | 1 << SH_type_fl1_pae_shadow
+        | 1 << SH_type_l1_64_shadow
+        | 1 << SH_type_fl1_64_shadow
+        ;
+
+    perfc_incr(shadow_mappings);
+
+    hash_pfn_foreach(v, callback_mask, callbacks,
+                     range->begin_pfn,
+                     range->end_pfn);
+
+#if DEBUG_count_initial_mappings
+    gdprintk(XENLOG_DEBUG, "[%05lx:%05lx] count of initial mappings:%d\n",
+             range->begin_pfn, range->end_pfn,
+             range->nr_mappings);
+#endif
 }
 
+
 /**************************************************************************/
 /* Shadow-control XEN_DOMCTL dispatcher */
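As an aside on the hash_pfn_foreach() walker added above: it reuses the shadow code's usual dispatch idiom, in which a bitmask selects the shadow types of interest and a per-type callback table supplies the handler. A rough standalone illustration of that idiom only (toy types and names, not Xen's real hash table or shadow structures):

    #include <stdio.h>

    /* Toy stand-ins for the shadow hash entries; not Xen's real types. */
    struct entry {
        unsigned int type;       /* analogous to x->u.sh.type     */
        unsigned long payload;   /* analogous to the shadow's mfn */
        struct entry *next;      /* hash-chain link               */
    };

    typedef int (*entry_cb_t)(unsigned long payload);

    /*
     * Walk every bucket chain; for entries whose type bit is set in the
     * mask, call the matching callback.  A non-zero return ends the walk,
     * mirroring the rule that a callback which modifies the table must
     * terminate the scan.
     */
    static void foreach_entry(struct entry **buckets, int nr_buckets,
                              unsigned int mask, entry_cb_t cbs[])
    {
        int b, done = 0;

        for ( b = 0; b < nr_buckets && !done; b++ )
        {
            struct entry *e;
            for ( e = buckets[b]; e && !done; e = e->next )
                if ( mask & (1u << e->type) )
                    done = cbs[e->type](e->payload);
        }
    }

    static int print_cb(unsigned long payload)
    {
        printf("visited payload %lu\n", payload);
        return 0;   /* keep scanning */
    }

    int main(void)
    {
        struct entry e2 = { .type = 2, .payload = 42, .next = NULL };
        struct entry e1 = { .type = 1, .payload = 7,  .next = &e2 };
        struct entry *buckets[1] = { &e1 };
        entry_cb_t cbs[3] = { NULL, print_cb, print_cb };

        foreach_entry(buckets, 1, (1u << 1) | (1u << 2), cbs);
        return 0;
    }

The property the sketch preserves is that the walk itself never mutates the chains; a callback that needs to insert or delete must stop the scan, exactly as the WARNING above requires.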
 
diff --git a/xen/arch/x86/mm/shadow/multi.c b/xen/arch/x86/mm/shadow/multi.c
index 4967da1..bb983bc 100644
--- a/xen/arch/x86/mm/shadow/multi.c
+++ b/xen/arch/x86/mm/shadow/multi.c
@@ -35,6 +35,7 @@
 #include <asm/flushtlb.h>
 #include <asm/hvm/hvm.h>
 #include <asm/hvm/cacheattr.h>
+#include <asm/dirty_vram.h>
 #include <asm/mtrr.h>
 #include <asm/guest_pt.h>
 #include <public/sched.h>
@@ -149,6 +150,10 @@ delete_fl1_shadow_status(struct vcpu *v, gfn_t gfn, mfn_t smfn)
     SHADOW_PRINTK("gfn=%"SH_PRI_gfn", type=%08x, smfn=%05lx\n",
                    gfn_x(gfn), SH_type_fl1_shadow, mfn_x(smfn));
     ASSERT(mfn_to_page(smfn)->u.sh.head);
+
+    /* Removing any dv_paddr_links to the erstwhile shadow page */
+    dirty_vram_delete_shadow(v, gfn_x(gfn), SH_type_fl1_shadow, smfn);
+    
     shadow_hash_delete(v, gfn_x(gfn), SH_type_fl1_shadow, smfn);
 }
 
@@ -160,6 +165,10 @@ delete_shadow_status(struct vcpu *v, mfn_t gmfn, u32 shadow_type, mfn_t smfn)
                    v->domain->domain_id, v->vcpu_id,
                    mfn_x(gmfn), shadow_type, mfn_x(smfn));
     ASSERT(mfn_to_page(smfn)->u.sh.head);
+    
+    /* Removing any dv_paddr_links to the erstwhile shadow page */
+    dirty_vram_delete_shadow(v, mfn_x(gmfn), shadow_type, smfn);
+    
     shadow_hash_delete(v, mfn_x(gmfn), shadow_type, smfn);
     /* 32-on-64 PV guests don't own their l4 pages; see set_shadow_status */
     if ( !is_pv_32on64_vcpu(v) || shadow_type != SH_type_l4_64_shadow )
@@ -516,7 +525,6 @@ _sh_propagate(struct vcpu *v,
     guest_l1e_t guest_entry = { guest_intpte };
     shadow_l1e_t *sp = shadow_entry_ptr;
     struct domain *d = v->domain;
-    struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
     gfn_t target_gfn = guest_l1e_get_gfn(guest_entry);
     u32 pass_thru_flags;
     u32 gflags, sflags;
@@ -663,17 +671,6 @@ _sh_propagate(struct vcpu *v,
         }
     }
 
-    if ( unlikely((level == 1) && dirty_vram
-            && dirty_vram->last_dirty == -1
-            && gfn_x(target_gfn) >= dirty_vram->begin_pfn
-            && gfn_x(target_gfn) < dirty_vram->end_pfn) )
-    {
-        if ( ft & FETCH_TYPE_WRITE )
-            dirty_vram->last_dirty = NOW();
-        else
-            sflags &= ~_PAGE_RW;
-    }
-
     /* Read-only memory */
     if ( p2m_is_readonly(p2mt) ||
          (p2mt == p2m_mmio_direct &&
@@ -1072,101 +1069,60 @@ static int shadow_set_l2e(struct vcpu *v,
     return flags;
 }
 
-static inline void shadow_vram_get_l1e(shadow_l1e_t new_sl1e,
+/* shadow_vram_fix_l1e()
+ *
+ * Tests L1PTEs as they are modified, looking for when they start to (or
+ * cease to) point to frame buffer pages.  If the old and new gfns differ,
+ * calls dirty_vram_range_update() to update the dirty_vram structures.
+ */
+static inline void shadow_vram_fix_l1e(shadow_l1e_t old_sl1e,
+                                       shadow_l1e_t new_sl1e,
                                        shadow_l1e_t *sl1e,
                                        mfn_t sl1mfn,
                                        struct domain *d)
 { 
-    mfn_t mfn = shadow_l1e_get_mfn(new_sl1e);
-    int flags = shadow_l1e_get_flags(new_sl1e);
-    unsigned long gfn;
-    struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    mfn_t new_mfn, old_mfn;
+    unsigned long new_gfn = INVALID_M2P_ENTRY, old_gfn = INVALID_M2P_ENTRY;
+    paddr_t sl1ma;
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
 
-    if ( !dirty_vram         /* tracking disabled? */
-         || !(flags & _PAGE_RW) /* read-only mapping? */
-         || !mfn_valid(mfn) )   /* mfn can be invalid in mmio_direct */
+    if ( !dirty_vram )
         return;
 
-    gfn = mfn_to_gfn(d, mfn);
-    /* Page sharing not supported on shadow PTs */
-    BUG_ON(SHARED_M2P(gfn));
+    sl1ma = pfn_to_paddr(mfn_x(sl1mfn)) | ((unsigned long)sl1e & ~PAGE_MASK);
 
-    if ( (gfn >= dirty_vram->begin_pfn) && (gfn < dirty_vram->end_pfn) )
+    old_mfn = shadow_l1e_get_mfn(old_sl1e);
+
+    if ( !sh_l1e_is_magic(old_sl1e) &&
+         (l1e_get_flags(old_sl1e) & _PAGE_PRESENT) &&
+         mfn_valid(old_mfn))
     {
-        unsigned long i = gfn - dirty_vram->begin_pfn;
-        struct page_info *page = mfn_to_page(mfn);
-        
-        if ( (page->u.inuse.type_info & PGT_count_mask) == 1 )
-            /* Initial guest reference, record it */
-            dirty_vram->sl1ma[i] = pfn_to_paddr(mfn_x(sl1mfn))
-                | ((unsigned long)sl1e & ~PAGE_MASK);
+        old_gfn = mfn_to_gfn(d, old_mfn);
     }
-}
-
-static inline void shadow_vram_put_l1e(shadow_l1e_t old_sl1e,
-                                       shadow_l1e_t *sl1e,
-                                       mfn_t sl1mfn,
-                                       struct domain *d)
-{
-    mfn_t mfn = shadow_l1e_get_mfn(old_sl1e);
-    int flags = shadow_l1e_get_flags(old_sl1e);
-    unsigned long gfn;
-    struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
-
-    if ( !dirty_vram         /* tracking disabled? */
-         || !(flags & _PAGE_RW) /* read-only mapping? */
-         || !mfn_valid(mfn) )   /* mfn can be invalid in mmio_direct */
-        return;
-
-    gfn = mfn_to_gfn(d, mfn);
-    /* Page sharing not supported on shadow PTs */
-    BUG_ON(SHARED_M2P(gfn));
-
-    if ( (gfn >= dirty_vram->begin_pfn) && (gfn < dirty_vram->end_pfn) )
+    
+    new_mfn = shadow_l1e_get_mfn(new_sl1e);
+    if ( !sh_l1e_is_magic(new_sl1e) &&
+         (l1e_get_flags(new_sl1e) & _PAGE_PRESENT) &&
+         mfn_valid(new_mfn))
     {
-        unsigned long i = gfn - dirty_vram->begin_pfn;
-        struct page_info *page = mfn_to_page(mfn);
-        int dirty = 0;
-        paddr_t sl1ma = pfn_to_paddr(mfn_x(sl1mfn))
-            | ((unsigned long)sl1e & ~PAGE_MASK);
+        new_gfn = mfn_to_gfn(d, new_mfn);
+    }
 
-        if ( (page->u.inuse.type_info & PGT_count_mask) == 1 )
-        {
-            /* Last reference */
-            if ( dirty_vram->sl1ma[i] == INVALID_PADDR ) {
-                /* We didn't know it was that one, let's say it is dirty */
-                dirty = 1;
-            }
-            else
-            {
-                ASSERT(dirty_vram->sl1ma[i] == sl1ma);
-                dirty_vram->sl1ma[i] = INVALID_PADDR;
-                if ( flags & _PAGE_DIRTY )
-                    dirty = 1;
-            }
-        }
-        else
+    if ( old_gfn == new_gfn ) return;
+
+    if ( VALID_M2P(old_gfn) )
+        if ( dirty_vram_range_update(d, old_gfn, sl1ma, 0/*clear*/) )
         {
-            /* We had more than one reference, just consider the page dirty. */
-            dirty = 1;
-            /* Check that it's not the one we recorded. */
-            if ( dirty_vram->sl1ma[i] == sl1ma )
-            {
-                /* Too bad, we remembered the wrong one... */
-                dirty_vram->sl1ma[i] = INVALID_PADDR;
-            }
-            else
-            {
-                /* Ok, our recorded sl1e is still pointing to this page, let's
-                 * just hope it will remain. */
-            }
+            SHADOW_PRINTK("gfn %lx (mfn %lx) cleared vram pte\n",
+                          old_gfn, mfn_x(old_mfn));
         }
-        if ( dirty )
+
+    if ( VALID_M2P(new_gfn) )
+        if ( dirty_vram_range_update(d, new_gfn, sl1ma, 1/*set*/) )
         {
-            dirty_vram->dirty_bitmap[i / 8] |= 1 << (i % 8);
-            dirty_vram->last_dirty = NOW();
+            SHADOW_PRINTK("gfn %lx (mfn %lx) set vram pte\n",
+                          new_gfn, mfn_x(new_mfn));
         }
-    }
 }
 
 static int shadow_set_l1e(struct vcpu *v, 
@@ -1211,12 +1167,13 @@ static int shadow_set_l1e(struct vcpu *v,
                 shadow_l1e_remove_flags(new_sl1e, _PAGE_RW);
                 /* fall through */
             case 0:
-                shadow_vram_get_l1e(new_sl1e, sl1e, sl1mfn, d);
                 break;
             }
         }
     } 
 
+    shadow_vram_fix_l1e(old_sl1e, new_sl1e, sl1e, sl1mfn, d);
+
     /* Write the new entry */
     shadow_write_entries(sl1e, &new_sl1e, 1, sl1mfn);
     flags |= SHADOW_SET_CHANGED;
@@ -1231,7 +1188,6 @@ static int shadow_set_l1e(struct vcpu *v,
          * trigger a flush later. */
         if ( shadow_mode_refcounts(d) ) 
         {
-            shadow_vram_put_l1e(old_sl1e, sl1e, sl1mfn, d);
             shadow_put_page_from_l1e(old_sl1e, d);
             TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_SHADOW_L1_PUT_REF);
         } 
@@ -2018,7 +1974,6 @@ void sh_destroy_l1_shadow(struct vcpu *v, mfn_t smfn)
         SHADOW_FOREACH_L1E(sl1mfn, sl1e, 0, 0, {
             if ( (shadow_l1e_get_flags(*sl1e) & _PAGE_PRESENT)
                  && !sh_l1e_is_magic(*sl1e) ) {
-                shadow_vram_put_l1e(*sl1e, sl1e, sl1mfn, d);
                 shadow_put_page_from_l1e(*sl1e, d);
             }
         });
@@ -4336,6 +4291,37 @@ int sh_rm_mappings_from_l1(struct vcpu *v, mfn_t sl1mfn, mfn_t target_mfn)
     return done;
 }
 
+
+int sh_find_vram_mappings_in_l1(struct vcpu *v,
+                                mfn_t sl1mfn,
+                                unsigned long begin_pfn,
+                                unsigned long end_pfn,
+                                int *removed)
+/* Find all VRAM mappings in this shadow l1 table */
+{
+    struct domain *d = v->domain;
+    shadow_l1e_t *sl1e;
+    int done = 0;
+
+    /* only returns _PAGE_PRESENT entries */
+    SHADOW_FOREACH_L1E(sl1mfn, sl1e, 0, done, 
+    {
+        unsigned long gfn;
+        mfn_t gmfn = shadow_l1e_get_mfn(*sl1e);
+        if ( !mfn_valid(gmfn) )
+            continue;
+        gfn = mfn_to_gfn(d, gmfn);
+        if ( VALID_M2P(gfn) && (begin_pfn <= gfn) && (gfn < end_pfn) ) 
+        {
+            paddr_t sl1ma =
+                pfn_to_paddr(mfn_x(sl1mfn)) |
+                ( (unsigned long)sl1e & ~PAGE_MASK );
+            dirty_vram_range_update(v->domain, gfn, sl1ma, 1/*set*/);
+        }
+    });
+    return 0;
+}
+
 /**************************************************************************/
 /* Functions to excise all pointers to shadows from higher-level shadows. */
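One detail worth spelling out: the sl1ma values that shadow_vram_fix_l1e() and sh_find_vram_mappings_in_l1() hand to dirty_vram_range_update() pack two things into a single paddr_t, the machine address of the shadow L1 page and the byte offset of the PTE slot inside it. A standalone arithmetic sketch of that encoding, using made-up numbers and a hard-coded 4K page rather than Xen's real macros:

    #include <stdio.h>
    #include <stdint.h>

    #define EX_PAGE_SHIFT 12
    #define EX_PAGE_SIZE  (1ULL << EX_PAGE_SHIFT)
    #define EX_PAGE_MASK  (~(EX_PAGE_SIZE - 1))

    int main(void)
    {
        uint64_t sl1mfn   = 0x1234;                 /* shadow L1 page's machine frame  */
        uint64_t entry_va = 0xffff82c000005060ULL;  /* virtual address of the PTE slot */

        /* Machine address of the shadow L1 page ...                      */
        uint64_t page_ma = sl1mfn << EX_PAGE_SHIFT;
        /* ... combined with the PTE's byte offset inside that page.      */
        uint64_t sl1ma = page_ma | (entry_va & ~EX_PAGE_MASK);

        printf("sl1ma = %#llx (page %#llx, offset %#llx)\n",
               (unsigned long long)sl1ma,
               (unsigned long long)(sl1ma & EX_PAGE_MASK),
               (unsigned long long)(sl1ma & ~EX_PAGE_MASK));
        return 0;
    }

In the patch itself the same composition appears as pfn_to_paddr(mfn_x(sl1mfn)) | ((unsigned long)sl1e & ~PAGE_MASK).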
 
diff --git a/xen/arch/x86/mm/shadow/multi.h b/xen/arch/x86/mm/shadow/multi.h
index 835121e..436a4ac 100644
--- a/xen/arch/x86/mm/shadow/multi.h
+++ b/xen/arch/x86/mm/shadow/multi.h
@@ -66,7 +66,12 @@ SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1, GUEST_LEVELS)
 extern int
 SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1, GUEST_LEVELS)
     (struct vcpu *v, mfn_t sl1mfn, mfn_t target_mfn);
-
+extern int
+SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, GUEST_LEVELS)
+     (struct vcpu *v, mfn_t sl1mfn, 
+      unsigned long begin_pfn,
+      unsigned long end_pfn,
+      int *removed);
 extern void
 SHADOW_INTERNAL_NAME(sh_clear_shadow_entry, GUEST_LEVELS)
     (struct vcpu *v, void *ep, mfn_t smfn);
diff --git a/xen/arch/x86/mm/shadow/types.h b/xen/arch/x86/mm/shadow/types.h
index 43ce1db..5b0f9f7 100644
--- a/xen/arch/x86/mm/shadow/types.h
+++ b/xen/arch/x86/mm/shadow/types.h
@@ -229,6 +229,7 @@ static inline shadow_l4e_t shadow_l4e_from_mfn(mfn_t mfn, u32 flags)
 #define sh_update_cr3              INTERNAL_NAME(sh_update_cr3)
 #define sh_rm_write_access_from_l1 INTERNAL_NAME(sh_rm_write_access_from_l1)
 #define sh_rm_mappings_from_l1     INTERNAL_NAME(sh_rm_mappings_from_l1)
+#define sh_find_vram_mappings_in_l1 INTERNAL_NAME(sh_find_vram_mappings_in_l1)
 #define sh_remove_l1_shadow        INTERNAL_NAME(sh_remove_l1_shadow)
 #define sh_remove_l2_shadow        INTERNAL_NAME(sh_remove_l2_shadow)
 #define sh_remove_l3_shadow        INTERNAL_NAME(sh_remove_l3_shadow)
diff --git a/xen/include/asm-x86/dirty_vram.h b/xen/include/asm-x86/dirty_vram.h
new file mode 100644
index 0000000..53be66e
--- /dev/null
+++ b/xen/include/asm-x86/dirty_vram.h
@@ -0,0 +1,202 @@
+/****************************************************************************
+ * include/asm-x86/dirty_vram.h
+ *
+ * Interface for tracking dirty VRAM pages
+ *
+ * Copyright (c) 2012 Citrix Systems, Inc. (Robert Phillips)
+ * Parts of this code are Copyright (c) 2007 Advanced Micro Devices (Wei Huang)
+ * Parts of this code are Copyright (c) 2006 by XenSource Inc.
+ * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
+ * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#ifndef _DIRTY_VRAM_H
+#define _DIRTY_VRAM_H
+
+/*
+ * In shadow mode we need to bookkeep all the L1 page table entries that
+ * map a frame buffer page.  Struct dv_paddr_link does this by
+ * recording the address of an L1 page table entry for some frame buffer page.
+ * It also links to additional pl entries if the frame buffer page
+ * has multiple mappings.
+ * In practice very few pages have multiple mappings.
+ * But to rule out pathological situations, we limit the number of
+ * mappings we're willing to bookkeep.
+ */
+
+#define DV_ADDR_LINK_LIST_LIMIT 64
+
+typedef struct dv_paddr_link {
+    paddr_t sl1ma;
+    struct dv_paddr_link *pl_next;
+} dv_paddr_link_t;
+
+typedef struct dv_pl_entry {
+    dv_paddr_link_t mapping;
+    bool_t stuck_dirty;
+} dv_pl_entry_t;
+
+/*
+ * This defines an extension page of pl entries for FB pages with multiple
+ * mappings. All such pages (of a domain) are linked together.
+ */
+typedef struct dv_paddr_link_ext {
+    struct list_head ext_link;
+    dv_paddr_link_t entries[ ( PAGE_SIZE - sizeof( struct list_head ) ) /
+                             sizeof( dv_paddr_link_t ) ];
+} dv_paddr_link_ext_t;
+
+/*
+ * This defines a single frame buffer range.  It bookkeeps all the
+ * level 1 PTEs that map guest pages within that range.
+ * All such ranges (of a domain) are linked together.
+ */
+typedef struct dv_range {
+    struct list_head range_link; /* the several ranges form a linked list */
+    unsigned long begin_pfn;
+    unsigned long end_pfn;
+    dv_pl_entry_t *pl_tab; /* table has 1 pl entry per pfn in range */
+    int nr_mappings;  /* total number of mappings in this range */
+    int mappings_hwm; /* high water mark of max mapping count */
+    unsigned int dirty_count;
+} dv_range_t;
+
+/*
+ * This contains all the data structures required by a domain to
+ * bookkeep the dirty pages within its frame buffers.
+ */
+typedef struct dv_dirty_vram {
+    struct list_head range_head; /* head of the linked list of ranges */
+    struct list_head ext_head; /* head of list of extension pages */
+    dv_paddr_link_t *pl_free; /* free list of pl's within extension pages */
+    int nr_ranges; /* bookkeeps number of ranges */
+    int ranges_hwm; /* high water mark of max number of ranges */
+} dv_dirty_vram_t;
+
+/* Allocates domain's dirty_vram structure */
+dv_dirty_vram_t *
+dirty_vram_alloc(struct domain *d);
+
+/*
+ * Returns domain's dirty_vram structure,
+ * allocating it if necessary
+ */
+dv_dirty_vram_t *
+dirty_vram_find_or_alloc(struct domain *d);
+
+/* Frees domain's dirty_vram structure */
+void dirty_vram_free(struct domain *d);
+
+/* Returns dirty vram range containing gfn, NULL if none */
+struct dv_range *
+dirty_vram_range_find_gfn(struct domain *d,
+                          unsigned long gfn);
+
+/*
+ * Returns dirty vram range matching [ begin_pfn .. begin_pfn+nr ),
+ * NULL if none
+ */
+dv_range_t *
+dirty_vram_range_find(struct domain *d,
+                      unsigned long begin_pfn,
+                      unsigned long nr);
+
+/*
+ * Allocate dirty vram range containing [ begin_pfn .. begin_pfn+nr ),
+ * freeing any existing range that overlaps the new range.
+ */
+dv_range_t *
+dirty_vram_range_alloc(struct domain *d,
+                       unsigned long begin_pfn,
+                       unsigned long nr);
+
+/*
+ * Returns dirty vram range matching [ begin_pfn .. begin_pfn+nr ),
+ * creating a range if none already exists and
+ * freeing any existing range that overlaps the new range.
+ */
+dv_range_t *
+dirty_vram_range_find_or_alloc(struct domain *d,
+                               unsigned long begin_pfn,
+                               unsigned long nr);
+
+void dirty_vram_range_free(struct domain *d,
+                           dv_range_t *range);
+
+/* Bookkeep PTE address of a frame buffer page */
+int dirty_vram_range_update(struct domain *d,
+                            unsigned long gfn,
+                            paddr_t sl1ma,
+                            int set);
+
+/*
+ * smfn is no longer a shadow page.  Remove it from any
+ * dirty vram range mapping.
+ */
+void
+dirty_vram_delete_shadow(struct vcpu *v,
+                         unsigned long gfn,
+                         unsigned int shadow_type,
+                         mfn_t smfn);
+
+
+/*
+ * Scan all the L1 tables looking for VRAM mappings.
+ * Record them in the domain's dv_dirty_vram structure
+ */
+void sh_find_all_vram_mappings(struct vcpu *v,
+                               dv_range_t *range);
+
+/*
+ * Free a paddr_link struct, given address of its
+ * predecessor in singly-linked list
+ */
+dv_paddr_link_t *
+free_paddr_link(struct domain *d,
+                dv_paddr_link_t **ppl,
+                dv_paddr_link_t *pl);
+
+
+/* Enable VRAM dirty tracking. */
+int
+shadow_track_dirty_vram(struct domain *d,
+			unsigned long first_pfn,
+			unsigned long nr,
+			XEN_GUEST_HANDLE_64(uint8) dirty_bitmap);
+
+int
+hap_track_dirty_vram(struct domain *d,
+		     unsigned long begin_pfn,
+		     unsigned long nr,
+		     XEN_GUEST_HANDLE_64(uint8) dirty_bitmap);
+
+void
+hap_clean_vram_tracking_range(struct domain *d,
+			      unsigned long begin_pfn,
+			      unsigned long nr,
+			      uint8_t *dirty_bitmap);
+
+#endif /* _DIRTY_VRAM_H */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
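To make the layout described in this header concrete: every pfn in a range owns one in-line mapping slot in pl_tab, and any additional mappings of the same pfn hang off that slot as a singly linked list of paddr links. The following standalone model uses simplified stand-in structs (not the dv_pl_entry_t / dv_paddr_link_t definitions above) purely to show that shape:

    #include <stdio.h>
    #include <stdint.h>

    #define INVALID_PADDR (~0ULL)   /* stand-in for Xen's INVALID_PADDR */

    /* Simplified stand-ins for dv_paddr_link_t / dv_pl_entry_t. */
    struct pl {
        uint64_t sl1ma;     /* machine address of one L1 PTE mapping this pfn */
        struct pl *next;    /* further mappings of the same pfn, if any       */
    };

    struct pl_entry {
        struct pl mapping;  /* in-line slot for the (usually only) mapping      */
        int stuck_dirty;    /* bookkeeping gave up; treat page as always dirty  */
    };

    /* Count the mappings recorded for one frame-buffer pfn. */
    static int count_mappings(const struct pl_entry *e)
    {
        const struct pl *p;
        int n = 0;

        for ( p = &e->mapping; p != NULL; p = p->next )
            if ( p->sl1ma != INVALID_PADDR )
                n++;
        return n;
    }

    int main(void)
    {
        struct pl extra = { .sl1ma = 0x5678060ULL, .next = NULL };
        struct pl_entry e = {
            .mapping = { .sl1ma = 0x1234060ULL, .next = &extra },
            .stuck_dirty = 0,
        };

        printf("pfn has %d mapping(s)\n", count_mappings(&e));   /* prints 2 */
        return 0;
    }

In the real structures the overflow links come from per-domain extension pages and unused slots hold INVALID_PADDR, which is why the sketch skips that value when counting.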
diff --git a/xen/include/asm-x86/hap.h b/xen/include/asm-x86/hap.h
index 916a35b..3e3a1f5 100644
--- a/xen/include/asm-x86/hap.h
+++ b/xen/include/asm-x86/hap.h
@@ -57,10 +57,6 @@ void  hap_final_teardown(struct domain *d);
 void  hap_teardown(struct domain *d);
 void  hap_vcpu_init(struct vcpu *v);
 void  hap_logdirty_init(struct domain *d);
-int   hap_track_dirty_vram(struct domain *d,
-                           unsigned long begin_pfn,
-                           unsigned long nr,
-                           XEN_GUEST_HANDLE_64(uint8) dirty_bitmap);
 
 extern const struct paging_mode *hap_paging_get_mode(struct vcpu *);
 
diff --git a/xen/include/asm-x86/hvm/domain.h b/xen/include/asm-x86/hvm/domain.h
index 27b3de5..0cc7b05 100644
--- a/xen/include/asm-x86/hvm/domain.h
+++ b/xen/include/asm-x86/hvm/domain.h
@@ -74,7 +74,7 @@ struct hvm_domain {
     struct list_head       pinned_cacheattr_ranges;
 
     /* VRAM dirty support. */
-    struct sh_dirty_vram *dirty_vram;
+    struct dv_dirty_vram  *dirty_vram;
 
     /* If one of vcpus of this domain is in no_fill_mode or
      * mtrr/pat between vcpus is not the same, set is_in_uc_mode
diff --git a/xen/include/asm-x86/paging.h b/xen/include/asm-x86/paging.h
index c3a8848..e22df38 100644
--- a/xen/include/asm-x86/paging.h
+++ b/xen/include/asm-x86/paging.h
@@ -154,9 +154,13 @@ void paging_log_dirty_init(struct domain *d,
                            int  (*disable_log_dirty)(struct domain *d),
                            void (*clean_dirty_bitmap)(struct domain *d));
 
-/* mark a page as dirty */
+/* mark a gmfn as dirty, a wrapper around marking a gpfn as dirty */
 void paging_mark_dirty(struct domain *d, unsigned long guest_mfn);
 
+/* mark a gpfn as dirty */
+void paging_mark_dirty_gpfn(struct domain *d, unsigned long gpfn);
+
+
 /* is this guest page dirty? 
  * This is called from inside paging code, with the paging lock held. */
 int paging_mfn_is_dirty(struct domain *d, mfn_t gmfn);
@@ -183,15 +187,6 @@ int paging_mfn_is_dirty(struct domain *d, mfn_t gmfn);
 #define L4_LOGDIRTY_IDX(pfn) 0
 #endif
 
-/* VRAM dirty tracking support */
-struct sh_dirty_vram {
-    unsigned long begin_pfn;
-    unsigned long end_pfn;
-    paddr_t *sl1ma;
-    uint8_t *dirty_bitmap;
-    s_time_t last_dirty;
-};
-
 /*****************************************************************************
  * Entry points into the paging-assistance code */
 
diff --git a/xen/include/asm-x86/shadow.h b/xen/include/asm-x86/shadow.h
index 2eb6efc..940d7fd 100644
--- a/xen/include/asm-x86/shadow.h
+++ b/xen/include/asm-x86/shadow.h
@@ -62,12 +62,6 @@ void shadow_vcpu_init(struct vcpu *v);
 /* Enable an arbitrary shadow mode.  Call once at domain creation. */
 int shadow_enable(struct domain *d, u32 mode);
 
-/* Enable VRAM dirty bit tracking. */
-int shadow_track_dirty_vram(struct domain *d,
-                            unsigned long first_pfn,
-                            unsigned long nr,
-                            XEN_GUEST_HANDLE_64(uint8) dirty_bitmap);
-
 /* Handler for shadow control ops: operations from user-space to enable
  * and disable ephemeral shadow modes (test mode and log-dirty mode) and
  * manipulate the log-dirty bitmap. */
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 35+ messages in thread

* Re: [PATCH] Provide support for multiple frame buffers in Xen
  2013-01-10 12:29 ` Tim Deegan
@ 2013-01-16 15:10   ` Robert Phillips
  0 siblings, 0 replies; 35+ messages in thread
From: Robert Phillips @ 2013-01-16 15:10 UTC (permalink / raw)
  To: Tim (Xen.org); +Cc: Jan Beulich, xen-devel

Tim,
I will be submitting a patch in a few minutes that addresses both of these concerns.
-- rsp

> -----Original Message-----
> From: Tim Deegan [mailto:tim@xen.org]
> Sent: Thursday, January 10, 2013 7:29 AM
> To: Robert Phillips
> Cc: Jan Beulich; xen-devel@lists.xen.org
> Subject: Re: [Xen-devel] [PATCH] Provide support for multiple frame buffers
> in Xen
> 
> Hi,
> 
> I was really hoping to commit this today, but unfortunately, I've
> spotted two things:
> 
> 1. There are a few uses of *_to_virt() to get persistent mappings
>    of arbitrary addresses (PTEs, and pages of linked-list entries).
>    That's presumably going to interact badly with Jan's work to support
>    very large machines, which will reintroduce [un]map_domain_page()
>    for 64-bit x86.
> 
>    I wouldn't hold this patch back just for that, specially since the
>    code it replaces already uses maddr_to_virt(), but since it needs a
>    respin for the other thing I'd like Jan's opinion.
> 
> 2. This code still allocates user-controlled amounts of stack memory:
> 
> > +    {
> > +        int size = (nr + BITS_PER_LONG - 1) / BITS_PER_LONG;
> > +        unsigned long dirty_bitmap[size];
> > +
> > +        memset(dirty_bitmap, 0x00, size * BYTES_PER_LONG);
> > +
> > +        flush_tlb |= shadow_scan_dirty_flags(d, range,
> (uint8_t*)dirty_bitmap);
> > +
> > +        rc = -EFAULT;
> > +        if ( copy_to_guest(guest_dirty_bitmap,
> > +                           (uint8_t*)dirty_bitmap,
> > +                           size * BYTES_PER_LONG) == 0 )
> > +            rc = 0;
> > +    }
> 
> Cheers,
> 
> Tim.

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH] Provide support for multiple frame buffers in Xen
  2013-01-02 14:47 Robert Phillips
@ 2013-01-10 12:29 ` Tim Deegan
  2013-01-16 15:10   ` Robert Phillips
  0 siblings, 1 reply; 35+ messages in thread
From: Tim Deegan @ 2013-01-10 12:29 UTC (permalink / raw)
  To: Robert Phillips; +Cc: Jan Beulich, xen-devel

Hi,

I was really hoping to commit this today, but unfortunately, I've
spotted two things: 

1. There are a few uses of *_to_virt() to get persistent mappings
   of arbitrary addresses (PTEs, and pages of linked-list entries).
   That's presumably going to interact badly with Jan's work to support
   very large machines, which will reintroduce [un]map_domain_page()
   for 64-bit x86.

   I wouldn't hold this patch back just for that, specially since the
   code it replaces already uses maddr_to_virt(), but since it needs a
   respin for the other thing I'd like Jan's opinion.

2. This code still allocates user-controlled amounts of stack memory:

> +    {
> +        int size = (nr + BITS_PER_LONG - 1) / BITS_PER_LONG;
> +        unsigned long dirty_bitmap[size];
> +
> +        memset(dirty_bitmap, 0x00, size * BYTES_PER_LONG);
> +
> +        flush_tlb |= shadow_scan_dirty_flags(d, range, (uint8_t*)dirty_bitmap);
> +
> +        rc = -EFAULT;
> +        if ( copy_to_guest(guest_dirty_bitmap,
> +                           (uint8_t*)dirty_bitmap,
> +                           size * BYTES_PER_LONG) == 0 )
> +            rc = 0;
> +    }

Cheers,

Tim.
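On point 2, the issue is that dirty_bitmap[] is a variable-length array whose length is derived from the guest-supplied nr, so a large request can consume an unbounded amount of hypervisor stack. A heap allocation with an explicit failure path avoids that; the sketch below shows the shape of such a fix in plain standalone C, with calloc/free standing in for the hypervisor's allocator and the copy-out to the guest omitted:

    #include <stdio.h>
    #include <stdlib.h>
    #include <stdint.h>

    #define BITS_PER_BYTE 8

    /*
     * Build a dirty bitmap for 'nr' pages on the heap instead of the stack.
     * 'nr' is untrusted, so the size computation is range-checked and an
     * allocation failure is reported instead of blowing the stack.
     */
    static int scan_dirty(unsigned long nr, unsigned char **out_bitmap)
    {
        size_t size;
        unsigned char *bitmap;

        if ( nr == 0 || nr > SIZE_MAX - (BITS_PER_BYTE - 1) )
            return -1;

        size = ((size_t)nr + BITS_PER_BYTE - 1) / BITS_PER_BYTE;
        bitmap = calloc(size, 1);
        if ( bitmap == NULL )
            return -1;              /* -ENOMEM in the hypervisor */

        bitmap[0] |= 1;             /* pretend page 0 was dirtied */
        *out_bitmap = bitmap;       /* caller copies it out and frees it */
        return 0;
    }

    int main(void)
    {
        unsigned char *bm = NULL;

        if ( scan_dirty(1024, &bm) == 0 )
        {
            printf("first byte of bitmap: %#x\n", bm[0]);
            free(bm);
        }
        return 0;
    }

The patch below already takes this general approach for the HAP path (xzalloc_bytes in hap_track_dirty_vram); the on-stack array Tim quotes is in the shadow path.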

^ permalink raw reply	[flat|nested] 35+ messages in thread

* [PATCH] Provide support for multiple frame buffers in Xen
@ 2013-01-02 14:47 Robert Phillips
  2013-01-10 12:29 ` Tim Deegan
  0 siblings, 1 reply; 35+ messages in thread
From: Robert Phillips @ 2013-01-02 14:47 UTC (permalink / raw)
  To: xen-devel; +Cc: Robert Phillips

Support is provided for both shadow and hardware assisted paging (HAP)
modes. This code bookkeeps the set of video frame buffers (vram),
detects when the guest has modified any of those buffers and, upon request,
returns a bitmap of the modified pages.
This lets other software components re-paint the portions of the monitor
(or monitors) that have changed.
Each monitor has a frame buffer of some size at some position
in guest physical memory.
The set of frame buffers being tracked can change over time as monitors
are plugged and unplugged.

Signed-Off-By: Robert Phillips <robert.phillips@citrix.com>
---
 tools/libxc/xenctrl.h            |   17 +-
 xen/arch/x86/hvm/hvm.c           |    9 +-
 xen/arch/x86/mm/Makefile         |    1 +
 xen/arch/x86/mm/dirty_vram.c     |  845 ++++++++++++++++++++++++++++++++++++++
 xen/arch/x86/mm/hap/hap.c        |  111 -----
 xen/arch/x86/mm/p2m.c            |   11 +-
 xen/arch/x86/mm/paging.c         |   57 ++-
 xen/arch/x86/mm/shadow/common.c  |  335 ++++++++-------
 xen/arch/x86/mm/shadow/multi.c   |  174 ++++----
 xen/arch/x86/mm/shadow/multi.h   |    7 +-
 xen/arch/x86/mm/shadow/types.h   |    1 +
 xen/include/asm-x86/dirty_vram.h |  202 +++++++++
 xen/include/asm-x86/hap.h        |    4 -
 xen/include/asm-x86/hvm/domain.h |    2 +-
 xen/include/asm-x86/paging.h     |   15 +-
 xen/include/asm-x86/shadow.h     |    6 -
 16 files changed, 1400 insertions(+), 397 deletions(-)
 create mode 100644 xen/arch/x86/mm/dirty_vram.c
 create mode 100644 xen/include/asm-x86/dirty_vram.h

diff --git a/tools/libxc/xenctrl.h b/tools/libxc/xenctrl.h
index 32122fd..08bc55e 100644
--- a/tools/libxc/xenctrl.h
+++ b/tools/libxc/xenctrl.h
@@ -1563,15 +1563,20 @@ int xc_hvm_inject_msi(
     xc_interface *xch, domid_t dom, uint64_t addr, uint32_t data);
 
 /*
- * Track dirty bit changes in the VRAM area
+ * Track dirty bit changes in a VRAM region defined by
+ * [ first_pfn : first_pfn + nr - 1 ]
  *
  * All of this is done atomically:
- * - get the dirty bitmap since the last call
- * - set up dirty tracking area for period up to the next call
- * - clear the dirty tracking area.
+ * - gets the dirty bitmap since the last call, all zeroes for
+ *   the first call with some new region
+ * - sets up a dirty tracking region for period up to the next call
+ * - clears the specified dirty tracking region.
  *
- * Returns -ENODATA and does not fill bitmap if the area has changed since the
- * last call.
+ * Creating a new region causes any existing regions that it overlaps
+ * to be discarded.
+ *
+ * Specifying nr == 0 causes all regions to be discarded and
+ * disables dirty bit tracking.
  */
 int xc_hvm_track_dirty_vram(
     xc_interface *xch, domid_t dom,
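A caller-side sketch of the interface documented in the comment above. The trailing parameters (first_pfn, nr and an unsigned long bitmap buffer) are assumed to match the existing xc_hvm_track_dirty_vram declaration, since the hunk above only shows its first line, so check xenctrl.h before relying on them; the domid and frame-buffer address are placeholders:

    #include <stdio.h>
    #include <stdlib.h>
    #include <xenctrl.h>

    #define FB_PFN   0xf0000ULL   /* placeholder frame buffer start pfn      */
    #define FB_PAGES 1024ULL      /* placeholder: 4 MB of vram in 4K pages   */

    int main(int argc, char **argv)
    {
        domid_t dom = (argc > 1) ? atoi(argv[1]) : 1;   /* placeholder domid */
        unsigned long bitmap[(FB_PAGES + 8 * sizeof(unsigned long) - 1) /
                             (8 * sizeof(unsigned long))];
        xc_interface *xch = xc_interface_open(NULL, NULL, 0);

        if ( xch == NULL )
            return 1;

        /* First call for this region: sets up tracking, returns all zeroes. */
        if ( xc_hvm_track_dirty_vram(xch, dom, FB_PFN, FB_PAGES, bitmap) == 0 )
            printf("first bitmap word: %#lx\n", bitmap[0]);

        /* ... repaint, wait, then call again to learn what was re-dirtied ... */

        /* nr == 0: discard all regions and stop tracking. */
        xc_hvm_track_dirty_vram(xch, dom, 0, 0, bitmap);

        xc_interface_close(xch);
        return 0;
    }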
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 40c1ab2..8ddb91d 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -57,6 +57,7 @@
 #include <asm/hvm/cacheattr.h>
 #include <asm/hvm/trace.h>
 #include <asm/hvm/nestedhvm.h>
+#include <asm/dirty_vram.h>
 #include <asm/mtrr.h>
 #include <asm/apic.h>
 #include <public/sched.h>
@@ -66,6 +67,7 @@
 #include <asm/mem_event.h>
 #include <asm/mem_access.h>
 #include <public/mem_event.h>
+#include "../mm/mm-locks.h"
 
 bool_t __read_mostly hvm_enabled;
 
@@ -1433,8 +1435,11 @@ int hvm_hap_nested_page_fault(paddr_t gpa,
          */
         if ( access_w )
         {
-            paging_mark_dirty(v->domain, mfn_x(mfn));
-            p2m_change_type(v->domain, gfn, p2m_ram_logdirty, p2m_ram_rw);
+            if ( p2m_change_type(v->domain, gfn, p2m_ram_logdirty,
+                                 p2m_ram_rw) == p2m_ram_logdirty )
+            {
+                paging_mark_dirty_gpfn(v->domain, gfn);
+            }
         }
         rc = 1;
         goto out_put_gfn;
diff --git a/xen/arch/x86/mm/Makefile b/xen/arch/x86/mm/Makefile
index 73dcdf4..becd0c9 100644
--- a/xen/arch/x86/mm/Makefile
+++ b/xen/arch/x86/mm/Makefile
@@ -5,6 +5,7 @@ obj-y += paging.o
 obj-y += p2m.o p2m-pt.o p2m-ept.o p2m-pod.o
 obj-y += guest_walk_2.o
 obj-y += guest_walk_3.o
+obj-y += dirty_vram.o
 obj-$(x86_64) += guest_walk_4.o
 obj-$(x86_64) += mem_event.o
 obj-$(x86_64) += mem_paging.o
diff --git a/xen/arch/x86/mm/dirty_vram.c b/xen/arch/x86/mm/dirty_vram.c
new file mode 100644
index 0000000..26d5acd
--- /dev/null
+++ b/xen/arch/x86/mm/dirty_vram.c
@@ -0,0 +1,845 @@
+/*
+ * arch/x86/mm/dirty_vram.c: Bookkeep/query dirty VRAM pages
+ * with support for multiple frame buffers.
+ *
+ * Copyright (c) 2012, Citrix Systems, Inc. (Robert Phillips)
+ * Parts of this code are Copyright (c) 2007 Advanced Micro Devices (Wei Huang)
+ * Parts of this code are Copyright (c) 2007 XenSource Inc.
+ * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
+ * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+
+#include <xen/types.h>
+#include <xen/sched.h>
+#include <xen/guest_access.h>
+#include <asm/shadow.h>
+#include <asm/dirty_vram.h>
+#include <asm/hap.h>
+#include <asm/config.h>
+#include "mm-locks.h"
+
+#define DEBUG_stop_tracking_all_vram          0
+#define DEBUG_allocating_dirty_vram_range     0
+#define DEBUG_high_water_mark_for_vram_ranges 0
+#define DEBUG_freeing_dirty_vram_range        0
+#define DEBUG_allocate_paddr_links_page       0
+#define DEBUG_update_vram_mapping             0
+#define DEBUG_alloc_paddr_inject_fault        0
+#define DEBUG_link_limit_exceeded             0
+
+/* Allocates domain's dirty_vram structure */
+dv_dirty_vram_t *
+dirty_vram_alloc(struct domain *d)
+{
+    dv_dirty_vram_t *dirty_vram;
+    ASSERT( paging_locked_by_me(d) );
+    dirty_vram = d->arch.hvm_domain.dirty_vram = xzalloc(dv_dirty_vram_t);
+    if ( dirty_vram )
+    {
+        INIT_LIST_HEAD(&dirty_vram->range_head);
+        INIT_LIST_HEAD(&dirty_vram->ext_head);
+    }
+    return dirty_vram;
+}
+
+/*
+ * Returns domain's dirty_vram structure,
+ * allocating it if necessary
+ */
+dv_dirty_vram_t *
+dirty_vram_find_or_alloc(struct domain *d)
+{
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    ASSERT( paging_locked_by_me(d) );
+    if ( !dirty_vram )
+        dirty_vram = dirty_vram_alloc(d);
+    return dirty_vram;
+}
+
+
+/* Free domain's dirty_vram structure */
+void dirty_vram_free(struct domain *d)
+{
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    ASSERT( paging_locked_by_me(d) );
+    if ( dirty_vram )
+    {
+        struct list_head *curr, *next;
+        /* Free all the ranges */
+        list_for_each_safe(curr, next, &dirty_vram->range_head)
+        {
+            dv_range_t *range = list_entry(curr, dv_range_t, range_link);
+#if DEBUG_stop_tracking_all_vram
+            gdprintk(XENLOG_DEBUG, "[%05lx:%05lx] stop tracking all vram\n",
+                     range->begin_pfn, range->end_pfn);
+#endif
+            xfree(range->pl_tab);
+            xfree(range);
+        }
+        /* Free all the extension pages */
+        list_for_each_safe(curr, next, &dirty_vram->ext_head)
+        {
+            struct dv_paddr_link_ext *ext =
+                container_of(
+                    curr, struct dv_paddr_link_ext, ext_link);
+            struct page_info *pg = __virt_to_page(ext);
+            d->arch.paging.free_page(d, pg);
+        }
+
+        xfree(dirty_vram);
+        d->arch.hvm_domain.dirty_vram = NULL;
+    }
+}
+
+/* Returns dirty vram range containing gfn, NULL if none */
+struct dv_range *
+dirty_vram_range_find_gfn(struct domain *d,
+                          unsigned long gfn)
+{
+    struct dv_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    ASSERT( paging_locked_by_me(d) );
+    if ( dirty_vram )
+    {
+        struct list_head *curr;
+        list_for_each(curr, &dirty_vram->range_head)
+        {
+            dv_range_t *range = list_entry(curr, dv_range_t, range_link);
+            if ( gfn >= range->begin_pfn &&
+                 gfn <  range->end_pfn )
+                return range;
+        }
+    }
+    return NULL;
+}
+
+/*
+ * Returns pointer to dirty vram range matching [begin_pfn .. end_pfn ),
+ * NULL if none.
+ */
+dv_range_t *
+dirty_vram_range_find(struct domain *d,
+                      unsigned long begin_pfn,
+                      unsigned long nr)
+{
+    unsigned long end_pfn = begin_pfn + nr;
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    ASSERT( paging_locked_by_me(d) );
+    if ( dirty_vram )
+    {
+        struct list_head *curr;
+        list_for_each(curr, &dirty_vram->range_head)
+        {
+            dv_range_t *range = list_entry(curr, dv_range_t, range_link);
+            if ( begin_pfn == range->begin_pfn &&
+                 end_pfn   == range->end_pfn )
+                return range;
+        }
+    }
+    return NULL;
+}
+
+/* Allocate specified dirty_vram range */
+static dv_range_t *
+_dirty_vram_range_alloc(struct domain *d,
+                        unsigned long begin_pfn,
+                        unsigned long nr)
+{
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    dv_range_t *range = NULL;
+    unsigned long end_pfn = begin_pfn + nr;
+    dv_pl_entry_t *pl_tab = NULL;
+    int i;
+
+    ASSERT( paging_locked_by_me(d) );
+    ASSERT( dirty_vram != NULL );
+
+#if DEBUG_allocating_dirty_vram_range
+    gdprintk(XENLOG_DEBUG,
+             "[%05lx:%05lx] Allocating dirty vram range hap:%d\n",
+             begin_pfn, end_pfn,
+             d->arch.hvm_domain.hap_enabled);
+#endif
+
+    range = xzalloc(dv_range_t);
+    if ( range == NULL )
+        goto err_out;
+
+    INIT_LIST_HEAD(&range->range_link);
+
+    range->begin_pfn = begin_pfn;
+    range->end_pfn = end_pfn;
+
+    if ( !hap_enabled(d) )
+    {
+        if ( (pl_tab = xzalloc_array(dv_pl_entry_t, nr)) == NULL )
+            goto err_out;
+
+        for ( i = 0; i != nr; i++ )
+        {
+            pl_tab[i].mapping.sl1ma = INVALID_PADDR;
+        }
+    }
+
+    range->pl_tab = pl_tab;
+    range->mappings_hwm = 1;
+
+    list_add(&range->range_link, &dirty_vram->range_head);
+    if ( ++dirty_vram->nr_ranges > dirty_vram->ranges_hwm )
+    {
+        dirty_vram->ranges_hwm = dirty_vram->nr_ranges;
+#if DEBUG_high_water_mark_for_vram_ranges
+        gdprintk(XENLOG_DEBUG,
+                 "High water mark for number of vram ranges is now:%d\n",
+                 dirty_vram->ranges_hwm);
+#endif
+    }
+    return range;
+
+ err_out:
+    xfree(pl_tab);
+    xfree(range);
+    return NULL;
+}
+
+
+/* Frees specified dirty_vram range */
+void dirty_vram_range_free(struct domain *d,
+                           dv_range_t *range)
+{
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    ASSERT( paging_locked_by_me(d) );
+    if ( dirty_vram )
+    {
+        int i, nr = range->end_pfn - range->begin_pfn;
+
+#if DEBUG_freeing_dirty_vram_range
+        gdprintk(XENLOG_DEBUG,
+                 "[%05lx:%05lx] Freeing dirty vram range\n",
+                 range->begin_pfn, range->end_pfn);
+#endif
+
+        if ( range->pl_tab )
+        {
+            for ( i = 0; i != nr; i++ )
+            {
+                dv_paddr_link_t *plx;
+                plx = range->pl_tab[i].mapping.pl_next;
+                /* Does current FB page have multiple mappings? */
+                if ( plx ) /* yes */
+                {
+                    /* Find the last element in singly-linked list */
+                    while ( plx->pl_next != NULL )
+                        plx = plx->pl_next;
+                    
+                    /* Prepend whole list to the free list */
+                    plx->pl_next = dirty_vram->pl_free;
+                    dirty_vram->pl_free = range->pl_tab[i].mapping.pl_next;
+                }
+            }
+            xfree(range->pl_tab);
+            range->pl_tab = NULL;
+        }
+
+        /* Remove range from the linked list, free it, and adjust count */
+        list_del(&range->range_link);
+        xfree(range);
+        dirty_vram->nr_ranges--;
+    }
+}
+
+/*
+ * dirty_vram_range_alloc()
+ * This function ensures that the new range does not overlap any existing
+ * ranges -- deleting them if necessary -- and then calls
+ * _dirty_vram_range_alloc to actually allocate the new range.
+ */
+dv_range_t *
+dirty_vram_range_alloc(struct domain *d,
+                        unsigned long begin_pfn,
+                        unsigned long nr)
+{
+    unsigned long end_pfn = begin_pfn + nr;
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    dv_range_t *range;
+    struct list_head *curr, *next;
+
+    ASSERT( paging_locked_by_me(d) );
+    ASSERT( dirty_vram != NULL );
+
+    /*
+     * Ranges cannot overlap so
+     * free any range that overlaps [ begin_pfn .. end_pfn )
+     */
+    list_for_each_safe(curr, next, &dirty_vram->range_head)
+    {
+        dv_range_t *rng = list_entry(curr, dv_range_t, range_link);
+        if ( ( ( rng->begin_pfn <= begin_pfn ) &&
+               ( begin_pfn <  rng->end_pfn   )
+                 ) ||
+             ( ( begin_pfn <= rng->begin_pfn ) &&
+               ( rng->begin_pfn < end_pfn    )
+                 ) )
+        {
+            /* Different tracking, tear the previous down. */
+            dirty_vram_range_free(d, rng);
+        }
+    }
+
+    range = _dirty_vram_range_alloc(d, begin_pfn, nr);
+    if ( !range )
+        goto out;
+
+ out:
+    return range;
+}
+
+/*
+ * dirty_vram_range_find_or_alloc()
+ * Find the range for [begin_pfn:begin_pfn+nr).
+ * If it doesn't exist, create it.
+ */
+dv_range_t *
+dirty_vram_range_find_or_alloc(struct domain *d,
+                                unsigned long begin_pfn,
+                                unsigned long nr)
+{
+    dv_range_t *range;
+    ASSERT( paging_locked_by_me(d) );
+    range = dirty_vram_range_find(d, begin_pfn, nr);
+    if ( !range )
+        range = dirty_vram_range_alloc(d, begin_pfn, nr);
+    
+    return range;
+}
+
+
+
+/* Allocate a dv_paddr_link struct */
+static dv_paddr_link_t *
+alloc_paddr_link(struct domain *d)
+{
+    dv_paddr_link_t * pl = NULL;
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
+
+    ASSERT( paging_locked_by_me(d) );
+    BUILD_BUG_ON(sizeof(dv_paddr_link_ext_t) > PAGE_SIZE);
+    /* Is the list of free pl's empty? */
+    if ( dirty_vram->pl_free == NULL ) /* yes */
+    {
+        /*
+         * Allocate another page of pl's.
+         * Link them all together and point the free list head at them
+         */
+        int i;
+        struct page_info *pg = d->arch.paging.alloc_page(d);
+        dv_paddr_link_ext_t *ext = __page_to_virt(pg);
+        if ( ext == NULL )
+            goto out;
+
+#if DEBUG_allocate_paddr_links_page
+        gdprintk(XENLOG_DEBUG, "Allocated another page of paddr_links\n");
+#endif
+        list_add(&ext->ext_link, &dirty_vram->ext_head);
+
+        /* initialize and link together the new pl entries */
+        for ( i = 0; i != ARRAY_SIZE(ext->entries); i++ )
+        {
+            ext->entries[i].sl1ma = INVALID_PADDR;
+            ext->entries[i].pl_next = &ext->entries[i+1];
+        }
+        ext->entries[ARRAY_SIZE(ext->entries) - 1].pl_next = NULL;
+        dirty_vram->pl_free = &ext->entries[0];
+    }
+    pl = dirty_vram->pl_free;
+    dirty_vram->pl_free = pl->pl_next;
+
+    pl->sl1ma = INVALID_PADDR;
+    pl->pl_next = NULL;
+ out:
+    return pl;
+}
+
+
+/*
+ * Free a paddr_link struct.
+ *
+ * The caller has walked the singly-linked list of elements
+ * that have, as their head, an element in a pl_tab cell.
+ * The list walks has reached the element to be freed.
+ * (Each element is a dv_paddr_link_t struct.)
+ *
+ * @pl points to the element to be freed.
+ * @ppl points to its predecessor element's next member.
+ *
+ * After linking the predecessor to the element's successor,
+ * we can free @pl by prepending it to the list of free
+ * elements.
+ *
+ * As a boundary case (which happens to be the common case),
+ * @pl points to a cell in the pl_tab rather than to some
+ * extension element dangling from that cell.
+ * We recognize this case because @ppl is NULL.
+ * In that case we promote the first extension element by
+ * copying it into the pl_tab cell and then freeing that element.
+ */
+
+dv_paddr_link_t *
+free_paddr_link(struct domain *d,
+                dv_paddr_link_t **ppl,
+                dv_paddr_link_t *pl)
+{
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    dv_paddr_link_t *npl; /* next pl */
+
+    ASSERT( paging_locked_by_me(d) );
+    /* extension mapping? */
+    if ( ppl ) /* yes. free it */
+    {
+        ASSERT(pl == (*ppl));
+        (*ppl) = npl = pl->pl_next;
+    }
+    else  /* main table */
+    {
+        /*
+         * Move the 2nd mapping into the main table
+         * and free the 2nd mapping's element.
+         */
+        dv_paddr_link_t * spl;
+        spl = pl->pl_next;
+        if ( spl == NULL )
+        {
+            pl->sl1ma = INVALID_PADDR;
+            return pl;
+        }
+        pl->sl1ma = spl->sl1ma;
+        pl->pl_next = spl->pl_next;
+        npl = pl; /* reprocess main table entry again */
+        pl = spl;
+    }
+    pl->sl1ma = INVALID_PADDR;
+    pl->pl_next = dirty_vram->pl_free;
+    dirty_vram->pl_free = pl;
+    return npl;
+}
+
+
+/*
+ * dirty_vram_range_update()
+ *
+ * This is called whenever a level 1 page table entry is modified.
+ * If the L1PTE is being cleared, the function removes any paddr_links
+ * that refer to it.
+ * If the L1PTE is being set to a frame buffer page, a paddr_link is
+ * created for that page's entry in pl_tab.
+ * Returns 1 iff entry found and set or cleared.
+ */
+int dirty_vram_range_update(struct domain *d,
+                            unsigned long gfn,
+                            paddr_t sl1ma,
+                            int set)
+{
+    int effective = 0;
+    dv_range_t *range;
+    unsigned long i;
+    dv_paddr_link_t *pl;
+    dv_paddr_link_t **ppl;
+    int len = 0;
+
+    ASSERT(paging_locked_by_me(d));
+    range = dirty_vram_range_find_gfn(d, gfn);
+    if ( !range )
+        return effective;
+
+    
+    i = gfn - range->begin_pfn;
+    pl = &range->pl_tab[ i ].mapping;
+    ppl = NULL;
+
+    /*
+     * find matching entry (pl), if any, and its predecessor
+     * in linked list (ppl)
+     */
+    while ( pl != NULL )
+    {
+        if ( pl->sl1ma == sl1ma || pl->sl1ma == INVALID_PADDR )
+            break;
+            
+        ppl = &pl->pl_next;
+        pl = *ppl;
+        len++;
+    }
+
+    if ( set )
+    {
+        /* Did we find sl1ma in either the main table or the linked list? */
+        if ( pl == NULL ) /* no, so we'll need to alloc a link */
+        {
+            ASSERT(ppl != NULL);
+            
+#if DEBUG_alloc_paddr_inject_fault
+            {
+                static int counter;
+                
+                /* Test stuck_dirty logic for some cases */
+                if ( (++counter) % 4 == 0 )
+                {
+                    /* Simply mark the frame buffer page as always dirty */
+                    range->pl_tab[ i ].stuck_dirty = 1;
+                    gdprintk(XENLOG_DEBUG,
+                             "[%lx] inject stuck dirty fault\n",
+                             gfn );
+                    goto out;
+                }
+            }
+#endif
+            /*
+             * Have we reached the limit of mappings we're willing
+             * to bookkeep?
+             */
+            if ( len > DV_ADDR_LINK_LIST_LIMIT ) /* yes */
+            {
+#if DEBUG_link_limit_exceeded
+                if ( !range->pl_tab[ i ].stuck_dirty )
+                    gdprintk(XENLOG_DEBUG,
+                             "[%lx] link limit exceeded\n",
+                             gfn );
+#endif            
+                /* Simply mark the frame buffer page as always dirty */
+                range->pl_tab[ i ].stuck_dirty = 1;
+                goto out;
+            }
+
+            /* alloc link and append it to list */
+            (*ppl) = pl = alloc_paddr_link(d);
+            /* Were we able to allocate a link? */
+            if ( pl == NULL ) /* no */
+            {
+                /* Simply mark the frame buffer page as always dirty */
+                range->pl_tab[ i ].stuck_dirty = 1;
+                
+                gdprintk(XENLOG_DEBUG,
+                         "[%lx] alloc failure\n",
+                         gfn );
+                
+                goto out;
+            }
+        }
+        if ( pl->sl1ma != sl1ma )
+        {
+            ASSERT(pl->sl1ma == INVALID_PADDR);
+            pl->sl1ma = sl1ma;
+            range->nr_mappings++;
+        }
+        effective = 1;
+        if ( len > range->mappings_hwm )
+        {
+            range->mappings_hwm = len;
+#if DEBUG_update_vram_mapping
+            gdprintk(XENLOG_DEBUG,
+                     "[%lx] set      sl1ma:%lx hwm:%d mappings:%d "
+                     "freepages:%d\n",
+                     gfn, sl1ma,
+                     range->mappings_hwm,
+                     range->nr_mappings,
+                     d->arch.paging.shadow.free_pages);
+#endif
+        }
+    }
+    else /* clear */
+    {
+        if ( pl && pl->sl1ma == sl1ma )
+        {
+#if DEBUG_update_vram_mapping
+            gdprintk(XENLOG_DEBUG,
+                     "[%lx] clear    sl1ma:%lx mappings:%d\n",
+                     gfn, sl1ma,
+                     range->nr_mappings - 1);
+#endif
+            free_paddr_link(d, ppl, pl);
+            --range->nr_mappings;
+            effective = 1;
+        }
+    }
+ out:
+    return effective;
+}
+
+
+/*
+ * shadow_scan_dirty_flags()
+ * This produces a dirty bitmap for the range by examining every
+ * L1PTE referenced by some dv_paddr_link in the range's pl_tab table.
+ * It tests and clears each such L1PTE's dirty flag.
+ */
+static int shadow_scan_dirty_flags(struct domain *d,
+                                   dv_range_t *range,
+                                   uint8_t *dirty_bitmap)
+{
+    int flush_tlb = 0;
+    unsigned long i;
+    unsigned long nr = range->end_pfn - range->begin_pfn;
+
+    ASSERT( paging_locked_by_me(d) );
+    /* Iterate over VRAM to track dirty bits. */
+    for ( i = 0; i < nr; i++ )
+    {
+        int dirty = 0, len = 1;
+        dv_paddr_link_t *pl;
+        /* Does the frame buffer have an incomplete set of mappings? */
+        if ( unlikely(range->pl_tab[i].stuck_dirty) ) /* yes */
+            dirty = 1;
+        else /* The frame buffer's set of mappings is complete.  Scan it. */
+            for ( pl = &range->pl_tab[i].mapping;
+                  pl;
+                  pl = pl->pl_next, len++ )
+            {
+                l1_pgentry_t *sl1e;
+                paddr_t sl1ma = pl->sl1ma;
+                if (sl1ma == INVALID_PADDR) /* FB page is unmapped */
+                    continue;
+                sl1e = maddr_to_virt(sl1ma);
+                if ( l1e_get_flags(*sl1e) & _PAGE_DIRTY )
+                {
+                    dirty = 1;
+                    /* Clear dirty so we can detect if page gets re-dirtied.
+                     * Note: this is atomic, so we may clear a
+                     * _PAGE_ACCESSED set by another processor.
+                     */
+                    l1e_remove_flags(*sl1e, _PAGE_DIRTY);
+                    flush_tlb = 1;
+                }
+            } /* for */
+        
+        if ( dirty )
+            dirty_bitmap[i >> 3] |= (1 << (i & 7));
+
+    }
+
+    return flush_tlb;
+}
+
+
+/*
+ * shadow_track_dirty_vram()
+ * This is the API called by the guest to determine which pages in the range
+ * from [begin_pfn:begin_pfn+nr) have been dirtied since the last call.
+ * It creates the domain's dv_dirty_vram on demand.
+ * It creates ranges on demand when some [begin_pfn:begin_pfn+nr) is first encountered.
+ * To collect the dirty bitmask it calls shadow_scan_dirty_flags().
+ * It copies the dirty bitmask into guest storage.
+ */
+int shadow_track_dirty_vram(struct domain *d,
+                            unsigned long begin_pfn,
+                            unsigned long nr,
+                            XEN_GUEST_HANDLE_64(uint8) guest_dirty_bitmap)
+{
+    int rc = 0;
+    unsigned long end_pfn = begin_pfn + nr;
+    int flush_tlb = 0;
+    dv_range_t *range;
+    struct p2m_domain *p2m = p2m_get_hostp2m(d);
+
+    /*
+     * This range test is tricky.
+     *
+     * The range [begin_pfn..end_pfn) is an open interval, so end_pfn
+     * is a pfn beyond the end of the range.
+     *
+     * p2m->max_mapped_pfn is a valid PFN so p2m->max_mapped_pfn + 1 is an
+     * invalid PFN.
+     *
+     * If end_pfn is beyond *that* then the range is invalid.
+     */
+    if ( end_pfn < begin_pfn
+         || begin_pfn > p2m->max_mapped_pfn
+         || end_pfn > p2m->max_mapped_pfn + 1 )
+        return -EINVAL;
+
+    paging_lock(d);
+
+    if ( !nr )
+    {
+        dirty_vram_free(d);
+        goto out;
+    }
+
+    if ( guest_handle_is_null(guest_dirty_bitmap) )
+        goto out;
+
+    if ( !dirty_vram_find_or_alloc(d) )
+    {
+        rc = -ENOMEM;
+        goto out;
+    }
+
+    range = dirty_vram_range_find(d, begin_pfn, nr);
+    if ( !range )
+    {
+        range = dirty_vram_range_alloc(d, begin_pfn, nr);
+        if ( range )
+            sh_find_all_vram_mappings(d->vcpu[0], range);
+    }
+    if ( range )
+    {
+        int size = (nr + BITS_PER_LONG - 1) / BITS_PER_LONG;
+        unsigned long dirty_bitmap[size];
+
+        memset(dirty_bitmap, 0x00, size * BYTES_PER_LONG);
+
+        flush_tlb |= shadow_scan_dirty_flags(d, range, (uint8_t*)dirty_bitmap);
+
+        rc = -EFAULT;
+        if ( copy_to_guest(guest_dirty_bitmap,
+                           (uint8_t*)dirty_bitmap,
+                           size * BYTES_PER_LONG) == 0 )
+            rc = 0;
+    }
+    
+    if ( flush_tlb )
+        flush_tlb_mask(d->domain_dirty_cpumask);
+
+out:
+    paging_unlock(d);
+    return rc;
+}
+
+
+/************************************************/
+/*          HAP VRAM TRACKING SUPPORT           */
+/************************************************/
+
+/*
+ * hap_track_dirty_vram()
+ * Create the domain's dv_dirty_vram struct on demand.
+ * Create a dirty vram range on demand when some [begin_pfn:begin_pfn+nr) is
+ * first encountered.
+ * Collect the guest_dirty bitmask, a bit mask of the dirty vram pages, by
+ * calling paging_log_dirty_range(), which interrogates each vram
+ * page's p2m type looking for pages that have been made writable.
+ */
+int hap_track_dirty_vram(struct domain *d,
+                         unsigned long begin_pfn,
+                         unsigned long nr,
+                         XEN_GUEST_HANDLE_64(uint8) guest_dirty_bitmap)
+{
+    long rc = 0;
+    dv_dirty_vram_t *dirty_vram;
+    uint8_t *dirty_bitmap = NULL;
+
+    if ( nr )
+    {
+        dv_range_t *range = NULL;
+        int size = ( nr + BITS_PER_BYTE - 1 ) / BITS_PER_BYTE;
+        
+        if ( !paging_mode_log_dirty(d) )
+        {
+            hap_logdirty_init(d);
+            rc = paging_log_dirty_enable(d);
+            if ( rc )
+                goto out;
+        }
+
+        rc = -ENOMEM;
+        dirty_bitmap = xzalloc_bytes( size );
+        if ( !dirty_bitmap )
+            goto out;
+        
+        paging_lock(d);
+        
+        dirty_vram = d->arch.hvm_domain.dirty_vram;
+        if ( !dirty_vram ) 
+        {
+            rc = -ENOMEM;
+            if ( !(dirty_vram = dirty_vram_alloc(d)) )
+            {
+                paging_unlock(d);
+                goto out;
+            }
+        }
+        
+        range = dirty_vram_range_find(d, begin_pfn, nr);
+        if ( !range )
+        {
+            rc = -ENOMEM;
+            if ( !(range = dirty_vram_range_alloc(d, begin_pfn, nr)) )
+            {
+                paging_unlock(d);
+                goto out;
+            }
+            
+            paging_unlock(d);
+            
+            /* set l1e entries of range within P2M table to be read-only. */
+            p2m_change_type_range(d, begin_pfn, begin_pfn + nr,
+                                  p2m_ram_rw, p2m_ram_logdirty);
+            
+            flush_tlb_mask(d->domain_dirty_cpumask);
+            
+            memset(dirty_bitmap, 0xff, size); /* consider all pages dirty */
+        }
+        else
+        {
+            paging_unlock(d);
+            
+            domain_pause(d);
+            
+            /* get the bitmap */
+            paging_log_dirty_range(d, begin_pfn, nr, dirty_bitmap);
+            
+            domain_unpause(d);
+        }
+        
+        
+        rc = -EFAULT;
+        if ( copy_to_guest(guest_dirty_bitmap,
+                           dirty_bitmap,
+                           size) == 0 )
+        {
+            rc = 0;
+        }
+    }
+    else
+    {
+        paging_lock(d);
+        
+        dirty_vram = d->arch.hvm_domain.dirty_vram;
+        if ( dirty_vram )
+        {
+            /*
+             * If zero pages specified while tracking dirty vram
+             * then stop tracking
+             */
+            dirty_vram_free(d);
+        
+        }
+        
+        paging_unlock(d);
+    }
+out:
+    if ( dirty_bitmap )
+        xfree(dirty_bitmap);
+    
+    return rc;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c
index a95ccbf..f7d979b 100644
--- a/xen/arch/x86/mm/hap/hap.c
+++ b/xen/arch/x86/mm/hap/hap.c
@@ -53,117 +53,6 @@
 #define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg))
 
 /************************************************/
-/*          HAP VRAM TRACKING SUPPORT           */
-/************************************************/
-
-/*
- * hap_track_dirty_vram()
- * Create the domain's dv_dirty_vram struct on demand.
- * Create a dirty vram range on demand when some [begin_pfn:begin_pfn+nr] is
- * first encountered.
- * Collect the guest_dirty bitmask, a bit mask of the dirty vram pages, by
- * calling paging_log_dirty_range(), which interrogates each vram
- * page's p2m type looking for pages that have been made writable.
- */
-
-int hap_track_dirty_vram(struct domain *d,
-                         unsigned long begin_pfn,
-                         unsigned long nr,
-                         XEN_GUEST_HANDLE_64(uint8) guest_dirty_bitmap)
-{
-    long rc = 0;
-    struct sh_dirty_vram *dirty_vram;
-    uint8_t *dirty_bitmap = NULL;
-
-    if ( nr )
-    {
-        int size = (nr + BITS_PER_BYTE - 1) / BITS_PER_BYTE;
-
-        if ( !paging_mode_log_dirty(d) )
-        {
-            hap_logdirty_init(d);
-            rc = paging_log_dirty_enable(d);
-            if ( rc )
-                goto out;
-        }
-
-        rc = -ENOMEM;
-        dirty_bitmap = xzalloc_bytes(size);
-        if ( !dirty_bitmap )
-            goto out;
-
-        paging_lock(d);
-
-        dirty_vram = d->arch.hvm_domain.dirty_vram;
-        if ( !dirty_vram )
-        {
-            rc = -ENOMEM;
-            if ( (dirty_vram = xzalloc(struct sh_dirty_vram)) == NULL )
-            {
-                paging_unlock(d);
-                goto out;
-            }
-
-            d->arch.hvm_domain.dirty_vram = dirty_vram;
-        }
-
-        if ( begin_pfn != dirty_vram->begin_pfn ||
-             begin_pfn + nr != dirty_vram->end_pfn )
-        {
-            dirty_vram->begin_pfn = begin_pfn;
-            dirty_vram->end_pfn = begin_pfn + nr;
-
-            paging_unlock(d);
-
-            /* set l1e entries of range within P2M table to be read-only. */
-            p2m_change_type_range(d, begin_pfn, begin_pfn + nr,
-                                  p2m_ram_rw, p2m_ram_logdirty);
-
-            flush_tlb_mask(d->domain_dirty_cpumask);
-
-            memset(dirty_bitmap, 0xff, size); /* consider all pages dirty */
-        }
-        else
-        {
-            paging_unlock(d);
-
-            domain_pause(d);
-
-            /* get the bitmap */
-            paging_log_dirty_range(d, begin_pfn, nr, dirty_bitmap);
-
-            domain_unpause(d);
-        }
-
-        rc = -EFAULT;
-        if ( copy_to_guest(guest_dirty_bitmap, dirty_bitmap, size) == 0 )
-            rc = 0;
-    }
-    else
-    {
-        paging_lock(d);
-
-        dirty_vram = d->arch.hvm_domain.dirty_vram;
-        if ( dirty_vram )
-        {
-            /*
-             * If zero pages specified while tracking dirty vram
-             * then stop tracking
-             */
-            xfree(dirty_vram);
-            d->arch.hvm_domain.dirty_vram = NULL;
-        }
-
-        paging_unlock(d);
-    }
-out:
-    if ( dirty_bitmap )
-        xfree(dirty_bitmap);
-
-    return rc;
-}
-
-/************************************************/
 /*            HAP LOG DIRTY SUPPORT             */
 /************************************************/
 
diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
index 258f46e..41d0fe3 100644
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -690,20 +690,23 @@ void p2m_change_type_range(struct domain *d,
     struct p2m_domain *p2m = p2m_get_hostp2m(d);
 
     BUG_ON(p2m_is_grant(ot) || p2m_is_grant(nt));
-
     p2m_lock(p2m);
-    p2m->defer_nested_flush = 1;
 
+    p2m->defer_nested_flush = 1;
+    
     for ( gfn = start; gfn < end; gfn++ )
     {
         mfn = p2m->get_entry(p2m, gfn, &pt, &a, 0, NULL);
         if ( pt == ot )
-            set_p2m_entry(p2m, gfn, mfn, PAGE_ORDER_4K, nt, p2m->default_access);
+            set_p2m_entry(p2m, gfn, mfn, PAGE_ORDER_4K, nt,
+                          p2m->default_access);
     }
-
+    
     p2m->defer_nested_flush = 0;
+
     if ( nestedhvm_enabled(d) )
         p2m_flush_nestedp2m(d);
+
     p2m_unlock(p2m);
 }
 
diff --git a/xen/arch/x86/mm/paging.c b/xen/arch/x86/mm/paging.c
index a5cdbd1..cd44f6e 100644
--- a/xen/arch/x86/mm/paging.c
+++ b/xen/arch/x86/mm/paging.c
@@ -27,6 +27,7 @@
 #include <asm/p2m.h>
 #include <asm/hap.h>
 #include <asm/hvm/nestedhvm.h>
+#include <asm/dirty_vram.h>
 #include <xen/numa.h>
 #include <xsm/xsm.h>
 
@@ -192,15 +193,11 @@ int paging_log_dirty_disable(struct domain *d)
     return ret;
 }
 
-/* Mark a page as dirty */
+/* Given a guest mfn, mark a page as dirty */
 void paging_mark_dirty(struct domain *d, unsigned long guest_mfn)
 {
     unsigned long pfn;
     mfn_t gmfn;
-    int changed;
-    mfn_t mfn, *l4, *l3, *l2;
-    unsigned long *l1;
-    int i1, i2, i3, i4;
 
     gmfn = _mfn(guest_mfn);
 
@@ -210,6 +207,19 @@ void paging_mark_dirty(struct domain *d, unsigned long guest_mfn)
 
     /* We /really/ mean PFN here, even for non-translated guests. */
     pfn = get_gpfn_from_mfn(mfn_x(gmfn));
+    paging_mark_dirty_gpfn(d, pfn);
+}
+
+
+/* Given a guest pfn, mark a page as dirty */
+void paging_mark_dirty_gpfn(struct domain *d, unsigned long pfn)
+{
+    int changed;
+    mfn_t mfn, *l4, *l3, *l2;
+    unsigned long *l1;
+    int i1, i2, i3, i4;
+    dv_range_t *range;
+    
     /* Shared MFNs should NEVER be marked dirty */
     BUG_ON(SHARED_M2P(pfn));
 
@@ -229,6 +239,11 @@ void paging_mark_dirty(struct domain *d, unsigned long guest_mfn)
     /* Recursive: this is called from inside the shadow code */
     paging_lock_recursive(d);
 
+    d->arch.paging.log_dirty.dirty_count++;
+    range = dirty_vram_range_find_gfn(d, pfn);
+    if ( range )
+        range->dirty_count++;
+
     if ( unlikely(!mfn_valid(d->arch.paging.log_dirty.top)) ) 
     {
          d->arch.paging.log_dirty.top = paging_new_log_dirty_node(d);
@@ -445,7 +460,31 @@ void paging_log_dirty_range(struct domain *d,
     struct p2m_domain *p2m = p2m_get_hostp2m(d);
     int i;
     unsigned long pfn;
+    dv_range_t *range;
+    unsigned int range_dirty_count;
 
+    paging_lock(d);
+    range = dirty_vram_range_find_gfn(d, begin_pfn);
+    if ( !range )
+    {
+        paging_unlock(d);
+        goto out;
+    }
+    
+    range_dirty_count = range->dirty_count;
+    range->dirty_count = 0;
+
+    paging_unlock(d);
+    
+    if ( !range_dirty_count )
+        goto out;
+
+    PAGING_DEBUG(LOGDIRTY,
+                 "log-dirty-range: dom %u [%05lx:%05lx] range_dirty=%u\n",
+                 d->domain_id,
+                 begin_pfn,
+                 begin_pfn + nr,
+                 range_dirty_count);
     /*
      * Set l1e entries of P2M table to be read-only.
      *
@@ -460,15 +499,17 @@ void paging_log_dirty_range(struct domain *d,
 
     for ( i = 0, pfn = begin_pfn; pfn < begin_pfn + nr; i++, pfn++ )
     {
-        p2m_type_t pt;
-        pt = p2m_change_type(d, pfn, p2m_ram_rw, p2m_ram_logdirty);
-        if ( pt == p2m_ram_rw )
+        if ( p2m_change_type(d, pfn, p2m_ram_rw, p2m_ram_logdirty) ==
+             p2m_ram_rw )
             dirty_bitmap[i >> 3] |= (1 << (i & 7));
     }
 
     p2m_unlock(p2m);
 
     flush_tlb_mask(d->domain_dirty_cpumask);
+
+ out:
+    return;
 }
 
 /* Note that this function takes three function pointers. Callers must supply
diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c
index ce79131..1e4b880 100644
--- a/xen/arch/x86/mm/shadow/common.c
+++ b/xen/arch/x86/mm/shadow/common.c
@@ -36,6 +36,7 @@
 #include <asm/current.h>
 #include <asm/flushtlb.h>
 #include <asm/shadow.h>
+#include <asm/dirty_vram.h>
 #include <xen/numa.h>
 #include "private.h"
 
@@ -3121,12 +3122,7 @@ void shadow_teardown(struct domain *d)
      * calls now that we've torn down the bitmap */
     d->arch.paging.mode &= ~PG_log_dirty;
 
-    if (d->arch.hvm_domain.dirty_vram) {
-        xfree(d->arch.hvm_domain.dirty_vram->sl1ma);
-        xfree(d->arch.hvm_domain.dirty_vram->dirty_bitmap);
-        xfree(d->arch.hvm_domain.dirty_vram);
-        d->arch.hvm_domain.dirty_vram = NULL;
-    }
+    dirty_vram_free(d);
 
     paging_unlock(d);
 
@@ -3464,178 +3460,217 @@ void shadow_clean_dirty_bitmap(struct domain *d)
 
 /**************************************************************************/
 /* VRAM dirty tracking support */
-int shadow_track_dirty_vram(struct domain *d,
-                            unsigned long begin_pfn,
-                            unsigned long nr,
-                            XEN_GUEST_HANDLE_64(uint8) dirty_bitmap)
-{
-    int rc;
-    unsigned long end_pfn = begin_pfn + nr;
-    unsigned long dirty_size = (nr + 7) / 8;
-    int flush_tlb = 0;
-    unsigned long i;
-    p2m_type_t t;
-    struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
-    struct p2m_domain *p2m = p2m_get_hostp2m(d);
 
-    if (end_pfn < begin_pfn
-            || begin_pfn > p2m->max_mapped_pfn
-            || end_pfn >= p2m->max_mapped_pfn)
-        return -EINVAL;
 
-    /* We perform p2m lookups, so lock the p2m upfront to avoid deadlock */
-    p2m_lock(p2m_get_hostp2m(d));
-    paging_lock(d);
+/* Support functions for shadow-based dirty VRAM code */
 
-    if ( dirty_vram && (!nr ||
-             ( begin_pfn != dirty_vram->begin_pfn
-            || end_pfn   != dirty_vram->end_pfn )) )
-    {
-        /* Different tracking, tear the previous down. */
-        gdprintk(XENLOG_INFO, "stopping tracking VRAM %lx - %lx\n", dirty_vram->begin_pfn, dirty_vram->end_pfn);
-        xfree(dirty_vram->sl1ma);
-        xfree(dirty_vram->dirty_bitmap);
-        xfree(dirty_vram);
-        dirty_vram = d->arch.hvm_domain.dirty_vram = NULL;
-    }
+#define DEBUG_unshadow_sl1ma                  0          
+#define DEBUG_unshadow_sl1ma_detail           0
+#define DEBUG_count_initial_mappings          0
 
-    if ( !nr )
+/* smfn is no longer a shadow page.  Remove it from any
+ * dirty vram range mapping. */
+void
+dirty_vram_delete_shadow(struct vcpu *v,
+                         unsigned long gfn,
+                         unsigned int shadow_type, 
+                         mfn_t smfn)
+{
+    static unsigned int l1_shadow_mask = 
+          1 << SH_type_l1_32_shadow
+        | 1 << SH_type_fl1_32_shadow
+        | 1 << SH_type_l1_pae_shadow
+        | 1 << SH_type_fl1_pae_shadow
+        | 1 << SH_type_l1_64_shadow
+        | 1 << SH_type_fl1_64_shadow
+        ;
+    struct domain *d = v->domain;
+    dv_dirty_vram_t *dirty_vram;
+    struct list_head *curr, *next;
+    
+    ASSERT(paging_locked_by_me(d));
+    /* Ignore all but level 1 shadows */
+    
+    if ( (l1_shadow_mask & (1 << shadow_type)) == 0 )
     {
-        rc = 0;
         goto out;
     }
 
-    /* This should happen seldomly (Video mode change),
-     * no need to be careful. */
+    dirty_vram = d->arch.hvm_domain.dirty_vram;
     if ( !dirty_vram )
     {
-        /* Throw away all the shadows rather than walking through them 
-         * up to nr times getting rid of mappings of each pfn */
-        shadow_blow_tables(d);
-
-        gdprintk(XENLOG_INFO, "tracking VRAM %lx - %lx\n", begin_pfn, end_pfn);
-
-        rc = -ENOMEM;
-        if ( (dirty_vram = xmalloc(struct sh_dirty_vram)) == NULL )
-            goto out;
-        dirty_vram->begin_pfn = begin_pfn;
-        dirty_vram->end_pfn = end_pfn;
-        d->arch.hvm_domain.dirty_vram = dirty_vram;
-
-        if ( (dirty_vram->sl1ma = xmalloc_array(paddr_t, nr)) == NULL )
-            goto out_dirty_vram;
-        memset(dirty_vram->sl1ma, ~0, sizeof(paddr_t) * nr);
-
-        if ( (dirty_vram->dirty_bitmap = xzalloc_array(uint8_t, dirty_size)) == NULL )
-            goto out_sl1ma;
-
-        dirty_vram->last_dirty = NOW();
-
-        /* Tell the caller that this time we could not track dirty bits. */
-        rc = -ENODATA;
-    }
-    else if (dirty_vram->last_dirty == -1)
-    {
-        /* still completely clean, just copy our empty bitmap */
-        rc = -EFAULT;
-        if ( copy_to_guest(dirty_bitmap, dirty_vram->dirty_bitmap, dirty_size) == 0 )
-            rc = 0;
+        goto out;
     }
-    else
+        
+    list_for_each_safe(curr, next, &dirty_vram->range_head)
     {
-        /* Iterate over VRAM to track dirty bits. */
-        for ( i = 0; i < nr; i++ ) {
-            mfn_t mfn = get_gfn_query_unlocked(d, begin_pfn + i, &t);
-            struct page_info *page;
-            int dirty = 0;
-            paddr_t sl1ma = dirty_vram->sl1ma[i];
-
-            if (mfn_x(mfn) == INVALID_MFN)
-            {
-                dirty = 1;
-            }
-            else
+        dv_range_t *range = list_entry(curr, dv_range_t, range_link);
+        unsigned long i;
+        int max_mappings = 1, mappings = 0;
+        int unshadowed = 0;
+        for ( i = 0; i != range->end_pfn - range->begin_pfn; i++ )
+        {
+            dv_paddr_link_t *pl = &range->pl_tab[ i ].mapping;
+            dv_paddr_link_t **ppl = NULL;
+            mappings = 0;
+            
+            while ( pl != NULL )
             {
-                page = mfn_to_page(mfn);
-                switch (page->u.inuse.type_info & PGT_count_mask)
-                {
-                case 0:
-                    /* No guest reference, nothing to track. */
-                    break;
-                case 1:
-                    /* One guest reference. */
-                    if ( sl1ma == INVALID_PADDR )
-                    {
-                        /* We don't know which sl1e points to this, too bad. */
-                        dirty = 1;
-                        /* TODO: Heuristics for finding the single mapping of
-                         * this gmfn */
-                        flush_tlb |= sh_remove_all_mappings(d->vcpu[0], mfn);
-                    }
-                    else
-                    {
-                        /* Hopefully the most common case: only one mapping,
-                         * whose dirty bit we can use. */
-                        l1_pgentry_t *sl1e = maddr_to_virt(sl1ma);
-
-                        if ( l1e_get_flags(*sl1e) & _PAGE_DIRTY )
-                        {
-                            dirty = 1;
-                            /* Note: this is atomic, so we may clear a
-                             * _PAGE_ACCESSED set by another processor. */
-                            l1e_remove_flags(*sl1e, _PAGE_DIRTY);
-                            flush_tlb = 1;
-                        }
-                    }
-                    break;
-                default:
-                    /* More than one guest reference,
-                     * we don't afford tracking that. */
-                    dirty = 1;
+                paddr_t sl1ma = pl->sl1ma;
+                unsigned long sl1mn;
+                
+                if ( sl1ma == INVALID_PADDR )
                     break;
+                
+                sl1mn = sl1ma >> PAGE_SHIFT;
+                if ( sl1mn == mfn_x(smfn) ) {
+#if DEBUG_unshadow_sl1ma_detail
+                    gdprintk(XENLOG_DEBUG,
+                             "[%lx] gfn[%lx] unshadow sl1ma:%lx\n",
+                             mfn_x(smfn),
+                             range->begin_pfn + i,
+                             sl1ma);
+#endif
+                    unshadowed++;
+                    pl = free_paddr_link(d, ppl, pl);
+                    --range->nr_mappings;
+                }
+                else
+                {
+                    ppl = &pl->pl_next;
+                    pl = *ppl;
+                    mappings++;
                 }
             }
-
-            if ( dirty )
+        }
+        if ( mappings > max_mappings )
+            max_mappings = mappings;
+        
+        if ( unshadowed ) {
+#if DEBUG_unshadow_sl1ma
+            gdprintk(XENLOG_DEBUG,
+                     "[%lx] gfn[%05lx:%05lx] unshadowed:%d mappings:0x%x "
+                     "max_mappings:%d\n",
+                     mfn_x(smfn),
+                     range->begin_pfn, range->end_pfn,
+                     unshadowed, range->nr_mappings, max_mappings);
+#endif
+            if ( range->nr_mappings == 0 )
             {
-                dirty_vram->dirty_bitmap[i / 8] |= 1 << (i % 8);
-                dirty_vram->last_dirty = NOW();
+                dirty_vram_range_free(d, range);                    
             }
         }
+    }
+ out:
+    return;
+}
+
 
-        rc = -EFAULT;
-        if ( copy_to_guest(dirty_bitmap, dirty_vram->dirty_bitmap, dirty_size) == 0 ) {
-            memset(dirty_vram->dirty_bitmap, 0, dirty_size);
-            if (dirty_vram->last_dirty + SECONDS(2) < NOW())
+typedef int (*hash_pfn_callback_t)(struct vcpu *v,
+                                   mfn_t smfn,
+                                   unsigned long begin_pfn,
+                                   unsigned long end_pfn,
+                                   int *removed);
+
+static int hash_pfn_foreach(struct vcpu *v, 
+                            unsigned int callback_mask, 
+                            hash_pfn_callback_t callbacks[], 
+                            unsigned long begin_pfn,
+                            unsigned long end_pfn)
+/* Walk the hash table looking at the types of the entries and 
+ * calling the appropriate callback function for each entry. 
+ * The mask determines which shadow types we call back for, and the array
+ * of callbacks tells us which function to call.
+ * Any callback may return non-zero to let us skip the rest of the scan. 
+ *
+ * WARNING: Callbacks MUST NOT add or remove hash entries unless they 
+ * then return non-zero to terminate the scan. */
+{
+    int i, done = 0, removed = 0;
+    struct domain *d = v->domain;
+    struct page_info *x;
+
+    /* Say we're here, to stop hash-lookups reordering the chains */
+    ASSERT(paging_locked_by_me(d));
+    ASSERT(d->arch.paging.shadow.hash_walking == 0);
+    d->arch.paging.shadow.hash_walking = 1;
+
+    for ( i = 0; i < SHADOW_HASH_BUCKETS; i++ ) 
+    {
+        /* WARNING: This is not safe against changes to the hash table.
+         * The callback *must* return non-zero if it has inserted or
+         * deleted anything from the hash (lookups are OK, though). */
+        for ( x = d->arch.paging.shadow.hash_table[i];
+              x;
+              x = next_shadow(x) )
+        {
+            if ( callback_mask & (1 << x->u.sh.type) )
             {
-                /* was clean for more than two seconds, try to disable guest
-                 * write access */
-                for ( i = begin_pfn; i < end_pfn; i++ ) {
-                    mfn_t mfn = get_gfn_query_unlocked(d, i, &t);
-                    if (mfn_x(mfn) != INVALID_MFN)
-                        flush_tlb |= sh_remove_write_access(d->vcpu[0], mfn, 1, 0);
-                }
-                dirty_vram->last_dirty = -1;
+                ASSERT(x->u.sh.type <= 15);
+                ASSERT(callbacks[x->u.sh.type] != NULL);
+                done = callbacks[x->u.sh.type](v, page_to_mfn(x), 
+                                               begin_pfn, end_pfn,
+                                               &removed);
+                if ( done ) break;
             }
-            rc = 0;
         }
+        if ( done ) break; 
     }
-    if ( flush_tlb )
-        flush_tlb_mask(d->domain_dirty_cpumask);
-    goto out;
+    d->arch.paging.shadow.hash_walking = 0;
+    return removed;
+}
 
-out_sl1ma:
-    xfree(dirty_vram->sl1ma);
-out_dirty_vram:
-    xfree(dirty_vram);
-    dirty_vram = d->arch.hvm_domain.dirty_vram = NULL;
+void sh_find_all_vram_mappings(struct vcpu *v,
+                               dv_range_t *range)
+{
+    /* Dispatch table for getting per-type functions */
+    static hash_pfn_callback_t callbacks[SH_type_unused] = {
+        NULL, /* none    */
+        SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, 2), /* l1_32   */
+        SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, 2), /* fl1_32  */
+        NULL, /* l2_32   */
+        SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, 3), /* l1_pae  */
+        SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, 3), /* fl1_pae */
+        NULL, /* l2_pae  */
+        NULL, /* l2h_pae */
+#if CONFIG_PAGING_LEVELS >= 4
+        SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, 4), /* l1_64   */
+        SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, 4), /* fl1_64  */
+#else
+        NULL, /* l1_64   */
+        NULL, /* fl1_64  */
+#endif
+        NULL, /* l2_64   */
+        NULL, /* l2h_64  */
+        NULL, /* l3_64   */
+        NULL, /* l4_64   */
+        NULL, /* p2m     */
+        NULL  /* unused  */
+    };
 
-out:
-    paging_unlock(d);
-    p2m_unlock(p2m_get_hostp2m(d));
-    return rc;
+    static unsigned int callback_mask = 
+          1 << SH_type_l1_32_shadow
+        | 1 << SH_type_fl1_32_shadow
+        | 1 << SH_type_l1_pae_shadow
+        | 1 << SH_type_fl1_pae_shadow
+        | 1 << SH_type_l1_64_shadow
+        | 1 << SH_type_fl1_64_shadow
+        ;
+
+    perfc_incr(shadow_mappings);
+
+    hash_pfn_foreach(v, callback_mask, callbacks,
+                     range->begin_pfn,
+                     range->end_pfn);
+
+#if DEBUG_count_initial_mappings
+    gdprintk(XENLOG_DEBUG, "[%05lx:%05lx] count of initial mappings:%d\n",
+             range->begin_pfn, range->end_pfn,
+             range->nr_mappings);
+#endif
 }
 
+
 /**************************************************************************/
 /* Shadow-control XEN_DOMCTL dispatcher */
 
diff --git a/xen/arch/x86/mm/shadow/multi.c b/xen/arch/x86/mm/shadow/multi.c
index 4967da1..bb983bc 100644
--- a/xen/arch/x86/mm/shadow/multi.c
+++ b/xen/arch/x86/mm/shadow/multi.c
@@ -35,6 +35,7 @@
 #include <asm/flushtlb.h>
 #include <asm/hvm/hvm.h>
 #include <asm/hvm/cacheattr.h>
+#include <asm/dirty_vram.h>
 #include <asm/mtrr.h>
 #include <asm/guest_pt.h>
 #include <public/sched.h>
@@ -149,6 +150,10 @@ delete_fl1_shadow_status(struct vcpu *v, gfn_t gfn, mfn_t smfn)
     SHADOW_PRINTK("gfn=%"SH_PRI_gfn", type=%08x, smfn=%05lx\n",
                    gfn_x(gfn), SH_type_fl1_shadow, mfn_x(smfn));
     ASSERT(mfn_to_page(smfn)->u.sh.head);
+
+    /* Removing any dv_paddr_links to the erstwhile shadow page */
+    dirty_vram_delete_shadow(v, gfn_x(gfn), SH_type_fl1_shadow, smfn);
+    
     shadow_hash_delete(v, gfn_x(gfn), SH_type_fl1_shadow, smfn);
 }
 
@@ -160,6 +165,10 @@ delete_shadow_status(struct vcpu *v, mfn_t gmfn, u32 shadow_type, mfn_t smfn)
                    v->domain->domain_id, v->vcpu_id,
                    mfn_x(gmfn), shadow_type, mfn_x(smfn));
     ASSERT(mfn_to_page(smfn)->u.sh.head);
+    
+    /* Removing any dv_paddr_links to the erstwhile shadow page */
+    dirty_vram_delete_shadow(v, mfn_x(gmfn), shadow_type, smfn);
+    
     shadow_hash_delete(v, mfn_x(gmfn), shadow_type, smfn);
     /* 32-on-64 PV guests don't own their l4 pages; see set_shadow_status */
     if ( !is_pv_32on64_vcpu(v) || shadow_type != SH_type_l4_64_shadow )
@@ -516,7 +525,6 @@ _sh_propagate(struct vcpu *v,
     guest_l1e_t guest_entry = { guest_intpte };
     shadow_l1e_t *sp = shadow_entry_ptr;
     struct domain *d = v->domain;
-    struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
     gfn_t target_gfn = guest_l1e_get_gfn(guest_entry);
     u32 pass_thru_flags;
     u32 gflags, sflags;
@@ -663,17 +671,6 @@ _sh_propagate(struct vcpu *v,
         }
     }
 
-    if ( unlikely((level == 1) && dirty_vram
-            && dirty_vram->last_dirty == -1
-            && gfn_x(target_gfn) >= dirty_vram->begin_pfn
-            && gfn_x(target_gfn) < dirty_vram->end_pfn) )
-    {
-        if ( ft & FETCH_TYPE_WRITE )
-            dirty_vram->last_dirty = NOW();
-        else
-            sflags &= ~_PAGE_RW;
-    }
-
     /* Read-only memory */
     if ( p2m_is_readonly(p2mt) ||
          (p2mt == p2m_mmio_direct &&
@@ -1072,101 +1069,60 @@ static int shadow_set_l2e(struct vcpu *v,
     return flags;
 }
 
-static inline void shadow_vram_get_l1e(shadow_l1e_t new_sl1e,
+/* shadow_vram_fix_l1e()
+ *
+ * Tests L1PTEs as they are modified, looking for when they start to (or
+ * cease to) point to frame buffer pages.  If the old and new gfns differ,
+ * calls dirty_vram_range_update() to update the dirty_vram structures.
+ */
+static inline void shadow_vram_fix_l1e(shadow_l1e_t old_sl1e,
+                                       shadow_l1e_t new_sl1e,
                                        shadow_l1e_t *sl1e,
                                        mfn_t sl1mfn,
                                        struct domain *d)
 { 
-    mfn_t mfn = shadow_l1e_get_mfn(new_sl1e);
-    int flags = shadow_l1e_get_flags(new_sl1e);
-    unsigned long gfn;
-    struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    mfn_t new_mfn, old_mfn;
+    unsigned long new_gfn = INVALID_M2P_ENTRY, old_gfn = INVALID_M2P_ENTRY;
+    paddr_t sl1ma;
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
 
-    if ( !dirty_vram         /* tracking disabled? */
-         || !(flags & _PAGE_RW) /* read-only mapping? */
-         || !mfn_valid(mfn) )   /* mfn can be invalid in mmio_direct */
+    if ( !dirty_vram )
         return;
 
-    gfn = mfn_to_gfn(d, mfn);
-    /* Page sharing not supported on shadow PTs */
-    BUG_ON(SHARED_M2P(gfn));
+    sl1ma = pfn_to_paddr(mfn_x(sl1mfn)) | ((unsigned long)sl1e & ~PAGE_MASK);
 
-    if ( (gfn >= dirty_vram->begin_pfn) && (gfn < dirty_vram->end_pfn) )
+    old_mfn = shadow_l1e_get_mfn(old_sl1e);
+
+    if ( !sh_l1e_is_magic(old_sl1e) &&
+         (l1e_get_flags(old_sl1e) & _PAGE_PRESENT) &&
+         mfn_valid(old_mfn))
     {
-        unsigned long i = gfn - dirty_vram->begin_pfn;
-        struct page_info *page = mfn_to_page(mfn);
-        
-        if ( (page->u.inuse.type_info & PGT_count_mask) == 1 )
-            /* Initial guest reference, record it */
-            dirty_vram->sl1ma[i] = pfn_to_paddr(mfn_x(sl1mfn))
-                | ((unsigned long)sl1e & ~PAGE_MASK);
+        old_gfn = mfn_to_gfn(d, old_mfn);
     }
-}
-
-static inline void shadow_vram_put_l1e(shadow_l1e_t old_sl1e,
-                                       shadow_l1e_t *sl1e,
-                                       mfn_t sl1mfn,
-                                       struct domain *d)
-{
-    mfn_t mfn = shadow_l1e_get_mfn(old_sl1e);
-    int flags = shadow_l1e_get_flags(old_sl1e);
-    unsigned long gfn;
-    struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
-
-    if ( !dirty_vram         /* tracking disabled? */
-         || !(flags & _PAGE_RW) /* read-only mapping? */
-         || !mfn_valid(mfn) )   /* mfn can be invalid in mmio_direct */
-        return;
-
-    gfn = mfn_to_gfn(d, mfn);
-    /* Page sharing not supported on shadow PTs */
-    BUG_ON(SHARED_M2P(gfn));
-
-    if ( (gfn >= dirty_vram->begin_pfn) && (gfn < dirty_vram->end_pfn) )
+    
+    new_mfn = shadow_l1e_get_mfn(new_sl1e);
+    if ( !sh_l1e_is_magic(new_sl1e) &&
+         (l1e_get_flags(new_sl1e) & _PAGE_PRESENT) &&
+         mfn_valid(new_mfn))
     {
-        unsigned long i = gfn - dirty_vram->begin_pfn;
-        struct page_info *page = mfn_to_page(mfn);
-        int dirty = 0;
-        paddr_t sl1ma = pfn_to_paddr(mfn_x(sl1mfn))
-            | ((unsigned long)sl1e & ~PAGE_MASK);
+        new_gfn = mfn_to_gfn(d, new_mfn);
+    }
 
-        if ( (page->u.inuse.type_info & PGT_count_mask) == 1 )
-        {
-            /* Last reference */
-            if ( dirty_vram->sl1ma[i] == INVALID_PADDR ) {
-                /* We didn't know it was that one, let's say it is dirty */
-                dirty = 1;
-            }
-            else
-            {
-                ASSERT(dirty_vram->sl1ma[i] == sl1ma);
-                dirty_vram->sl1ma[i] = INVALID_PADDR;
-                if ( flags & _PAGE_DIRTY )
-                    dirty = 1;
-            }
-        }
-        else
+    if ( old_gfn == new_gfn ) return;
+
+    if ( VALID_M2P(old_gfn) )
+        if ( dirty_vram_range_update(d, old_gfn, sl1ma, 0/*clear*/) )
         {
-            /* We had more than one reference, just consider the page dirty. */
-            dirty = 1;
-            /* Check that it's not the one we recorded. */
-            if ( dirty_vram->sl1ma[i] == sl1ma )
-            {
-                /* Too bad, we remembered the wrong one... */
-                dirty_vram->sl1ma[i] = INVALID_PADDR;
-            }
-            else
-            {
-                /* Ok, our recorded sl1e is still pointing to this page, let's
-                 * just hope it will remain. */
-            }
+            SHADOW_PRINTK("gfn %lx (mfn %lx) cleared vram pte\n",
+                          old_gfn, mfn_x(old_mfn));
         }
-        if ( dirty )
+
+    if ( VALID_M2P(new_gfn) )
+        if ( dirty_vram_range_update(d, new_gfn, sl1ma, 1/*set*/) )
         {
-            dirty_vram->dirty_bitmap[i / 8] |= 1 << (i % 8);
-            dirty_vram->last_dirty = NOW();
+            SHADOW_PRINTK("gfn %lx (mfn %lx) set vram pte\n",
+                          new_gfn, mfn_x(new_mfn));
         }
-    }
 }
 
 static int shadow_set_l1e(struct vcpu *v, 
@@ -1211,12 +1167,13 @@ static int shadow_set_l1e(struct vcpu *v,
                 shadow_l1e_remove_flags(new_sl1e, _PAGE_RW);
                 /* fall through */
             case 0:
-                shadow_vram_get_l1e(new_sl1e, sl1e, sl1mfn, d);
                 break;
             }
         }
     } 
 
+    shadow_vram_fix_l1e(old_sl1e, new_sl1e, sl1e, sl1mfn, d);
+
     /* Write the new entry */
     shadow_write_entries(sl1e, &new_sl1e, 1, sl1mfn);
     flags |= SHADOW_SET_CHANGED;
@@ -1231,7 +1188,6 @@ static int shadow_set_l1e(struct vcpu *v,
          * trigger a flush later. */
         if ( shadow_mode_refcounts(d) ) 
         {
-            shadow_vram_put_l1e(old_sl1e, sl1e, sl1mfn, d);
             shadow_put_page_from_l1e(old_sl1e, d);
             TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_SHADOW_L1_PUT_REF);
         } 
@@ -2018,7 +1974,6 @@ void sh_destroy_l1_shadow(struct vcpu *v, mfn_t smfn)
         SHADOW_FOREACH_L1E(sl1mfn, sl1e, 0, 0, {
             if ( (shadow_l1e_get_flags(*sl1e) & _PAGE_PRESENT)
                  && !sh_l1e_is_magic(*sl1e) ) {
-                shadow_vram_put_l1e(*sl1e, sl1e, sl1mfn, d);
                 shadow_put_page_from_l1e(*sl1e, d);
             }
         });
@@ -4336,6 +4291,37 @@ int sh_rm_mappings_from_l1(struct vcpu *v, mfn_t sl1mfn, mfn_t target_mfn)
     return done;
 }
 
+
+int sh_find_vram_mappings_in_l1(struct vcpu *v,
+                                mfn_t sl1mfn,
+                                unsigned long begin_pfn,
+                                unsigned long end_pfn,
+                                int *removed)
+/* Find all VRAM mappings in this shadow l1 table */
+{
+    struct domain *d = v->domain;
+    shadow_l1e_t *sl1e;
+    int done = 0;
+
+    /* only returns _PAGE_PRESENT entries */
+    SHADOW_FOREACH_L1E(sl1mfn, sl1e, 0, done, 
+    {
+        unsigned long gfn;
+        mfn_t gmfn = shadow_l1e_get_mfn(*sl1e);
+        if ( !mfn_valid(gmfn) )
+            continue;
+        gfn = mfn_to_gfn(d, gmfn);
+        if ( VALID_M2P(gfn) && (begin_pfn <= gfn) && (gfn < end_pfn) ) 
+        {
+            paddr_t sl1ma =
+                pfn_to_paddr(mfn_x(sl1mfn)) |
+                ( (unsigned long)sl1e & ~PAGE_MASK );
+            dirty_vram_range_update(v->domain, gfn, sl1ma, 1/*set*/);
+        }
+    });
+    return 0;
+}
+
 /**************************************************************************/
 /* Functions to excise all pointers to shadows from higher-level shadows. */
 
diff --git a/xen/arch/x86/mm/shadow/multi.h b/xen/arch/x86/mm/shadow/multi.h
index 835121e..436a4ac 100644
--- a/xen/arch/x86/mm/shadow/multi.h
+++ b/xen/arch/x86/mm/shadow/multi.h
@@ -66,7 +66,12 @@ SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1, GUEST_LEVELS)
 extern int
 SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1, GUEST_LEVELS)
     (struct vcpu *v, mfn_t sl1mfn, mfn_t target_mfn);
-
+extern int
+SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, GUEST_LEVELS)
+     (struct vcpu *v, mfn_t sl1mfn, 
+      unsigned long begin_pfn,
+      unsigned long end_pfn,
+      int *removed);
 extern void
 SHADOW_INTERNAL_NAME(sh_clear_shadow_entry, GUEST_LEVELS)
     (struct vcpu *v, void *ep, mfn_t smfn);
diff --git a/xen/arch/x86/mm/shadow/types.h b/xen/arch/x86/mm/shadow/types.h
index 43ce1db..5b0f9f7 100644
--- a/xen/arch/x86/mm/shadow/types.h
+++ b/xen/arch/x86/mm/shadow/types.h
@@ -229,6 +229,7 @@ static inline shadow_l4e_t shadow_l4e_from_mfn(mfn_t mfn, u32 flags)
 #define sh_update_cr3              INTERNAL_NAME(sh_update_cr3)
 #define sh_rm_write_access_from_l1 INTERNAL_NAME(sh_rm_write_access_from_l1)
 #define sh_rm_mappings_from_l1     INTERNAL_NAME(sh_rm_mappings_from_l1)
+#define sh_find_vram_mappings_in_l1 INTERNAL_NAME(sh_find_vram_mappings_in_l1)
 #define sh_remove_l1_shadow        INTERNAL_NAME(sh_remove_l1_shadow)
 #define sh_remove_l2_shadow        INTERNAL_NAME(sh_remove_l2_shadow)
 #define sh_remove_l3_shadow        INTERNAL_NAME(sh_remove_l3_shadow)
diff --git a/xen/include/asm-x86/dirty_vram.h b/xen/include/asm-x86/dirty_vram.h
new file mode 100644
index 0000000..53be66e
--- /dev/null
+++ b/xen/include/asm-x86/dirty_vram.h
@@ -0,0 +1,202 @@
+/****************************************************************************
+ * include/asm-x86/dirty_vram.h
+ *
+ * Interface for tracking dirty VRAM pages
+ *
+ * Copyright (c) 2012 Citrix Systems, Inc. (Robert Phillips)
+ * Parts of this code are Copyright (c) 2007 Advanced Micro Devices (Wei Huang)
+ * Parts of this code are Copyright (c) 2006 by XenSource Inc.
+ * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
+ * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#ifndef _DIRTY_VRAM_H
+#define _DIRTY_VRAM_H
+
+/*
+ * In shadow mode we need to bookkeep all the L1 page table entries that
+ * map a frame buffer page.  Struct dv_paddr_link does this by
+ * recording the address of a L1 page table entry for some frame buffer page.
+ * It also links to additional pl entries if the frame buffer page
+ * has multiple mappings.
+ * In practice very few pages have multiple mappings, but to rule out
+ * pathological situations we limit the number of mappings we are
+ * willing to bookkeep.
+ */
+
+#define DV_ADDR_LINK_LIST_LIMIT 64
+
+typedef struct dv_paddr_link {
+    paddr_t sl1ma;
+    struct dv_paddr_link *pl_next;
+} dv_paddr_link_t;
+
+typedef struct dv_pl_entry {
+    dv_paddr_link_t mapping;
+    bool_t stuck_dirty;
+} dv_pl_entry_t;
+
+/*
+ * This defines an extension page of pl entries for FB pages with multiple
+ * mappings. All such pages (of a domain) are linked together.
+ */
+typedef struct dv_paddr_link_ext {
+    struct list_head ext_link;
+    dv_paddr_link_t entries[ ( PAGE_SIZE - sizeof( struct list_head ) ) /
+                             sizeof( dv_paddr_link_t ) ];
+} dv_paddr_link_ext_t;
+
+/*
+ * This defines a single frame buffer range.  It bookkeeps all the
+ * level 1 PTEs that map guest pages within that range.
+ * All such ranges (of a domain) are linked together.
+ */
+typedef struct dv_range {
+    struct list_head range_link; /* the several ranges form a linked list */
+    unsigned long begin_pfn;
+    unsigned long end_pfn;
+    dv_pl_entry_t *pl_tab; /* table has 1 pl entry per pfn in range */
+    int nr_mappings;  /* total number of mappings in this range */
+    int mappings_hwm; /* high water mark of max mapping count */
+    unsigned int dirty_count;
+} dv_range_t;
+
+/*
+ * This contains all the data structures required by a domain to
+ * bookkeep the dirty pages within its frame buffers.
+ */
+typedef struct dv_dirty_vram {
+    struct list_head range_head; /* head of the linked list of ranges */
+    struct list_head ext_head; /* head of list of extension pages */
+    dv_paddr_link_t *pl_free; /* free list of pl's within extension pages */
+    int nr_ranges; /* bookkeeps number of ranges */
+    int ranges_hwm; /* high water mark of max number of ranges */
+} dv_dirty_vram_t;
+
+/* Allocates domain's dirty_vram structure */
+dv_dirty_vram_t *
+dirty_vram_alloc(struct domain *d);
+
+/*
+ * Returns domain's dirty_vram structure,
+ * allocating it if necessary
+ */
+dv_dirty_vram_t *
+dirty_vram_find_or_alloc(struct domain *d);
+
+/* Frees domain's dirty_vram structure */
+void dirty_vram_free(struct domain *d);
+
+/* Returns dirty vram range containing gfn, NULL if none */
+struct dv_range *
+dirty_vram_range_find_gfn(struct domain *d,
+                          unsigned long gfn);
+
+/*
+ * Returns dirty vram range matching [ begin_pfn .. begin_pfn+nr ),
+ * NULL if none
+ */
+dv_range_t *
+dirty_vram_range_find(struct domain *d,
+                      unsigned long begin_pfn,
+                      unsigned long nr);
+
+/*
+ * Allocate dirty vram range containing [ begin_pfn .. begin_pfn+nr ),
+ * freeing any existing range that overlaps the new range.
+ */
+dv_range_t *
+dirty_vram_range_alloc(struct domain *d,
+                       unsigned long begin_pfn,
+                       unsigned long nr);
+
+/*
+ * Returns dirty vram range matching [ begin_pfn .. begin_pfn+nr ),
+ * creating a range if none already exists and
+ * freeing any existing range that overlaps the new range.
+ */
+dv_range_t *
+dirty_vram_range_find_or_alloc(struct domain *d,
+                               unsigned long begin_pfn,
+                               unsigned long nr);
+
+void dirty_vram_range_free(struct domain *d,
+                           dv_range_t *range);
+
+/* Bookkeep PTE address of a frame buffer page */
+int dirty_vram_range_update(struct domain *d,
+                            unsigned long gfn,
+                            paddr_t sl1ma,
+                            int set);
+
+/*
+ * smfn is no longer a shadow page.  Remove it from any
+ * dirty vram range mapping.
+ */
+void
+dirty_vram_delete_shadow(struct vcpu *v,
+                         unsigned long gfn,
+                         unsigned int shadow_type,
+                         mfn_t smfn);
+
+
+/*
+ * Scan all the L1 tables looking for VRAM mappings.
+ * Record them in the domain's dv_dirty_vram structure
+ */
+void sh_find_all_vram_mappings(struct vcpu *v,
+                               dv_range_t *range);
+
+/*
+ * Free a paddr_link struct, given address of its
+ * predecessor in singly-linked list
+ */
+dv_paddr_link_t *
+free_paddr_link(struct domain *d,
+                dv_paddr_link_t **ppl,
+                dv_paddr_link_t *pl);
+
+
+/* Enable VRAM dirty tracking. */
+int
+shadow_track_dirty_vram(struct domain *d,
+                        unsigned long first_pfn,
+                        unsigned long nr,
+                        XEN_GUEST_HANDLE_64(uint8) dirty_bitmap);
+
+int
+hap_track_dirty_vram(struct domain *d,
+                     unsigned long begin_pfn,
+                     unsigned long nr,
+                     XEN_GUEST_HANDLE_64(uint8) dirty_bitmap);
+
+void
+hap_clean_vram_tracking_range(struct domain *d,
+                              unsigned long begin_pfn,
+                              unsigned long nr,
+                              uint8_t *dirty_bitmap);
+
+#endif /* _DIRTY_VRAM_H */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
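
For orientation, the shadow-mode call pattern for these interfaces, condensed
from shadow_track_dirty_vram() earlier in this patch (locking, range checks
and the copy-out of the bitmap to the guest handle are elided):

    dv_range_t *range;

    if ( !dirty_vram_find_or_alloc(d) )        /* per-domain bookkeeping */
        return -ENOMEM;

    range = dirty_vram_range_find(d, begin_pfn, nr);
    if ( !range )
    {
        /* first call for this [begin_pfn:begin_pfn+nr) */
        range = dirty_vram_range_alloc(d, begin_pfn, nr);
        if ( range )
            sh_find_all_vram_mappings(d->vcpu[0], range);
    }
    if ( range )
        /* test-and-clear the dirty bits of the bookkept L1 PTEs */
        flush_tlb |= shadow_scan_dirty_flags(d, range, dirty_bitmap);
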
diff --git a/xen/include/asm-x86/hap.h b/xen/include/asm-x86/hap.h
index 916a35b..3e3a1f5 100644
--- a/xen/include/asm-x86/hap.h
+++ b/xen/include/asm-x86/hap.h
@@ -57,10 +57,6 @@ void  hap_final_teardown(struct domain *d);
 void  hap_teardown(struct domain *d);
 void  hap_vcpu_init(struct vcpu *v);
 void  hap_logdirty_init(struct domain *d);
-int   hap_track_dirty_vram(struct domain *d,
-                           unsigned long begin_pfn,
-                           unsigned long nr,
-                           XEN_GUEST_HANDLE_64(uint8) dirty_bitmap);
 
 extern const struct paging_mode *hap_paging_get_mode(struct vcpu *);
 
diff --git a/xen/include/asm-x86/hvm/domain.h b/xen/include/asm-x86/hvm/domain.h
index 27b3de5..0cc7b05 100644
--- a/xen/include/asm-x86/hvm/domain.h
+++ b/xen/include/asm-x86/hvm/domain.h
@@ -74,7 +74,7 @@ struct hvm_domain {
     struct list_head       pinned_cacheattr_ranges;
 
     /* VRAM dirty support. */
-    struct sh_dirty_vram *dirty_vram;
+    struct dv_dirty_vram  *dirty_vram;
 
     /* If one of vcpus of this domain is in no_fill_mode or
      * mtrr/pat between vcpus is not the same, set is_in_uc_mode
diff --git a/xen/include/asm-x86/paging.h b/xen/include/asm-x86/paging.h
index c3a8848..e22df38 100644
--- a/xen/include/asm-x86/paging.h
+++ b/xen/include/asm-x86/paging.h
@@ -154,9 +154,13 @@ void paging_log_dirty_init(struct domain *d,
                            int  (*disable_log_dirty)(struct domain *d),
                            void (*clean_dirty_bitmap)(struct domain *d));
 
-/* mark a page as dirty */
+/* mark a gmfn as dirty, a wrapper around marking a gpfn as dirty */
 void paging_mark_dirty(struct domain *d, unsigned long guest_mfn);
 
+/* mark a gpfn as dirty */
+void paging_mark_dirty_gpfn(struct domain *d, unsigned long gpfn);
+
+
 /* is this guest page dirty? 
  * This is called from inside paging code, with the paging lock held. */
 int paging_mfn_is_dirty(struct domain *d, mfn_t gmfn);
@@ -183,15 +187,6 @@ int paging_mfn_is_dirty(struct domain *d, mfn_t gmfn);
 #define L4_LOGDIRTY_IDX(pfn) 0
 #endif
 
-/* VRAM dirty tracking support */
-struct sh_dirty_vram {
-    unsigned long begin_pfn;
-    unsigned long end_pfn;
-    paddr_t *sl1ma;
-    uint8_t *dirty_bitmap;
-    s_time_t last_dirty;
-};
-
 /*****************************************************************************
  * Entry points into the paging-assistance code */
 
diff --git a/xen/include/asm-x86/shadow.h b/xen/include/asm-x86/shadow.h
index 2eb6efc..940d7fd 100644
--- a/xen/include/asm-x86/shadow.h
+++ b/xen/include/asm-x86/shadow.h
@@ -62,12 +62,6 @@ void shadow_vcpu_init(struct vcpu *v);
 /* Enable an arbitrary shadow mode.  Call once at domain creation. */
 int shadow_enable(struct domain *d, u32 mode);
 
-/* Enable VRAM dirty bit tracking. */
-int shadow_track_dirty_vram(struct domain *d,
-                            unsigned long first_pfn,
-                            unsigned long nr,
-                            XEN_GUEST_HANDLE_64(uint8) dirty_bitmap);
-
 /* Handler for shadow control ops: operations from user-space to enable
  * and disable ephemeral shadow modes (test mode and log-dirty mode) and
  * manipulate the log-dirty bitmap. */
-- 
1.7.9.5


* Re: [PATCH] Provide support for multiple frame buffers in Xen
  2012-11-15 13:22 ` Tim Deegan
@ 2012-11-27 15:58   ` Robert Phillips
  0 siblings, 0 replies; 35+ messages in thread
From: Robert Phillips @ 2012-11-27 15:58 UTC (permalink / raw)
  To: Tim (Xen.org); +Cc: xen-devel

Hi,

The updated patch was sent out just a few minutes ago.


> -----Original Message-----
> From: Tim Deegan [mailto:tim@xen.org]
> Sent: Thursday, November 15, 2012 8:23 AM
> To: Robert Phillips
> Cc: xen-devel@lists.xen.org
> Subject: Re: [Xen-devel] [PATCH] Provide support for multiple frame buffers
> in Xen
> 
> Hi,
> 
> We're very nearly there now.  I think I agree on almost all the technical
> decisions but there are still a few things to tidy up (some of which I
> mentioned before).
> 
> At 16:31 -0500 on 12 Nov (1352737913), Robert Phillips wrote:
> > Support is provided for both shadow and hardware assisted paging (HAP)
> modes.
> > This code bookkeeps the set of video frame buffers (vram), detects
> > when the guest has modified any of those buffers and, upon request,
> > returns a bitmap of the modified pages.
> > This lets other software components re-paint the portions of the monitor
> (or monitors) that have changed.
> > Each monitor has a frame buffer of some size at some position in guest
> physical memory.
> > The set of frame buffers being tracked can change over time as monitors
> are plugged and unplugged.
> > (Version 3 of this patch.)
> 
> Please linewrap at something less than 80 characters.

Done

> 
> > diff --git a/xen/arch/x86/hvm/Makefile b/xen/arch/x86/hvm/Makefile
> > index eea5555..e374aac 100644
> > --- a/xen/arch/x86/hvm/Makefile
> > +++ b/xen/arch/x86/hvm/Makefile
> > @@ -22,4 +22,4 @@ obj-y += vlapic.o
> >  obj-y += vmsi.o
> >  obj-y += vpic.o
> >  obj-y += vpt.o
> > -obj-y += vpmu.o
> > \ No newline at end of file
> > +obj-y += vpmu.o
> 
> This is an unrelated fix, so doesn't belong in this changeset.

Removed

> 
> > diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c index
> > 34da2f5..3a3e5e4 100644
> > --- a/xen/arch/x86/hvm/hvm.c
> > +++ b/xen/arch/x86/hvm/hvm.c
> > @@ -57,6 +57,7 @@
> >  #include <asm/hvm/cacheattr.h>
> >  #include <asm/hvm/trace.h>
> >  #include <asm/hvm/nestedhvm.h>
> > +#include <asm/dirty_vram.h>
> >  #include <asm/mtrr.h>
> >  #include <asm/apic.h>
> >  #include <public/sched.h>
> > @@ -66,6 +67,7 @@
> >  #include <asm/mem_event.h>
> >  #include <asm/mem_access.h>
> >  #include <public/mem_event.h>
> > +#include "../mm/mm-locks.h"
> >
> >  bool_t __read_mostly hvm_enabled;
> >
> > @@ -1433,8 +1435,20 @@ int hvm_hap_nested_page_fault(paddr_t gpa,
> >           */
> >          if ( access_w )
> >          {
> > +            p2m_type_t pt;
> > +            pt = p2m_change_type(v->domain, gfn, p2m_ram_logdirty,
> > + p2m_ram_rw);
> > +
> > +            paging_lock(v->domain);
> > +            if ( pt == p2m_ram_logdirty )
> > +            {
> > +                dv_range_t *range;
> > +                v->domain->arch.paging.log_dirty.dirty_count++;
> > +                range = dirty_vram_range_find_gfn(v->domain, gfn);
> > +                if ( range )
> > +                    range->dirty_count++;
> > +            }
> >              paging_mark_dirty(v->domain, mfn_x(mfn));
> > -            p2m_change_type(v->domain, gfn, p2m_ram_logdirty,
> p2m_ram_rw);
> > +            paging_unlock(v->domain);
> 
> This is much nicer than the previous version, but I think it would be even
> better if this bookkeeping went into paging_mark_dirty() so that the other
> callers of paging_mark_dirty() also DTRT with the vram map.
> That would avoid leaking mm-locks.h into this non-mm code, too.
> 
> Then this change becomes just swapping the order of the two lines (and
> perhaps a comment to say why).

Done.  I had to split paging_mark_dirty() into two functions: the old
function does the gfn-to-mfn translation and then calls the new function
paging_mark_dirty_gpfn(), which does the heavy lifting.
The code above now calls the new function directly, thereby bypassing the
gfn-to-mfn translation.
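
In outline the split looks like this (condensed from the paging.c hunk in
the patch; the existing SHARED_M2P/log-dirty checks and the radix-tree
bitmap update are unchanged and elided here):

    /* Given a guest mfn, mark a page as dirty */
    void paging_mark_dirty(struct domain *d, unsigned long guest_mfn)
    {
        mfn_t gmfn = _mfn(guest_mfn);
        /* We /really/ mean PFN here, even for non-translated guests. */
        unsigned long pfn = get_gpfn_from_mfn(mfn_x(gmfn));

        paging_mark_dirty_gpfn(d, pfn);
    }

    /* Given a guest pfn, mark a page as dirty */
    void paging_mark_dirty_gpfn(struct domain *d, unsigned long pfn)
    {
        dv_range_t *range;

        /* Recursive: this is called from inside the shadow code */
        paging_lock_recursive(d);

        d->arch.paging.log_dirty.dirty_count++;
        range = dirty_vram_range_find_gfn(d, pfn);
        if ( range )
            range->dirty_count++;

        /* ... existing log-dirty radix-tree update goes here ... */

        paging_unlock(d);
    }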

> 
> > diff --git a/xen/arch/x86/mm/dirty_vram.c
> > b/xen/arch/x86/mm/dirty_vram.c new file mode 100644 index
> > 0000000..e3c7c1f
> > --- /dev/null
> > +++ b/xen/arch/x86/mm/dirty_vram.c
> > @@ -0,0 +1,992 @@
> > +/*
> > + * arch/x86/mm/dirty_vram.c: Bookkeep/query dirty VRAM pages
> > + * with support for multiple frame buffers.
> > + *
> > + * Copyright (c) 2012, Citrix Systems, Inc. (Robert Phillips)
> 
> Please bring in the copyright and authorship notices for the files you copied
> code from.  That's at least mm/shadow/common.c and mm/hap/hap.c.

Done

> 
> Apart from that this is looking good.
> 
> Are you willing to take on maintainership of this feature (that is, to respond
> to questions and fix bugs)? 

Yes

> If so, we should make an update to the
> MAINTAINERS file for xen/arch/x86/mm/dirty_vram.c and xen/include/asm-
> x86/dirty_vram.h.  That can happen separately, as it'll need an ack from the
> other maintainers.
> 
> Cheers,
> 
> Tim.


* [PATCH] Provide support for multiple frame buffers in Xen
@ 2012-11-27 15:52 Robert Phillips
  0 siblings, 0 replies; 35+ messages in thread
From: Robert Phillips @ 2012-11-27 15:52 UTC (permalink / raw)
  To: xen-devel; +Cc: Robert Phillips

Support is provided for both shadow and hardware assisted paging (HAP) modes.
This code bookkeeps the set of video frame buffers (vram),
detects when the guest has modified any of those buffers and, upon request,
returns a bitmap of the modified pages.
This lets other software components re-paint the portions of the monitor (or monitors) that have changed.
Each monitor has a frame buffer of some size at some position in guest physical memory.
The set of frame buffers being tracked can change over time as monitors are plugged and unplugged.
(Version 4 of this patch.)

Signed-Off-By: Robert Phillips <robert.phillips@citrix.com>
---
 tools/libxc/xenctrl.h            |   17 +-
 xen/arch/x86/hvm/hvm.c           |    9 +-
 xen/arch/x86/mm/Makefile         |    1 +
 xen/arch/x86/mm/dirty_vram.c     | 1000 ++++++++++++++++++++++++++++++++++++++
 xen/arch/x86/mm/hap/hap.c        |  140 +-----
 xen/arch/x86/mm/paging.c         |  222 +++------
 xen/arch/x86/mm/shadow/common.c  |  334 +++++++------
 xen/arch/x86/mm/shadow/multi.c   |  174 +++----
 xen/arch/x86/mm/shadow/multi.h   |    7 +-
 xen/arch/x86/mm/shadow/types.h   |    1 +
 xen/include/asm-x86/dirty_vram.h |  202 ++++++++
 xen/include/asm-x86/hap.h        |    4 -
 xen/include/asm-x86/hvm/domain.h |    2 +-
 xen/include/asm-x86/paging.h     |   23 +-
 xen/include/asm-x86/shadow.h     |    6 -
 15 files changed, 1574 insertions(+), 568 deletions(-)
 create mode 100644 xen/arch/x86/mm/dirty_vram.c
 create mode 100644 xen/include/asm-x86/dirty_vram.h

diff --git a/tools/libxc/xenctrl.h b/tools/libxc/xenctrl.h
index 7eb5743..693d7fe 100644
--- a/tools/libxc/xenctrl.h
+++ b/tools/libxc/xenctrl.h
@@ -1552,15 +1552,20 @@ int xc_hvm_inject_msi(
     xc_interface *xch, domid_t dom, uint64_t addr, uint32_t data);
 
 /*
- * Track dirty bit changes in the VRAM area
+ * Track dirty bit changes in a VRAM region defined by
+ * [ first_pfn : first_pfn + nr - 1 ]
  *
  * All of this is done atomically:
- * - get the dirty bitmap since the last call
- * - set up dirty tracking area for period up to the next call
- * - clear the dirty tracking area.
+ * - gets the dirty bitmap since the last call, all zeroes for
+ *   the first call with some new region
+ * - sets up a dirty tracking region for period up to the next call
+ * - clears the specified dirty tracking region.
  *
- * Returns -ENODATA and does not fill bitmap if the area has changed since the
- * last call.
+ * Creating a new region causes any existing regions that it overlaps
+ * to be discarded.
+ *
+ * Specifying nr == 0 causes all regions to be discarded and
+ * disables dirty bit tracking.
  */
 int xc_hvm_track_dirty_vram(
     xc_interface *xch, domid_t dom,
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 34da2f5..4c4d438 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -57,6 +57,7 @@
 #include <asm/hvm/cacheattr.h>
 #include <asm/hvm/trace.h>
 #include <asm/hvm/nestedhvm.h>
+#include <asm/dirty_vram.h>
 #include <asm/mtrr.h>
 #include <asm/apic.h>
 #include <public/sched.h>
@@ -66,6 +67,7 @@
 #include <asm/mem_event.h>
 #include <asm/mem_access.h>
 #include <public/mem_event.h>
+#include "../mm/mm-locks.h"
 
 bool_t __read_mostly hvm_enabled;
 
@@ -1433,8 +1435,11 @@ int hvm_hap_nested_page_fault(paddr_t gpa,
          */
         if ( access_w )
         {
-            paging_mark_dirty(v->domain, mfn_x(mfn));
-            p2m_change_type(v->domain, gfn, p2m_ram_logdirty, p2m_ram_rw);
+            if ( p2m_change_type(v->domain, gfn, p2m_ram_logdirty,
+                                 p2m_ram_rw) == p2m_ram_logdirty )
+            {
+                paging_mark_dirty_gpfn(v->domain, gfn);
+            }
         }
         rc = 1;
         goto out_put_gfn;
diff --git a/xen/arch/x86/mm/Makefile b/xen/arch/x86/mm/Makefile
index 73dcdf4..becd0c9 100644
--- a/xen/arch/x86/mm/Makefile
+++ b/xen/arch/x86/mm/Makefile
@@ -5,6 +5,7 @@ obj-y += paging.o
 obj-y += p2m.o p2m-pt.o p2m-ept.o p2m-pod.o
 obj-y += guest_walk_2.o
 obj-y += guest_walk_3.o
+obj-y += dirty_vram.o
 obj-$(x86_64) += guest_walk_4.o
 obj-$(x86_64) += mem_event.o
 obj-$(x86_64) += mem_paging.o
diff --git a/xen/arch/x86/mm/dirty_vram.c b/xen/arch/x86/mm/dirty_vram.c
new file mode 100644
index 0000000..32fcc13
--- /dev/null
+++ b/xen/arch/x86/mm/dirty_vram.c
@@ -0,0 +1,1000 @@
+/*
+ * arch/x86/mm/dirty_vram.c: Bookkeep/query dirty VRAM pages
+ * with support for multiple frame buffers.
+ *
+ * Copyright (c) 2012, Citrix Systems, Inc. (Robert Phillips)
+ * Parts of this code are Copyright (c) 2007 Advanced Micro Devices (Wei Huang)
+ * Parts of this code are Copyright (c) 2007 XenSource Inc.
+ * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
+ * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+
+#include <xen/types.h>
+#include <xen/sched.h>
+#include <xen/guest_access.h>
+#include <asm/shadow.h>
+#include <asm/dirty_vram.h>
+#include "mm-locks.h"
+
+#define DEBUG_stop_tracking_all_vram          0
+#define DEBUG_allocating_dirty_vram_range     0
+#define DEBUG_high_water_mark_for_vram_ranges 0
+#define DEBUG_freeing_dirty_vram_range        0
+#define DEBUG_allocate_paddr_links_page       0
+#define DEBUG_update_vram_mapping             0
+#define DEBUG_alloc_paddr_inject_fault        0
+
+/* Allocates domain's dirty_vram structure */
+dv_dirty_vram_t *
+dirty_vram_alloc(struct domain *d)
+{
+    dv_dirty_vram_t *dirty_vram;
+    ASSERT( paging_locked_by_me(d) );
+    dirty_vram = d->arch.hvm_domain.dirty_vram = xzalloc(dv_dirty_vram_t);
+    if ( dirty_vram )
+    {
+        INIT_LIST_HEAD(&dirty_vram->range_head);
+        INIT_LIST_HEAD(&dirty_vram->ext_head);
+    }
+    return dirty_vram;
+}
+
+/*
+ * Returns domain's dirty_vram structure,
+ * allocating it if necessary
+ */
+dv_dirty_vram_t *
+dirty_vram_find_or_alloc(struct domain *d)
+{
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    ASSERT( paging_locked_by_me(d) );
+    if ( !dirty_vram )
+        dirty_vram = dirty_vram_alloc(d);
+    return dirty_vram;
+}
+
+
+/* Free domain's dirty_vram structure */
+void dirty_vram_free(struct domain *d)
+{
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    ASSERT( paging_locked_by_me(d) );
+    if ( dirty_vram )
+    {
+        struct list_head *curr, *next;
+        /* Free all the ranges */
+        list_for_each_safe(curr, next, &dirty_vram->range_head)
+        {
+            dv_range_t *range = list_entry(curr, dv_range_t, range_link);
+#if DEBUG_stop_tracking_all_vram
+            gdprintk(XENLOG_DEBUG, "[%05lx:%05lx] stop tracking all vram\n",
+                     range->begin_pfn, range->end_pfn);
+#endif
+            xfree(range->pl_tab);
+            xfree(range);
+        }
+        /* Free all the extension pages */
+        list_for_each_safe(curr, next, &dirty_vram->ext_head)
+        {
+            struct dv_paddr_link_ext *ext =
+                container_of(
+                    curr, struct dv_paddr_link_ext, ext_link);
+            struct page_info *pg = __virt_to_page(ext);
+            d->arch.paging.free_page(d, pg);
+        }
+
+        xfree(dirty_vram);
+        d->arch.hvm_domain.dirty_vram = NULL;
+    }
+}
+
+/* Returns dirty vram range containing gfn, NULL if none */
+struct dv_range *
+dirty_vram_range_find_gfn(struct domain *d,
+                          unsigned long gfn)
+{
+    struct dv_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    ASSERT( paging_locked_by_me(d) );
+    if ( dirty_vram )
+    {
+        struct list_head *curr;
+        list_for_each(curr, &dirty_vram->range_head)
+        {
+            dv_range_t *range = list_entry(curr, dv_range_t, range_link);
+            if ( gfn >= range->begin_pfn &&
+                 gfn <  range->end_pfn )
+                return range;
+        }
+    }
+    return NULL;
+}
+
+/*
+ * Returns pointer to dirty vram range matching [begin_pfn .. end_pfn ),
+ * NULL if none.
+ */
+dv_range_t *
+dirty_vram_range_find(struct domain *d,
+                      unsigned long begin_pfn,
+                      unsigned long nr)
+{
+    unsigned long end_pfn = begin_pfn + nr;
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    ASSERT( paging_locked_by_me(d) );
+    if ( dirty_vram )
+    {
+        struct list_head *curr;
+        list_for_each(curr, &dirty_vram->range_head)
+        {
+            dv_range_t *range = list_entry(curr, dv_range_t, range_link);
+            if ( begin_pfn == range->begin_pfn &&
+                 end_pfn   == range->end_pfn )
+                return range;
+        }
+    }
+    return NULL;
+}
+
+/* Allocate specified dirty_vram range */
+static dv_range_t *
+_dirty_vram_range_alloc(struct domain *d,
+                        unsigned long begin_pfn,
+                        unsigned long nr)
+{
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    dv_range_t *range = NULL;
+    unsigned long end_pfn = begin_pfn + nr;
+    dv_pl_entry_t *pl_tab = NULL;
+    int i;
+
+    ASSERT( paging_locked_by_me(d) );
+    ASSERT( dirty_vram != NULL );
+
+#if DEBUG_allocating_dirty_vram_range
+    gdprintk(XENLOG_DEBUG,
+             "[%05lx:%05lx] Allocating dirty vram range hap:%d\n",
+             begin_pfn, end_pfn,
+             d->arch.hvm_domain.hap_enabled);
+#endif
+
+    range = xzalloc(dv_range_t);
+    if ( range == NULL )
+        goto err_out;
+
+    INIT_LIST_HEAD(&range->range_link);
+
+    range->begin_pfn = begin_pfn;
+    range->end_pfn = end_pfn;
+
+    if ( !hap_enabled(d) )
+    {
+        if ( (pl_tab = xzalloc_array(dv_pl_entry_t, nr)) == NULL )
+            goto err_out;
+
+        for ( i = 0; i != nr; i++ )
+        {
+            pl_tab[i].mapping.sl1ma = INVALID_PADDR;
+        }
+    }
+
+    range->pl_tab = pl_tab;
+    range->mappings_hwm = 1;
+
+    list_add(&range->range_link, &dirty_vram->range_head);
+    if ( ++dirty_vram->nr_ranges > dirty_vram->ranges_hwm )
+    {
+        dirty_vram->ranges_hwm = dirty_vram->nr_ranges;
+#if DEBUG_high_water_mark_for_vram_ranges
+        gdprintk(XENLOG_DEBUG,
+                 "High water mark for number of vram ranges is now:%d\n",
+                 dirty_vram->ranges_hwm);
+#endif
+    }
+    return range;
+
+ err_out:
+    xfree(pl_tab);
+    xfree(range);
+    return NULL;
+}
+
+
+/* Frees specified dirty_vram range */
+void dirty_vram_range_free(struct domain *d,
+                           dv_range_t *range)
+{
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    ASSERT( paging_locked_by_me(d) );
+    if ( dirty_vram )
+    {
+        int i, nr = range->end_pfn - range->begin_pfn;
+
+#if DEBUG_freeing_dirty_vram_range
+        gdprintk(XENLOG_DEBUG,
+                 "[%05lx:%05lx] Freeing dirty vram range\n",
+                 range->begin_pfn, range->end_pfn);
+#endif
+
+        if ( range->pl_tab )
+        {
+            for ( i = 0; i != nr; i++ )
+            {
+                dv_paddr_link_t *plx;
+                plx = range->pl_tab[i].mapping.pl_next;
+                /* Does current FB page have multiple mappings? */
+                if ( plx ) /* yes */
+                {
+                    /* Find the last element in singly-linked list */
+                    while ( plx->pl_next != NULL )
+                        plx = plx->pl_next;
+                    
+                    /* Prepend whole list to the free list */
+                    plx->pl_next = dirty_vram->pl_free;
+                    dirty_vram->pl_free = range->pl_tab[i].mapping.pl_next;
+                }
+            }
+            xfree(range->pl_tab);
+            range->pl_tab = NULL;
+        }
+
+        /* Remove range from the linked list, free it, and adjust count */
+        list_del(&range->range_link);
+        xfree(range);
+        dirty_vram->nr_ranges--;
+    }
+}
+
+/*
+ * dirty_vram_range_alloc()
+ * This function ensures that the new range does not overlap any existing
+ * ranges -- deleting them if necessary -- and then calls
+ * _dirty_vram_range_alloc to actually allocate the new range.
+ */
+dv_range_t *
+dirty_vram_range_alloc(struct domain *d,
+                        unsigned long begin_pfn,
+                        unsigned long nr)
+{
+    unsigned long end_pfn = begin_pfn + nr;
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    dv_range_t *range;
+    struct list_head *curr, *next;
+
+    ASSERT( paging_locked_by_me(d) );
+    ASSERT( dirty_vram != NULL );
+
+    /*
+     * Ranges cannot overlap so
+     * free any range that overlaps [ begin_pfn .. end_pfn )
+     */
+    list_for_each_safe(curr, next, &dirty_vram->range_head)
+    {
+        dv_range_t *rng = list_entry(curr, dv_range_t, range_link);
+        if ( ( ( rng->begin_pfn <= begin_pfn ) &&
+               ( begin_pfn <  rng->end_pfn   )
+                 ) ||
+             ( ( begin_pfn <= rng->begin_pfn ) &&
+               ( rng->begin_pfn < end_pfn    )
+                 ) )
+        {
+            /* Different tracking, tear the previous down. */
+            dirty_vram_range_free(d, rng);
+        }
+    }
+
+    range = _dirty_vram_range_alloc(d, begin_pfn, nr);
+    if ( !range )
+        goto out;
+
+ out:
+    return range;
+}
+
+/*
+ * dirty_vram_range_find_or_alloc()
+ * Find the range for [begin_pfn:begin_pfn+nr).
+ * If it doesn't exist, create it.
+ */
+dv_range_t *
+dirty_vram_range_find_or_alloc(struct domain *d,
+                                unsigned long begin_pfn,
+                                unsigned long nr)
+{
+    dv_range_t *range;
+    ASSERT( paging_locked_by_me(d) );
+    range = dirty_vram_range_find(d, begin_pfn, nr);
+    if ( !range )
+        range = dirty_vram_range_alloc(d, begin_pfn, nr);
+    
+    return range;
+}
+
+
+
+/* Allocate a dv_paddr_link struct */
+static dv_paddr_link_t *
+alloc_paddr_link(struct domain *d)
+{
+    dv_paddr_link_t * pl = NULL;
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
+
+    ASSERT( paging_locked_by_me(d) );
+    BUILD_BUG_ON(sizeof(dv_paddr_link_ext_t) > PAGE_SIZE);
+    /* Is the list of free pl's empty? */
+    if ( dirty_vram->pl_free == NULL ) /* yes */
+    {
+        /*
+         * Allocate another page of pl's.
+         * Link them all together and point the free list head at them
+         */
+        int i;
+        struct page_info *pg = d->arch.paging.alloc_page(d);
+        dv_paddr_link_ext_t *ext = pg ? __page_to_virt(pg) : NULL;
+        if ( ext == NULL )
+            goto out;
+
+#if DEBUG_allocate_paddr_links_page
+        gdprintk(XENLOG_DEBUG, "Allocated another page of paddr_links\n");
+#endif
+        list_add(&ext->ext_link, &dirty_vram->ext_head);
+
+        /* initialize and link together the new pl entries */
+        for ( i = 0; i != ARRAY_SIZE(ext->entries); i++ )
+        {
+            ext->entries[i].sl1ma = INVALID_PADDR;
+            ext->entries[i].pl_next = &ext->entries[i+1];
+        }
+        ext->entries[ARRAY_SIZE(ext->entries) - 1].pl_next = NULL;
+        dirty_vram->pl_free = &ext->entries[0];
+    }
+    pl = dirty_vram->pl_free;
+    dirty_vram->pl_free = pl->pl_next;
+
+    pl->sl1ma = INVALID_PADDR;
+    pl->pl_next = NULL;
+ out:
+    return pl;
+}
+
+
+/*
+ * Free a paddr_link struct.
+ *
+ * The caller has walked the singly-linked list of elements
+ * that have, as their head, an element in a pl_tab cell.
+ * The list walk has reached the element to be freed.
+ * (Each element is a dv_paddr_link_t struct.)
+ *
+ * @pl points to the element to be freed.
+ * @ppl points to its predecessor element's next member.
+ *
+ * After linking the predecessor to the element's successor,
+ * we can free @pl by prepending it to the list of free
+ * elements.
+ *
+ * As a boundary case (which happens to be the common case),
+ * @pl points to a cell in the pl_tab rather than to some
+ * extension element dangling from that cell.
+ * We recognize this case because @ppl is NULL.
+ * In that case we promote the first extension element by
+ * copying it into the pl_tab cell and free it.
+ */
+
+dv_paddr_link_t *
+free_paddr_link(struct domain *d,
+                dv_paddr_link_t **ppl,
+                dv_paddr_link_t *pl)
+{
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    dv_paddr_link_t *npl; /* next pl */
+
+    ASSERT( paging_locked_by_me(d) );
+    /* extension mapping? */
+    if ( ppl ) /* yes. free it */
+    {
+        ASSERT(pl == (*ppl));
+        (*ppl) = npl = pl->pl_next;
+    }
+    else  /* main table */
+    {
+        /*
+         * move 2nd mapping to main table.
+         * and free 2nd mapping
+         */
+        dv_paddr_link_t * spl;
+        spl = pl->pl_next;
+        if ( spl == NULL )
+        {
+            pl->sl1ma = INVALID_PADDR;
+            return pl;
+        }
+        pl->sl1ma = spl->sl1ma;
+        pl->pl_next = spl->pl_next;
+        npl = pl; /* reprocess main table entry again */
+        pl = spl;
+    }
+    pl->sl1ma = INVALID_PADDR;
+    pl->pl_next = dirty_vram->pl_free;
+    dirty_vram->pl_free = pl;
+    return npl;
+}
+
+
+/*
+ * dirty_vram_range_update()
+ *
+ * This is called whenever a level 1 page table entry is modified.
+ * If the L1PTE is being cleared, the function removes any paddr_links
+ * that refer to it.
+ * If the L1PTE is being set to a frame buffer page, a paddr_link is
+ * created for that page's entry in pl_tab.
+ * Returns 1 iff entry found and set or cleared.
+ */
+int dirty_vram_range_update(struct domain *d,
+                            unsigned long gfn,
+                            paddr_t sl1ma,
+                            int set)
+{
+    int effective = 0;
+    dv_range_t *range;
+    unsigned long i;
+    dv_paddr_link_t *pl;
+    dv_paddr_link_t **ppl;
+    int len = 0;
+
+    ASSERT(paging_locked_by_me(d));
+    range = dirty_vram_range_find_gfn(d, gfn);
+    if ( !range )
+        return effective;
+
+    
+    i = gfn - range->begin_pfn;
+    pl = &range->pl_tab[ i ].mapping;
+    ppl = NULL;
+
+    /*
+     * find matching entry (pl), if any, and its predecessor
+     * in linked list (ppl)
+     */
+    while ( pl != NULL )
+    {
+        if ( pl->sl1ma == sl1ma || pl->sl1ma == INVALID_PADDR )
+            break;
+            
+        ppl = &pl->pl_next;
+        pl = *ppl;
+        len++;
+    }
+
+    if ( set )
+    {
+        /* Did we find sl1ma in either the main table or the linked list? */
+        if ( pl == NULL ) /* no, so we'll need to alloc a link */
+        {
+            ASSERT(ppl != NULL);
+            
+#if DEBUG_alloc_paddr_inject_fault
+            {
+                static int counter;
+                
+                /* Test stuck_dirty logic for some cases */
+                if ( (++counter) % 4 == 0 )
+                {
+                    /* Simply mark the frame buffer page as always dirty */
+                    range->pl_tab[ i ].stuck_dirty = 1;
+                    gdprintk(XENLOG_DEBUG,
+                             "[%lx] inject stuck dirty fault\n",
+                             gfn );
+                    goto out;
+                }
+            }
+#endif
+            /*
+             * Have we reached the limit of mappings we're willing
+             * to bookkeep?
+             */
+            if (len > DV_ADDR_LINK_LIST_LIMIT) /* yes */
+            {
+                /* Simply mark the frame buffer page as always dirty */
+                range->pl_tab[ i ].stuck_dirty = 1;
+                
+                gdprintk(XENLOG_DEBUG,
+                         "[%lx] link limit exceeded\n",
+                         gfn );
+                
+                goto out;
+            }
+
+            /* alloc link and append it to list */
+            (*ppl) = pl = alloc_paddr_link(d);
+            /* Were we able to allocate a link? */
+            if ( pl == NULL ) /* no */
+            {
+                /* Simply mark the frame buffer page as always dirty */
+                range->pl_tab[ i ].stuck_dirty = 1;
+                
+                gdprintk(XENLOG_DEBUG,
+                         "[%lx] alloc failure\n",
+                         gfn );
+                
+                goto out;
+            }
+        }
+        if ( pl->sl1ma != sl1ma )
+        {
+            ASSERT(pl->sl1ma == INVALID_PADDR);
+            pl->sl1ma = sl1ma;
+            range->nr_mappings++;
+        }
+        effective = 1;
+        if ( len > range->mappings_hwm )
+        {
+            range->mappings_hwm = len;
+#if DEBUG_update_vram_mapping
+            gdprintk(XENLOG_DEBUG,
+                     "[%lx] set      sl1ma:%lx hwm:%d mappings:%d "
+                     "freepages:%d\n",
+                     gfn, sl1ma,
+                     range->mappings_hwm,
+                     range->nr_mappings,
+                     d->arch.paging.shadow.free_pages);
+#endif
+        }
+    }
+    else /* clear */
+    {
+        if ( pl && pl->sl1ma == sl1ma )
+        {
+#if DEBUG_update_vram_mapping
+            gdprintk(XENLOG_DEBUG,
+                     "[%lx] clear    sl1ma:%lx mappings:%d\n",
+                     gfn, sl1ma,
+                     range->nr_mappings - 1);
+#endif
+            free_paddr_link(d, ppl, pl);
+            --range->nr_mappings;
+            effective = 1;
+        }
+    }
+ out:
+    return effective;
+}
+
+
+/*
+ * shadow_scan_dirty_flags()
+ * This produces a dirty bitmap for the range by examining every
+ * L1PTE referenced by some dv_paddr_link in the range's pl_tab table.
+ * It tests and clears each such L1PTE's dirty flag.
+ */
+static int shadow_scan_dirty_flags(struct domain *d,
+                                   dv_range_t *range,
+                                   uint8_t *dirty_bitmap)
+{
+    int flush_tlb = 0;
+    unsigned long i;
+    unsigned long nr = range->end_pfn - range->begin_pfn;
+
+    ASSERT( paging_locked_by_me(d) );
+    /* Iterate over VRAM to track dirty bits. */
+    for ( i = 0; i < nr; i++ )
+    {
+        int dirty = 0, len = 1;
+        dv_paddr_link_t *pl;
+        /* Does the frame buffer have an incomplete set of mappings? */
+        if ( unlikely(range->pl_tab[i].stuck_dirty) ) /* yes */
+            dirty = 1;
+        else /* The frame buffer's set of mappings is complete.  Scan it. */
+            for ( pl = &range->pl_tab[i].mapping;
+                  pl;
+                  pl = pl->pl_next, len++ )
+            {
+                l1_pgentry_t *sl1e;
+                paddr_t sl1ma = pl->sl1ma;
+                if (sl1ma == INVALID_PADDR) /* FB page is unmapped */
+                    continue;
+                sl1e = maddr_to_virt(sl1ma);
+                if ( l1e_get_flags(*sl1e) & _PAGE_DIRTY )
+                {
+                    dirty = 1;
+                    /* Clear dirty so we can detect if page gets re-dirtied.
+                     * Note: this is atomic, so we may clear a
+                     * _PAGE_ACCESSED set by another processor.
+                     */
+                    l1e_remove_flags(*sl1e, _PAGE_DIRTY);
+                    flush_tlb = 1;
+                }
+            } /* for */
+        
+        if ( dirty )
+            dirty_bitmap[i >> 3] |= (1 << (i & 7));
+
+    }
+
+    return flush_tlb;
+}
+
+
+/*
+ * shadow_track_dirty_vram()
+ * This is the API called by the guest to determine which pages in the range
+ * from [begin_pfn:begin_pfn+nr) have been dirtied since the last call.
+ * It creates the domain's dv_dirty_vram on demand.
+ * It creates a range on demand when [begin_pfn:begin_pfn+nr) is first seen.
+ * To collect the dirty bitmask it calls shadow_scan_dirty_flags().
+ * It copies the dirty bitmask into guest storage.
+ */
+int shadow_track_dirty_vram(struct domain *d,
+                            unsigned long begin_pfn,
+                            unsigned long nr,
+                            XEN_GUEST_HANDLE_64(uint8) guest_dirty_bitmap)
+{
+    int rc = 0;
+    unsigned long end_pfn = begin_pfn + nr;
+    int flush_tlb = 0;
+    dv_range_t *range;
+    struct p2m_domain *p2m = p2m_get_hostp2m(d);
+
+    /*
+     * This range test is tricky.
+     *
+     * The range [begin_pfn..end_pfn) is an open interval, so end_pfn
+     * is a pfn beyond the end of the range.
+     *
+     * p2m->max_mapped_pfn is a valid PFN so p2m->max_mapped_pfn + 1 is an
+     * invalid PFN.
+     *
+     * If end_pfn is beyond *that* then the range is invalid.
+     */
+    if ( end_pfn < begin_pfn
+         || begin_pfn > p2m->max_mapped_pfn
+         || end_pfn > p2m->max_mapped_pfn + 1 )
+        return -EINVAL;
+
+    paging_lock(d);
+
+    if (!nr)
+    {
+        dirty_vram_free(d);
+        goto out;
+    }
+
+    if ( guest_handle_is_null(guest_dirty_bitmap) )
+        goto out;
+
+    if ( !dirty_vram_find_or_alloc(d) )
+    {
+        rc = -ENOMEM;
+        goto out;
+    }
+
+    range = dirty_vram_range_find(d, begin_pfn, nr);
+    if ( !range )
+    {
+        range = dirty_vram_range_alloc(d, begin_pfn, nr);
+        if ( range )
+            sh_find_all_vram_mappings(d->vcpu[0], range);
+    }
+    if ( range )
+    {
+        int size = (nr + BITS_PER_LONG - 1) / BITS_PER_LONG;
+        unsigned long dirty_bitmap[size];
+
+        memset(dirty_bitmap, 0x00, size * BYTES_PER_LONG);
+
+        flush_tlb |= shadow_scan_dirty_flags(d, range, (uint8_t*)dirty_bitmap);
+
+        rc = -EFAULT;
+        if ( copy_to_guest(guest_dirty_bitmap,
+                           (uint8_t*)dirty_bitmap,
+                           size * BYTES_PER_LONG) == 0 )
+            rc = 0;
+    }
+    
+    if ( flush_tlb )
+        flush_tlb_mask(d->domain_dirty_cpumask);
+
+out:
+    paging_unlock(d);
+    return rc;
+}
+
+
+/************************************************/
+/*          HAP VRAM TRACKING SUPPORT           */
+/************************************************/
+
+/*
+ * hap_enable_vram_tracking()
+ * For all ranges, mark all vram pages in range as logdirty read-only.
+ */
+static int hap_enable_vram_tracking(struct domain *d)
+{
+    int rc = 0;
+    dv_dirty_vram_t *dirty_vram;
+    struct list_head *curr;
+
+    /* turn on PG_log_dirty bit in paging mode */
+    paging_lock(d);
+    d->arch.paging.mode |= PG_log_dirty;
+    paging_unlock(d);
+
+    p2m_lock(p2m_get_hostp2m(d));
+    paging_lock(d);
+
+    dirty_vram = d->arch.hvm_domain.dirty_vram;
+
+    /*
+     * dirty_vram != NULL iff we're tracking dirty vram.
+     * If we start tracking dirty pages for all memory then
+     * the dirty_vram structure is freed.
+     */
+    if ( !dirty_vram )
+    {
+        rc = -EINVAL;
+        goto out;
+    }
+
+    /* set l1e entries of P2M table to be read-only. */
+    list_for_each(curr, &dirty_vram->range_head)
+    {
+        dv_range_t *range = list_entry(curr, dv_range_t, range_link);
+        gdprintk(XENLOG_DEBUG, "[%05lx:%05lx] enable  vram tracking\n",
+                 range->begin_pfn, range->end_pfn);
+        p2m_change_type_range(d, range->begin_pfn, range->end_pfn,
+                              p2m_ram_rw, p2m_ram_logdirty);
+    }
+
+    flush_tlb_mask(d->domain_dirty_cpumask);
+ out:
+    paging_unlock(d);
+    p2m_unlock(p2m_get_hostp2m(d));
+    if ( rc )
+    {
+        paging_lock(d);
+        d->arch.paging.mode &= ~PG_log_dirty;
+        paging_unlock(d);
+    }
+    return rc;
+}
+
+/*
+ * hap_disable_vram_tracking()
+ * For all ranges, restore all vram pages in range to normal read-write.
+ */
+static int hap_disable_vram_tracking(struct domain *d)
+{
+    int rc = 0;
+    dv_dirty_vram_t *dirty_vram;
+    struct list_head *curr;
+
+    paging_lock(d);
+    d->arch.paging.mode &= ~PG_log_dirty;
+    paging_unlock(d);
+
+    p2m_lock(p2m_get_hostp2m(d));
+    paging_lock(d);
+
+    dirty_vram = d->arch.hvm_domain.dirty_vram;
+    if ( !dirty_vram )
+    {
+        rc = -EINVAL;
+        goto out;
+    }
+
+    /* set l1e entries of P2M table with normal mode */
+    list_for_each(curr, &dirty_vram->range_head)
+    {
+        dv_range_t *range = list_entry(curr, dv_range_t, range_link);
+        gdprintk(XENLOG_DEBUG, "[%05lx:%05lx] disable vram tracking\n",
+                 range->begin_pfn, range->end_pfn);
+        p2m_change_type_range(d, range->begin_pfn, range->end_pfn,
+                              p2m_ram_logdirty, p2m_ram_rw);
+    }
+    flush_tlb_mask(d->domain_dirty_cpumask);
+ out:
+    paging_unlock(d);
+    p2m_unlock(p2m_get_hostp2m(d));
+    if ( rc )
+    {
+        paging_lock(d);
+        d->arch.paging.mode |= PG_log_dirty;
+        paging_unlock(d);
+    }
+    return rc;
+}
+
+/*
+ * hap_clean_vram_tracking_range()
+ * For all the pages in the range [begin_pfn : begin_pfn + nr),
+ * note in the dirty bitmap any page that has been marked as read-write,
+ * which signifies that the page has been dirtied, and reset the page
+ * to ram_logdirty.
+ */
+void hap_clean_vram_tracking_range(struct domain *d,
+                                   unsigned long begin_pfn,
+                                   unsigned long nr,
+                                   uint8_t *dirty_bitmap)
+{
+    int i;
+    unsigned long pfn;
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    dv_range_t *range;
+
+    ASSERT(p2m_locked_by_me(p2m_get_hostp2m(d)));
+    ASSERT(paging_locked_by_me(d));
+
+    if ( !dirty_vram )
+    {
+        gdprintk(XENLOG_DEBUG,
+                 "Should only be called while tracking dirty vram.\n");
+        return;
+    }
+
+    range = dirty_vram_range_find(d, begin_pfn, nr);
+    if ( !range )
+        return;
+
+    /* set l1e entries of P2M table to be read-only. */
+    /*
+     * On first write, it page faults, its entry is changed to read-write,
+     * its bit in the dirty bitmap is set, and on retry the write succeeds.
+     */
+    for ( i = 0, pfn = range->begin_pfn; pfn < range->end_pfn; i++, pfn++ )
+    {
+        p2m_type_t pt;
+        pt = p2m_change_type(d, pfn, p2m_ram_rw, p2m_ram_logdirty);
+        if ( pt == p2m_ram_rw )
+            dirty_bitmap[i >> 3] |= (1 << (i & 7));
+    }
+    flush_tlb_mask(d->domain_dirty_cpumask);
+}
+
+static void hap_vram_tracking_init(struct domain *d)
+{
+    paging_log_dirty_init(d, hap_enable_vram_tracking,
+                          hap_disable_vram_tracking,
+                          NULL);
+}
+
+/*
+ * hap_track_dirty_vram()
+ * Create the domain's dv_dirty_vram struct on demand.
+ * Create a dirty vram range on demand when some [begin_pfn:begin_pfn+nr) is
+ * first encountered.
+ * Collect the guest_dirty bitmask, a bit mask of the dirty vram pages, by
+ * calling paging_log_dirty_range().
+ */
+int hap_track_dirty_vram(struct domain *d,
+                         unsigned long begin_pfn,
+                         unsigned long nr,
+                         XEN_GUEST_HANDLE_64(uint8) guest_dirty_bitmap)
+{
+    long rc = 0;
+    dv_dirty_vram_t *dirty_vram;
+
+    paging_lock(d);
+    dirty_vram = d->arch.hvm_domain.dirty_vram;
+    if ( nr )
+    {
+        dv_range_t *range = NULL;
+        int size = ( nr + BITS_PER_LONG - 1 ) & ~( BITS_PER_LONG - 1 );
+        uint8_t dirty_bitmap[size];
+        bool_t new_range = 0;
+
+        /* Already tracking dirty vram? */
+        if ( paging_mode_log_dirty(d) && dirty_vram ) /* yes */
+        {
+            /* Handle the addition of another range */
+            range = dirty_vram_range_find(d, begin_pfn, nr);
+            if ( !range )
+            {
+                rc = -ENOMEM;
+                if ( !(range = dirty_vram_range_alloc(d, begin_pfn, nr)) )
+                    goto param_fail;
+                new_range = 1;
+            }
+        }
+        /* Just starting to track dirty vram? */
+        else if ( !paging_mode_log_dirty(d) && !dirty_vram ) /* yes */
+        {
+            rc = -ENOMEM;
+            if ( !(dirty_vram = dirty_vram_alloc(d)) )
+                goto param_fail;
+
+            if ( !(range = dirty_vram_range_find_or_alloc(d, begin_pfn,
+                                                          nr)) )
+                goto param_fail;
+
+            new_range = 1;
+            
+            /* Initialize callbacks for vram tracking */
+            hap_vram_tracking_init(d);
+
+            /* Enable HAP vram tracking */
+            paging_unlock(d);
+            rc = paging_log_dirty_enable(d);
+            paging_lock(d);
+            
+            if ( rc != 0 )
+                goto param_fail;
+        }
+        else
+        {
+            /* Test for invalid combination */
+            if ( !paging_mode_log_dirty(d) && dirty_vram )
+                rc = -EINVAL;
+            else /* logging dirty of all memory, not tracking dirty vram */
+                rc = -ENODATA;
+            goto param_fail;
+        }
+
+        paging_unlock(d);
+        /* Is this query the very first for this range? */
+        if ( new_range ) /* yes */
+            memset(dirty_bitmap, 0xff, size); /* consider all pages dirty */
+        else
+        {
+            memset(dirty_bitmap, 0x00, size);
+            paging_log_dirty_range(d, begin_pfn, nr, dirty_bitmap);
+        }
+        rc = -EFAULT;
+        if ( copy_to_guest(guest_dirty_bitmap,
+                           dirty_bitmap,
+                           size) == 0 )
+        {
+            rc = 0;
+        }
+    }
+    else
+    {
+        /*
+         * If zero pages specified while already tracking dirty vram
+         * then stop tracking
+         */
+        if ( paging_mode_log_dirty(d) && dirty_vram )
+        {
+            /* Disable HAP vram tracking */
+            paging_unlock(d);
+            rc = paging_log_dirty_disable(d);
+            paging_lock(d);
+            
+            dirty_vram_free(d);
+        }
+        else /* benign no-op */
+        {
+            rc = 0;
+        }
+        paging_unlock(d);
+    }
+
+    return rc;
+
+param_fail:
+    dirty_vram_free(d);
+    paging_unlock(d);
+    return rc;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c
index fd99cde..09cdba2 100644
--- a/xen/arch/x86/mm/hap/hap.c
+++ b/xen/arch/x86/mm/hap/hap.c
@@ -41,6 +41,7 @@
 #include <asm/domain.h>
 #include <xen/numa.h>
 #include <asm/hvm/nestedhvm.h>
+#include <asm/dirty_vram.h>
 
 #include "private.h"
 
@@ -53,139 +54,6 @@
 #define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg))
 
 /************************************************/
-/*          HAP VRAM TRACKING SUPPORT           */
-/************************************************/
-
-static int hap_enable_vram_tracking(struct domain *d)
-{
-    struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
-
-    if ( !dirty_vram )
-        return -EINVAL;
-
-    /* turn on PG_log_dirty bit in paging mode */
-    paging_lock(d);
-    d->arch.paging.mode |= PG_log_dirty;
-    paging_unlock(d);
-
-    /* set l1e entries of P2M table to be read-only. */
-    p2m_change_type_range(d, dirty_vram->begin_pfn, dirty_vram->end_pfn, 
-                          p2m_ram_rw, p2m_ram_logdirty);
-
-    flush_tlb_mask(d->domain_dirty_cpumask);
-    return 0;
-}
-
-static int hap_disable_vram_tracking(struct domain *d)
-{
-    struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
-
-    if ( !dirty_vram )
-        return -EINVAL;
-
-    paging_lock(d);
-    d->arch.paging.mode &= ~PG_log_dirty;
-    paging_unlock(d);
-
-    /* set l1e entries of P2M table with normal mode */
-    p2m_change_type_range(d, dirty_vram->begin_pfn, dirty_vram->end_pfn, 
-                          p2m_ram_logdirty, p2m_ram_rw);
-
-    flush_tlb_mask(d->domain_dirty_cpumask);
-    return 0;
-}
-
-static void hap_clean_vram_tracking(struct domain *d)
-{
-    struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
-
-    if ( !dirty_vram )
-        return;
-
-    /* set l1e entries of P2M table to be read-only. */
-    p2m_change_type_range(d, dirty_vram->begin_pfn, dirty_vram->end_pfn, 
-                          p2m_ram_rw, p2m_ram_logdirty);
-
-    flush_tlb_mask(d->domain_dirty_cpumask);
-}
-
-static void hap_vram_tracking_init(struct domain *d)
-{
-    paging_log_dirty_init(d, hap_enable_vram_tracking,
-                          hap_disable_vram_tracking,
-                          hap_clean_vram_tracking);
-}
-
-int hap_track_dirty_vram(struct domain *d,
-                         unsigned long begin_pfn,
-                         unsigned long nr,
-                         XEN_GUEST_HANDLE_64(uint8) dirty_bitmap)
-{
-    long rc = 0;
-    struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
-
-    if ( nr )
-    {
-        if ( paging_mode_log_dirty(d) && dirty_vram )
-        {
-            if ( begin_pfn != dirty_vram->begin_pfn ||
-                 begin_pfn + nr != dirty_vram->end_pfn )
-            {
-                paging_log_dirty_disable(d);
-                dirty_vram->begin_pfn = begin_pfn;
-                dirty_vram->end_pfn = begin_pfn + nr;
-                rc = paging_log_dirty_enable(d);
-                if (rc != 0)
-                    goto param_fail;
-            }
-        }
-        else if ( !paging_mode_log_dirty(d) && !dirty_vram )
-        {
-            rc = -ENOMEM;
-            if ( (dirty_vram = xmalloc(struct sh_dirty_vram)) == NULL )
-                goto param_fail;
-
-            dirty_vram->begin_pfn = begin_pfn;
-            dirty_vram->end_pfn = begin_pfn + nr;
-            d->arch.hvm_domain.dirty_vram = dirty_vram;
-            hap_vram_tracking_init(d);
-            rc = paging_log_dirty_enable(d);
-            if (rc != 0)
-                goto param_fail;
-        }
-        else
-        {
-            if ( !paging_mode_log_dirty(d) && dirty_vram )
-                rc = -EINVAL;
-            else
-                rc = -ENODATA;
-            goto param_fail;
-        }
-        /* get the bitmap */
-        rc = paging_log_dirty_range(d, begin_pfn, nr, dirty_bitmap);
-    }
-    else
-    {
-        if ( paging_mode_log_dirty(d) && dirty_vram ) {
-            rc = paging_log_dirty_disable(d);
-            xfree(dirty_vram);
-            dirty_vram = d->arch.hvm_domain.dirty_vram = NULL;
-        } else
-            rc = 0;
-    }
-
-    return rc;
-
-param_fail:
-    if ( dirty_vram )
-    {
-        xfree(dirty_vram);
-        dirty_vram = d->arch.hvm_domain.dirty_vram = NULL;
-    }
-    return rc;
-}
-
-/************************************************/
 /*            HAP LOG DIRTY SUPPORT             */
 /************************************************/
 
@@ -223,14 +91,12 @@ static void hap_clean_dirty_bitmap(struct domain *d)
 
 void hap_logdirty_init(struct domain *d)
 {
-    struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    struct dv_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
     if ( paging_mode_log_dirty(d) && dirty_vram )
     {
         paging_log_dirty_disable(d);
-        xfree(dirty_vram);
-        dirty_vram = d->arch.hvm_domain.dirty_vram = NULL;
+        dirty_vram_free(d);
     }
-
     /* Reinitialize logdirty mechanism */
     paging_log_dirty_init(d, hap_enable_log_dirty,
                           hap_disable_log_dirty,
diff --git a/xen/arch/x86/mm/paging.c b/xen/arch/x86/mm/paging.c
index ea44e39..f0ac512 100644
--- a/xen/arch/x86/mm/paging.c
+++ b/xen/arch/x86/mm/paging.c
@@ -27,6 +27,7 @@
 #include <asm/p2m.h>
 #include <asm/hap.h>
 #include <asm/hvm/nestedhvm.h>
+#include <asm/dirty_vram.h>
 #include <xen/numa.h>
 #include <xsm/xsm.h>
 
@@ -192,15 +193,11 @@ int paging_log_dirty_disable(struct domain *d)
     return ret;
 }
 
-/* Mark a page as dirty */
+/* Given a guest mfn, mark a page as dirty */
 void paging_mark_dirty(struct domain *d, unsigned long guest_mfn)
 {
     unsigned long pfn;
     mfn_t gmfn;
-    int changed;
-    mfn_t mfn, *l4, *l3, *l2;
-    unsigned long *l1;
-    int i1, i2, i3, i4;
 
     gmfn = _mfn(guest_mfn);
 
@@ -210,6 +207,19 @@ void paging_mark_dirty(struct domain *d, unsigned long guest_mfn)
 
     /* We /really/ mean PFN here, even for non-translated guests. */
     pfn = get_gpfn_from_mfn(mfn_x(gmfn));
+    paging_mark_dirty_gpfn(d, pfn);
+}
+
+
+/* Given a guest pfn, mark a page as dirty */
+void paging_mark_dirty_gpfn(struct domain *d, unsigned long pfn)
+{
+    int changed;
+    mfn_t mfn, *l4, *l3, *l2;
+    unsigned long *l1;
+    int i1, i2, i3, i4;
+    dv_range_t *range;
+    
     /* Shared MFNs should NEVER be marked dirty */
     BUG_ON(SHARED_M2P(pfn));
 
@@ -229,6 +239,11 @@ void paging_mark_dirty(struct domain *d, unsigned long guest_mfn)
     /* Recursive: this is called from inside the shadow code */
     paging_lock_recursive(d);
 
+    d->arch.paging.log_dirty.dirty_count++;
+    range = dirty_vram_range_find_gfn(d, pfn);
+    if ( range )
+        range->dirty_count++;
+
     if ( unlikely(!mfn_valid(d->arch.paging.log_dirty.top)) ) 
     {
          d->arch.paging.log_dirty.top = paging_new_log_dirty_node(d);
@@ -333,8 +348,11 @@ int paging_log_dirty_op(struct domain *d, struct xen_domctl_shadow_op *sc)
     mfn_t *l4, *l3, *l2;
     unsigned long *l1;
     int i4, i3, i2;
+    struct p2m_domain *p2m = p2m_get_hostp2m(d);
 
     domain_pause(d);
+    /* Locking hierarchy requires p2m lock to be taken first */
+    p2m_lock(p2m);
     paging_lock(d);
 
     clean = (sc->op == XEN_DOMCTL_SHADOW_OP_CLEAN);
@@ -345,6 +363,16 @@ int paging_log_dirty_op(struct domain *d, struct xen_domctl_shadow_op *sc)
                  d->arch.paging.log_dirty.fault_count,
                  d->arch.paging.log_dirty.dirty_count);
 
+    if ( hap_enabled(d) && d->arch.hvm_domain.dirty_vram )
+    {
+        /*
+         * If we're cleaning/peeking all guest memory, we should not
+         * be tracking dirty vram.
+         */
+        rv = -EINVAL;
+        goto out;
+    }
+
     sc->stats.fault_count = d->arch.paging.log_dirty.fault_count;
     sc->stats.dirty_count = d->arch.paging.log_dirty.dirty_count;
 
@@ -424,170 +452,64 @@ int paging_log_dirty_op(struct domain *d, struct xen_domctl_shadow_op *sc)
 
     if ( clean )
     {
-        /* We need to further call clean_dirty_bitmap() functions of specific
-         * paging modes (shadow or hap).  Safe because the domain is paused. */
-        d->arch.paging.log_dirty.clean_dirty_bitmap(d);
+        /* clean_dirty_bitmap is NULL when tracking dirty vram */
+        if ( d->arch.paging.log_dirty.clean_dirty_bitmap )
+        {
+            /*
+             * We need to call clean_dirty_bitmap() functions of specific
+             * paging modes (shadow or hap).
+             * Safe because the domain is paused.
+             */
+            d->arch.paging.log_dirty.clean_dirty_bitmap(d);
+        }
     }
     domain_unpause(d);
     return rv;
 
  out:
     paging_unlock(d);
+    p2m_unlock(p2m);
     domain_unpause(d);
     return rv;
 }
 
-int paging_log_dirty_range(struct domain *d,
-                            unsigned long begin_pfn,
-                            unsigned long nr,
-                            XEN_GUEST_HANDLE_64(uint8) dirty_bitmap)
+void paging_log_dirty_range(struct domain *d,
+                           unsigned long begin_pfn,
+                           unsigned long nr,
+                           uint8_t *dirty_bitmap)
 {
-    int rv = 0;
-    unsigned long pages = 0;
-    mfn_t *l4, *l3, *l2;
-    unsigned long *l1;
-    int b1, b2, b3, b4;
-    int i2, i3, i4;
-
-    d->arch.paging.log_dirty.clean_dirty_bitmap(d);
+    struct p2m_domain *p2m = p2m_get_hostp2m(d);
+    dv_range_t *range;
+    unsigned int range_dirty_count = 0;
+    
+    p2m_lock(p2m);
     paging_lock(d);
 
-    PAGING_DEBUG(LOGDIRTY, "log-dirty-range: dom %u faults=%u dirty=%u\n",
-                 d->domain_id,
-                 d->arch.paging.log_dirty.fault_count,
-                 d->arch.paging.log_dirty.dirty_count);
-
-    if ( unlikely(d->arch.paging.log_dirty.failed_allocs) ) {
-        printk("%s: %d failed page allocs while logging dirty pages\n",
-               __FUNCTION__, d->arch.paging.log_dirty.failed_allocs);
-        rv = -ENOMEM;
-        goto out;
-    }
-
-    if ( !d->arch.paging.log_dirty.fault_count &&
-         !d->arch.paging.log_dirty.dirty_count ) {
-        unsigned int size = BITS_TO_LONGS(nr);
-
-        if ( clear_guest(dirty_bitmap, size * BYTES_PER_LONG) != 0 )
-            rv = -EFAULT;
-        goto out;
-    }
-    d->arch.paging.log_dirty.fault_count = 0;
-    d->arch.paging.log_dirty.dirty_count = 0;
-
-    b1 = L1_LOGDIRTY_IDX(begin_pfn);
-    b2 = L2_LOGDIRTY_IDX(begin_pfn);
-    b3 = L3_LOGDIRTY_IDX(begin_pfn);
-    b4 = L4_LOGDIRTY_IDX(begin_pfn);
-    l4 = paging_map_log_dirty_bitmap(d);
-
-    for ( i4 = b4;
-          (pages < nr) && (i4 < LOGDIRTY_NODE_ENTRIES);
-          i4++ )
+    /* Only called when tracking dirty vram in HAP mode */
+    ASSERT(hap_enabled(d) && d->arch.hvm_domain.dirty_vram);
+    
+    range = dirty_vram_range_find_gfn(d, begin_pfn);
+    if ( range )
     {
-        l3 = (l4 && mfn_valid(l4[i4])) ? map_domain_page(mfn_x(l4[i4])) : NULL;
-        for ( i3 = b3;
-              (pages < nr) && (i3 < LOGDIRTY_NODE_ENTRIES);
-              i3++ )
-        {
-            l2 = ((l3 && mfn_valid(l3[i3])) ?
-                  map_domain_page(mfn_x(l3[i3])) : NULL);
-            for ( i2 = b2;
-                  (pages < nr) && (i2 < LOGDIRTY_NODE_ENTRIES);
-                  i2++ )
-            {
-                unsigned int bytes = PAGE_SIZE;
-                uint8_t *s;
-                l1 = ((l2 && mfn_valid(l2[i2])) ?
-                      map_domain_page(mfn_x(l2[i2])) : NULL);
-
-                s = ((uint8_t*)l1) + (b1 >> 3);
-                bytes -= b1 >> 3;
-
-                if ( likely(((nr - pages + 7) >> 3) < bytes) )
-                    bytes = (unsigned int)((nr - pages + 7) >> 3);
-
-                if ( !l1 )
-                {
-                    if ( clear_guest_offset(dirty_bitmap, pages >> 3,
-                                            bytes) != 0 )
-                    {
-                        rv = -EFAULT;
-                        goto out;
-                    }
-                }
-                /* begin_pfn is not 32K aligned, hence we have to bit
-                 * shift the bitmap */
-                else if ( b1 & 0x7 )
-                {
-                    int i, j;
-                    uint32_t *l = (uint32_t*) s;
-                    int bits = b1 & 0x7;
-                    int bitmask = (1 << bits) - 1;
-                    int size = (bytes + BYTES_PER_LONG - 1) / BYTES_PER_LONG;
-                    unsigned long bitmap[size];
-                    static unsigned long printed = 0;
-
-                    if ( printed != begin_pfn )
-                    {
-                        dprintk(XENLOG_DEBUG, "%s: begin_pfn %lx is not 32K aligned!\n",
-                                __FUNCTION__, begin_pfn);
-                        printed = begin_pfn;
-                    }
-
-                    for ( i = 0; i < size - 1; i++, l++ ) {
-                        bitmap[i] = ((*l) >> bits) |
-                            (((*((uint8_t*)(l + 1))) & bitmask) << (sizeof(*l) * 8 - bits));
-                    }
-                    s = (uint8_t*) l;
-                    size = BYTES_PER_LONG - ((b1 >> 3) & 0x3);
-                    bitmap[i] = 0;
-                    for ( j = 0; j < size; j++, s++ )
-                        bitmap[i] |= (*s) << (j * 8);
-                    bitmap[i] = (bitmap[i] >> bits) | (bitmask << (size * 8 - bits));
-                    if ( copy_to_guest_offset(dirty_bitmap, (pages >> 3),
-                                (uint8_t*) bitmap, bytes) != 0 )
-                    {
-                        rv = -EFAULT;
-                        goto out;
-                    }
-                }
-                else
-                {
-                    if ( copy_to_guest_offset(dirty_bitmap, pages >> 3,
-                                              s, bytes) != 0 )
-                    {
-                        rv = -EFAULT;
-                        goto out;
-                    }
-                }
-
-                pages += bytes << 3;
-                if ( l1 )
-                {
-                    clear_page(l1);
-                    unmap_domain_page(l1);
-                }
-                b1 = b1 & 0x7;
-            }
-            b2 = 0;
-            if ( l2 )
-                unmap_domain_page(l2);
-        }
-        b3 = 0;
-        if ( l3 )
-            unmap_domain_page(l3);
+        range_dirty_count = range->dirty_count;
+        range->dirty_count = 0;
     }
-    if ( l4 )
-        unmap_domain_page(l4);
-
-    paging_unlock(d);
+    
+    if ( !range_dirty_count)
+        goto out;
 
-    return rv;
+    PAGING_DEBUG(LOGDIRTY,
+                 "log-dirty-range: dom %u [%05lx:%05lx] range_dirty=%u\n",
+                 d->domain_id,
+                 begin_pfn,
+                 range->end_pfn,
+                 range_dirty_count);
 
+    hap_clean_vram_tracking_range(d, begin_pfn, nr, dirty_bitmap);
  out:
     paging_unlock(d);
-    return rv;
+    p2m_unlock(p2m);
+    return;
 }
 
 /* Note that this function takes three function pointers. Callers must supply
diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c
index ce79131..30829b6 100644
--- a/xen/arch/x86/mm/shadow/common.c
+++ b/xen/arch/x86/mm/shadow/common.c
@@ -36,6 +36,7 @@
 #include <asm/current.h>
 #include <asm/flushtlb.h>
 #include <asm/shadow.h>
+#include <asm/dirty_vram.h>
 #include <xen/numa.h>
 #include "private.h"
 
@@ -3121,12 +3122,7 @@ void shadow_teardown(struct domain *d)
      * calls now that we've torn down the bitmap */
     d->arch.paging.mode &= ~PG_log_dirty;
 
-    if (d->arch.hvm_domain.dirty_vram) {
-        xfree(d->arch.hvm_domain.dirty_vram->sl1ma);
-        xfree(d->arch.hvm_domain.dirty_vram->dirty_bitmap);
-        xfree(d->arch.hvm_domain.dirty_vram);
-        d->arch.hvm_domain.dirty_vram = NULL;
-    }
+    dirty_vram_free(d);
 
     paging_unlock(d);
 
@@ -3463,177 +3459,209 @@ void shadow_clean_dirty_bitmap(struct domain *d)
 
 
 /**************************************************************************/
-/* VRAM dirty tracking support */
-int shadow_track_dirty_vram(struct domain *d,
-                            unsigned long begin_pfn,
-                            unsigned long nr,
-                            XEN_GUEST_HANDLE_64(uint8) dirty_bitmap)
-{
-    int rc;
-    unsigned long end_pfn = begin_pfn + nr;
-    unsigned long dirty_size = (nr + 7) / 8;
-    int flush_tlb = 0;
-    unsigned long i;
-    p2m_type_t t;
-    struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
-    struct p2m_domain *p2m = p2m_get_hostp2m(d);
-
-    if (end_pfn < begin_pfn
-            || begin_pfn > p2m->max_mapped_pfn
-            || end_pfn >= p2m->max_mapped_pfn)
-        return -EINVAL;
-
-    /* We perform p2m lookups, so lock the p2m upfront to avoid deadlock */
-    p2m_lock(p2m_get_hostp2m(d));
-    paging_lock(d);
+/* Support functions for shadow-based dirty VRAM code */
 
-    if ( dirty_vram && (!nr ||
-             ( begin_pfn != dirty_vram->begin_pfn
-            || end_pfn   != dirty_vram->end_pfn )) )
-    {
-        /* Different tracking, tear the previous down. */
-        gdprintk(XENLOG_INFO, "stopping tracking VRAM %lx - %lx\n", dirty_vram->begin_pfn, dirty_vram->end_pfn);
-        xfree(dirty_vram->sl1ma);
-        xfree(dirty_vram->dirty_bitmap);
-        xfree(dirty_vram);
-        dirty_vram = d->arch.hvm_domain.dirty_vram = NULL;
-    }
+#define DEBUG_unshadow_sl1ma                  0          
+#define DEBUG_unshadow_sl1ma_detail           0
+#define DEBUG_count_initial_mappings          1
 
-    if ( !nr )
+/* smfn is no longer a shadow page.  Remove it from any
+ * dirty vram range mapping. */
+void
+dirty_vram_delete_shadow(struct vcpu *v,
+                         unsigned long gfn,
+                         unsigned int shadow_type, 
+                         mfn_t smfn)
+{
+    static unsigned int l1_shadow_mask = 
+          1 << SH_type_l1_32_shadow
+        | 1 << SH_type_fl1_32_shadow
+        | 1 << SH_type_l1_pae_shadow
+        | 1 << SH_type_fl1_pae_shadow
+        | 1 << SH_type_l1_64_shadow
+        | 1 << SH_type_fl1_64_shadow
+        ;
+    struct domain *d = v->domain;
+    dv_dirty_vram_t *dirty_vram;
+    struct list_head *curr, *next;
+    
+    ASSERT(paging_locked_by_me(d));
+    /* Ignore all but level 1 shadows */
+    
+    if ((l1_shadow_mask & (1 << shadow_type)) == 0)
     {
-        rc = 0;
         goto out;
     }
 
-    /* This should happen seldomly (Video mode change),
-     * no need to be careful. */
+    dirty_vram = d->arch.hvm_domain.dirty_vram;
     if ( !dirty_vram )
     {
-        /* Throw away all the shadows rather than walking through them 
-         * up to nr times getting rid of mappings of each pfn */
-        shadow_blow_tables(d);
-
-        gdprintk(XENLOG_INFO, "tracking VRAM %lx - %lx\n", begin_pfn, end_pfn);
-
-        rc = -ENOMEM;
-        if ( (dirty_vram = xmalloc(struct sh_dirty_vram)) == NULL )
-            goto out;
-        dirty_vram->begin_pfn = begin_pfn;
-        dirty_vram->end_pfn = end_pfn;
-        d->arch.hvm_domain.dirty_vram = dirty_vram;
-
-        if ( (dirty_vram->sl1ma = xmalloc_array(paddr_t, nr)) == NULL )
-            goto out_dirty_vram;
-        memset(dirty_vram->sl1ma, ~0, sizeof(paddr_t) * nr);
-
-        if ( (dirty_vram->dirty_bitmap = xzalloc_array(uint8_t, dirty_size)) == NULL )
-            goto out_sl1ma;
-
-        dirty_vram->last_dirty = NOW();
-
-        /* Tell the caller that this time we could not track dirty bits. */
-        rc = -ENODATA;
-    }
-    else if (dirty_vram->last_dirty == -1)
-    {
-        /* still completely clean, just copy our empty bitmap */
-        rc = -EFAULT;
-        if ( copy_to_guest(dirty_bitmap, dirty_vram->dirty_bitmap, dirty_size) == 0 )
-            rc = 0;
+        goto out;
     }
-    else
+        
+    list_for_each_safe(curr, next, &dirty_vram->range_head)
     {
-        /* Iterate over VRAM to track dirty bits. */
-        for ( i = 0; i < nr; i++ ) {
-            mfn_t mfn = get_gfn_query_unlocked(d, begin_pfn + i, &t);
-            struct page_info *page;
-            int dirty = 0;
-            paddr_t sl1ma = dirty_vram->sl1ma[i];
-
-            if (mfn_x(mfn) == INVALID_MFN)
-            {
-                dirty = 1;
-            }
-            else
+        dv_range_t *range = list_entry(curr, dv_range_t, range_link);
+        unsigned long i;
+        int max_mappings = 1, mappings = 0;
+        int unshadowed = 0;
+        for (i = 0; i != range->end_pfn - range->begin_pfn; i++)
+        {
+            dv_paddr_link_t *pl = &range->pl_tab[ i ].mapping;
+            dv_paddr_link_t **ppl = NULL;
+            mappings = 0;
+            
+            while (pl != NULL)
             {
-                page = mfn_to_page(mfn);
-                switch (page->u.inuse.type_info & PGT_count_mask)
-                {
-                case 0:
-                    /* No guest reference, nothing to track. */
-                    break;
-                case 1:
-                    /* One guest reference. */
-                    if ( sl1ma == INVALID_PADDR )
-                    {
-                        /* We don't know which sl1e points to this, too bad. */
-                        dirty = 1;
-                        /* TODO: Heuristics for finding the single mapping of
-                         * this gmfn */
-                        flush_tlb |= sh_remove_all_mappings(d->vcpu[0], mfn);
-                    }
-                    else
-                    {
-                        /* Hopefully the most common case: only one mapping,
-                         * whose dirty bit we can use. */
-                        l1_pgentry_t *sl1e = maddr_to_virt(sl1ma);
-
-                        if ( l1e_get_flags(*sl1e) & _PAGE_DIRTY )
-                        {
-                            dirty = 1;
-                            /* Note: this is atomic, so we may clear a
-                             * _PAGE_ACCESSED set by another processor. */
-                            l1e_remove_flags(*sl1e, _PAGE_DIRTY);
-                            flush_tlb = 1;
-                        }
-                    }
-                    break;
-                default:
-                    /* More than one guest reference,
-                     * we don't afford tracking that. */
-                    dirty = 1;
+                paddr_t sl1ma = pl->sl1ma;
+                unsigned long sl1mn;
+                
+                if (sl1ma == INVALID_PADDR )
                     break;
+                
+                sl1mn = sl1ma >> PAGE_SHIFT;
+                if (sl1mn == mfn_x(smfn)) {
+#if DEBUG_unshadow_sl1ma_detail
+                    gdprintk(XENLOG_DEBUG,
+                             "[%lx] gfn[%lx] unshadow sl1ma:%lx\n",
+                             mfn_x(smfn),
+                             range->begin_pfn + i,
+                             sl1ma);
+#endif
+                    unshadowed++;
+                    pl = free_paddr_link(d, ppl, pl);
+                    --range->nr_mappings;
+                }
+                else
+                {
+                    ppl = &pl->pl_next;
+                    pl = *ppl;
+                    mappings++;
                 }
             }
-
-            if ( dirty )
+        }
+        if (mappings > max_mappings)
+            max_mappings = mappings;
+        
+        if (unshadowed) {
+#if DEBUG_unshadow_sl1ma
+            gdprintk(XENLOG_DEBUG,
+                     "[%lx] gfn[%05lx:%05lx] unshadowed:%d mappings:0x%x max_mappings:%d\n",
+                     mfn_x(smfn),
+                     range->begin_pfn, range->end_pfn,
+                     unshadowed, range->nr_mappings, max_mappings);
+#endif
+            if ( range->nr_mappings == 0 )
             {
-                dirty_vram->dirty_bitmap[i / 8] |= 1 << (i % 8);
-                dirty_vram->last_dirty = NOW();
+                dirty_vram_range_free(d, range);                    
             }
         }
+    }
+ out:
+    return;
+}
+
+
+typedef int (*hash_pfn_callback_t)(struct vcpu *v,
+                                   mfn_t smfn,
+                                   unsigned long begin_pfn,
+                                   unsigned long end_pfn,
+                                   int *removed);
 
-        rc = -EFAULT;
-        if ( copy_to_guest(dirty_bitmap, dirty_vram->dirty_bitmap, dirty_size) == 0 ) {
-            memset(dirty_vram->dirty_bitmap, 0, dirty_size);
-            if (dirty_vram->last_dirty + SECONDS(2) < NOW())
+static int hash_pfn_foreach(struct vcpu *v, 
+                            unsigned int callback_mask, 
+                            hash_pfn_callback_t callbacks[], 
+                            unsigned long begin_pfn,
+                            unsigned long end_pfn)
+/* Walk the hash table looking at the types of the entries and 
+ * calling the appropriate callback function for each entry. 
+ * The mask determines which shadow types we call back for, and the array
+ * of callbacks tells us which function to call.
+ * Any callback may return non-zero to let us skip the rest of the scan. 
+ *
+ * WARNING: Callbacks MUST NOT add or remove hash entries unless they 
+ * then return non-zero to terminate the scan. */
+{
+    int i, done = 0, removed = 0;
+    struct domain *d = v->domain;
+    struct page_info *x;
+
+    /* Say we're here, to stop hash-lookups reordering the chains */
+    ASSERT(paging_locked_by_me(d));
+    ASSERT(d->arch.paging.shadow.hash_walking == 0);
+    d->arch.paging.shadow.hash_walking = 1;
+
+    for ( i = 0; i < SHADOW_HASH_BUCKETS; i++ ) 
+    {
+        /* WARNING: This is not safe against changes to the hash table.
+         * The callback *must* return non-zero if it has inserted or
+         * deleted anything from the hash (lookups are OK, though). */
+        for ( x = d->arch.paging.shadow.hash_table[i]; x; x = next_shadow(x) )
+        {
+            if ( callback_mask & (1 << x->u.sh.type) )
             {
-                /* was clean for more than two seconds, try to disable guest
-                 * write access */
-                for ( i = begin_pfn; i < end_pfn; i++ ) {
-                    mfn_t mfn = get_gfn_query_unlocked(d, i, &t);
-                    if (mfn_x(mfn) != INVALID_MFN)
-                        flush_tlb |= sh_remove_write_access(d->vcpu[0], mfn, 1, 0);
-                }
-                dirty_vram->last_dirty = -1;
+                ASSERT(x->u.sh.type <= 15);
+                ASSERT(callbacks[x->u.sh.type] != NULL);
+                done = callbacks[x->u.sh.type](v, page_to_mfn(x), 
+                                               begin_pfn, end_pfn,
+                                               &removed);
+                if ( done ) break;
             }
-            rc = 0;
         }
+        if ( done ) break; 
     }
-    if ( flush_tlb )
-        flush_tlb_mask(d->domain_dirty_cpumask);
-    goto out;
+    d->arch.paging.shadow.hash_walking = 0;
+    return removed;
+}
+
+void sh_find_all_vram_mappings(struct vcpu *v,
+                               dv_range_t *range)
+{
+    /* Dispatch table for getting per-type functions */
+    static hash_pfn_callback_t callbacks[SH_type_unused] = {
+        NULL, /* none    */
+        SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, 2), /* l1_32   */
+        SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, 2), /* fl1_32  */
+        NULL, /* l2_32   */
+        SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, 3), /* l1_pae  */
+        SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, 3), /* fl1_pae */
+        NULL, /* l2_pae  */
+        NULL, /* l2h_pae */
+#if CONFIG_PAGING_LEVELS >= 4
+        SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, 4), /* l1_64   */
+        SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, 4), /* fl1_64  */
+#else
+        NULL, /* l1_64   */
+        NULL, /* fl1_64  */
+#endif
+        NULL, /* l2_64   */
+        NULL, /* l2h_64  */
+        NULL, /* l3_64   */
+        NULL, /* l4_64   */
+        NULL, /* p2m     */
+        NULL  /* unused  */
+    };
 
-out_sl1ma:
-    xfree(dirty_vram->sl1ma);
-out_dirty_vram:
-    xfree(dirty_vram);
-    dirty_vram = d->arch.hvm_domain.dirty_vram = NULL;
+    static unsigned int callback_mask = 
+          1 << SH_type_l1_32_shadow
+        | 1 << SH_type_fl1_32_shadow
+        | 1 << SH_type_l1_pae_shadow
+        | 1 << SH_type_fl1_pae_shadow
+        | 1 << SH_type_l1_64_shadow
+        | 1 << SH_type_fl1_64_shadow
+        ;
 
-out:
-    paging_unlock(d);
-    p2m_unlock(p2m_get_hostp2m(d));
-    return rc;
+    perfc_incr(shadow_mappings);
+
+    hash_pfn_foreach(v, callback_mask, callbacks,
+                     range->begin_pfn,
+                     range->end_pfn);
+
+#if DEBUG_count_initial_mappings
+    gdprintk(XENLOG_DEBUG, "[%05lx:%05lx] count of initial mappings:%d\n",
+             range->begin_pfn, range->end_pfn,
+             range->nr_mappings);
+#endif
 }
 
 /**************************************************************************/
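The callback_mask / dispatch-table walk used by hash_pfn_foreach() above is
a generic pattern: index an array of handlers by entry type, use a bitmask
to select which types to visit, and let any handler stop the walk by
returning non-zero.  A self-contained toy version follows (all names are
hypothetical and have nothing to do with the real shadow hash table):

#include <stdio.h>

enum { T_NONE, T_L1, T_L2, T_MAX };

typedef int (*cb_t)(int value);

static int show_l1(int v) { printf("l1:%d\n", v); return 0; }
static int show_l2(int v) { printf("l2:%d\n", v); return v > 10; } /* stop on big values */

struct entry { int type; int value; };

static void walk(struct entry *tab, int n, unsigned int mask, cb_t cbs[T_MAX])
{
    int i, done = 0;

    for ( i = 0; i < n && !done; i++ )
        if ( mask & (1u << tab[i].type) )
            done = cbs[tab[i].type](tab[i].value);   /* non-zero ends the walk */
}

int main(void)
{
    struct entry tab[] = {
        { T_L1, 1 }, { T_NONE, 2 }, { T_L2, 3 }, { T_L2, 42 }, { T_L1, 5 },
    };
    cb_t cbs[T_MAX] = { NULL, show_l1, show_l2 };
    unsigned int mask = (1u << T_L1) | (1u << T_L2);

    walk(tab, sizeof(tab) / sizeof(tab[0]), mask, cbs);
    return 0;
}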
diff --git a/xen/arch/x86/mm/shadow/multi.c b/xen/arch/x86/mm/shadow/multi.c
index 4967da1..bb983bc 100644
--- a/xen/arch/x86/mm/shadow/multi.c
+++ b/xen/arch/x86/mm/shadow/multi.c
@@ -35,6 +35,7 @@
 #include <asm/flushtlb.h>
 #include <asm/hvm/hvm.h>
 #include <asm/hvm/cacheattr.h>
+#include <asm/dirty_vram.h>
 #include <asm/mtrr.h>
 #include <asm/guest_pt.h>
 #include <public/sched.h>
@@ -149,6 +150,10 @@ delete_fl1_shadow_status(struct vcpu *v, gfn_t gfn, mfn_t smfn)
     SHADOW_PRINTK("gfn=%"SH_PRI_gfn", type=%08x, smfn=%05lx\n",
                    gfn_x(gfn), SH_type_fl1_shadow, mfn_x(smfn));
     ASSERT(mfn_to_page(smfn)->u.sh.head);
+
+    /* Removing any dv_paddr_links to the erstwhile shadow page */
+    dirty_vram_delete_shadow(v, gfn_x(gfn), SH_type_fl1_shadow, smfn);
+    
     shadow_hash_delete(v, gfn_x(gfn), SH_type_fl1_shadow, smfn);
 }
 
@@ -160,6 +165,10 @@ delete_shadow_status(struct vcpu *v, mfn_t gmfn, u32 shadow_type, mfn_t smfn)
                    v->domain->domain_id, v->vcpu_id,
                    mfn_x(gmfn), shadow_type, mfn_x(smfn));
     ASSERT(mfn_to_page(smfn)->u.sh.head);
+    
+    /* Removing any dv_paddr_links to the erstwhile shadow page */
+    dirty_vram_delete_shadow(v, mfn_x(gmfn), shadow_type, smfn);
+    
     shadow_hash_delete(v, mfn_x(gmfn), shadow_type, smfn);
     /* 32-on-64 PV guests don't own their l4 pages; see set_shadow_status */
     if ( !is_pv_32on64_vcpu(v) || shadow_type != SH_type_l4_64_shadow )
@@ -516,7 +525,6 @@ _sh_propagate(struct vcpu *v,
     guest_l1e_t guest_entry = { guest_intpte };
     shadow_l1e_t *sp = shadow_entry_ptr;
     struct domain *d = v->domain;
-    struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
     gfn_t target_gfn = guest_l1e_get_gfn(guest_entry);
     u32 pass_thru_flags;
     u32 gflags, sflags;
@@ -663,17 +671,6 @@ _sh_propagate(struct vcpu *v,
         }
     }
 
-    if ( unlikely((level == 1) && dirty_vram
-            && dirty_vram->last_dirty == -1
-            && gfn_x(target_gfn) >= dirty_vram->begin_pfn
-            && gfn_x(target_gfn) < dirty_vram->end_pfn) )
-    {
-        if ( ft & FETCH_TYPE_WRITE )
-            dirty_vram->last_dirty = NOW();
-        else
-            sflags &= ~_PAGE_RW;
-    }
-
     /* Read-only memory */
     if ( p2m_is_readonly(p2mt) ||
          (p2mt == p2m_mmio_direct &&
@@ -1072,101 +1069,60 @@ static int shadow_set_l2e(struct vcpu *v,
     return flags;
 }
 
-static inline void shadow_vram_get_l1e(shadow_l1e_t new_sl1e,
+/* shadow_vram_fix_l1e()
+ *
+ * Tests L1PTEs as they are modified, looking for when they start to (or
+ * cease to) point to frame buffer pages.  If the old and new gfns differ,
+ * calls dirty_vram_range_update() to update the dirty_vram structures.
+ */
+static inline void shadow_vram_fix_l1e(shadow_l1e_t old_sl1e,
+                                       shadow_l1e_t new_sl1e,
                                        shadow_l1e_t *sl1e,
                                        mfn_t sl1mfn,
                                        struct domain *d)
 { 
-    mfn_t mfn = shadow_l1e_get_mfn(new_sl1e);
-    int flags = shadow_l1e_get_flags(new_sl1e);
-    unsigned long gfn;
-    struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    mfn_t new_mfn, old_mfn;
+    unsigned long new_gfn = INVALID_M2P_ENTRY, old_gfn = INVALID_M2P_ENTRY;
+    paddr_t sl1ma;
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
 
-    if ( !dirty_vram         /* tracking disabled? */
-         || !(flags & _PAGE_RW) /* read-only mapping? */
-         || !mfn_valid(mfn) )   /* mfn can be invalid in mmio_direct */
+    if ( !dirty_vram )
         return;
 
-    gfn = mfn_to_gfn(d, mfn);
-    /* Page sharing not supported on shadow PTs */
-    BUG_ON(SHARED_M2P(gfn));
+    sl1ma = pfn_to_paddr(mfn_x(sl1mfn)) | ((unsigned long)sl1e & ~PAGE_MASK);
 
-    if ( (gfn >= dirty_vram->begin_pfn) && (gfn < dirty_vram->end_pfn) )
+    old_mfn = shadow_l1e_get_mfn(old_sl1e);
+
+    if ( !sh_l1e_is_magic(old_sl1e) &&
+         (l1e_get_flags(old_sl1e) & _PAGE_PRESENT) &&
+         mfn_valid(old_mfn))
     {
-        unsigned long i = gfn - dirty_vram->begin_pfn;
-        struct page_info *page = mfn_to_page(mfn);
-        
-        if ( (page->u.inuse.type_info & PGT_count_mask) == 1 )
-            /* Initial guest reference, record it */
-            dirty_vram->sl1ma[i] = pfn_to_paddr(mfn_x(sl1mfn))
-                | ((unsigned long)sl1e & ~PAGE_MASK);
+        old_gfn = mfn_to_gfn(d, old_mfn);
     }
-}
-
-static inline void shadow_vram_put_l1e(shadow_l1e_t old_sl1e,
-                                       shadow_l1e_t *sl1e,
-                                       mfn_t sl1mfn,
-                                       struct domain *d)
-{
-    mfn_t mfn = shadow_l1e_get_mfn(old_sl1e);
-    int flags = shadow_l1e_get_flags(old_sl1e);
-    unsigned long gfn;
-    struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
-
-    if ( !dirty_vram         /* tracking disabled? */
-         || !(flags & _PAGE_RW) /* read-only mapping? */
-         || !mfn_valid(mfn) )   /* mfn can be invalid in mmio_direct */
-        return;
-
-    gfn = mfn_to_gfn(d, mfn);
-    /* Page sharing not supported on shadow PTs */
-    BUG_ON(SHARED_M2P(gfn));
-
-    if ( (gfn >= dirty_vram->begin_pfn) && (gfn < dirty_vram->end_pfn) )
+    
+    new_mfn = shadow_l1e_get_mfn(new_sl1e);
+    if ( !sh_l1e_is_magic(new_sl1e) &&
+         (l1e_get_flags(new_sl1e) & _PAGE_PRESENT) &&
+         mfn_valid(new_mfn))
     {
-        unsigned long i = gfn - dirty_vram->begin_pfn;
-        struct page_info *page = mfn_to_page(mfn);
-        int dirty = 0;
-        paddr_t sl1ma = pfn_to_paddr(mfn_x(sl1mfn))
-            | ((unsigned long)sl1e & ~PAGE_MASK);
+        new_gfn = mfn_to_gfn(d, new_mfn);
+    }
 
-        if ( (page->u.inuse.type_info & PGT_count_mask) == 1 )
-        {
-            /* Last reference */
-            if ( dirty_vram->sl1ma[i] == INVALID_PADDR ) {
-                /* We didn't know it was that one, let's say it is dirty */
-                dirty = 1;
-            }
-            else
-            {
-                ASSERT(dirty_vram->sl1ma[i] == sl1ma);
-                dirty_vram->sl1ma[i] = INVALID_PADDR;
-                if ( flags & _PAGE_DIRTY )
-                    dirty = 1;
-            }
-        }
-        else
+    if ( old_gfn == new_gfn ) return;
+
+    if ( VALID_M2P(old_gfn) )
+        if ( dirty_vram_range_update(d, old_gfn, sl1ma, 0/*clear*/) )
         {
-            /* We had more than one reference, just consider the page dirty. */
-            dirty = 1;
-            /* Check that it's not the one we recorded. */
-            if ( dirty_vram->sl1ma[i] == sl1ma )
-            {
-                /* Too bad, we remembered the wrong one... */
-                dirty_vram->sl1ma[i] = INVALID_PADDR;
-            }
-            else
-            {
-                /* Ok, our recorded sl1e is still pointing to this page, let's
-                 * just hope it will remain. */
-            }
+            SHADOW_PRINTK("gfn %lx (mfn %lx) cleared vram pte\n",
+                          old_gfn, mfn_x(old_mfn));
         }
-        if ( dirty )
+
+    if ( VALID_M2P(new_gfn) )
+        if ( dirty_vram_range_update(d, new_gfn, sl1ma, 1/*set*/) )
         {
-            dirty_vram->dirty_bitmap[i / 8] |= 1 << (i % 8);
-            dirty_vram->last_dirty = NOW();
+            SHADOW_PRINTK("gfn %lx (mfn %lx) set vram pte\n",
+                          new_gfn, mfn_x(new_mfn));
         }
-    }
 }
 
 static int shadow_set_l1e(struct vcpu *v, 
@@ -1211,12 +1167,13 @@ static int shadow_set_l1e(struct vcpu *v,
                 shadow_l1e_remove_flags(new_sl1e, _PAGE_RW);
                 /* fall through */
             case 0:
-                shadow_vram_get_l1e(new_sl1e, sl1e, sl1mfn, d);
                 break;
             }
         }
     } 
 
+    shadow_vram_fix_l1e(old_sl1e, new_sl1e, sl1e, sl1mfn, d);
+
     /* Write the new entry */
     shadow_write_entries(sl1e, &new_sl1e, 1, sl1mfn);
     flags |= SHADOW_SET_CHANGED;
@@ -1231,7 +1188,6 @@ static int shadow_set_l1e(struct vcpu *v,
          * trigger a flush later. */
         if ( shadow_mode_refcounts(d) ) 
         {
-            shadow_vram_put_l1e(old_sl1e, sl1e, sl1mfn, d);
             shadow_put_page_from_l1e(old_sl1e, d);
             TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_SHADOW_L1_PUT_REF);
         } 
@@ -2018,7 +1974,6 @@ void sh_destroy_l1_shadow(struct vcpu *v, mfn_t smfn)
         SHADOW_FOREACH_L1E(sl1mfn, sl1e, 0, 0, {
             if ( (shadow_l1e_get_flags(*sl1e) & _PAGE_PRESENT)
                  && !sh_l1e_is_magic(*sl1e) ) {
-                shadow_vram_put_l1e(*sl1e, sl1e, sl1mfn, d);
                 shadow_put_page_from_l1e(*sl1e, d);
             }
         });
@@ -4336,6 +4291,37 @@ int sh_rm_mappings_from_l1(struct vcpu *v, mfn_t sl1mfn, mfn_t target_mfn)
     return done;
 }
 
+
+int sh_find_vram_mappings_in_l1(struct vcpu *v,
+                                mfn_t sl1mfn,
+                                unsigned long begin_pfn,
+                                unsigned long end_pfn,
+                                int *removed)
+/* Find all VRAM mappings in this shadow l1 table */
+{
+    struct domain *d = v->domain;
+    shadow_l1e_t *sl1e;
+    int done = 0;
+
+    /* only returns _PAGE_PRESENT entries */
+    SHADOW_FOREACH_L1E(sl1mfn, sl1e, 0, done, 
+    {
+        unsigned long gfn;
+        mfn_t gmfn = shadow_l1e_get_mfn(*sl1e);
+        if ( !mfn_valid(gmfn) )
+            continue;
+        gfn = mfn_to_gfn(d, gmfn);
+        if ( VALID_M2P(gfn) && (begin_pfn <= gfn) && (gfn < end_pfn) ) 
+        {
+            paddr_t sl1ma =
+                pfn_to_paddr(mfn_x(sl1mfn)) |
+                ( (unsigned long)sl1e & ~PAGE_MASK );
+            dirty_vram_range_update(v->domain, gfn, sl1ma, 1/*set*/);
+        }
+    });
+    return 0;
+}
+
 /**************************************************************************/
 /* Functions to excise all pointers to shadows from higher-level shadows. */
 
diff --git a/xen/arch/x86/mm/shadow/multi.h b/xen/arch/x86/mm/shadow/multi.h
index 835121e..436a4ac 100644
--- a/xen/arch/x86/mm/shadow/multi.h
+++ b/xen/arch/x86/mm/shadow/multi.h
@@ -66,7 +66,12 @@ SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1, GUEST_LEVELS)
 extern int
 SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1, GUEST_LEVELS)
     (struct vcpu *v, mfn_t sl1mfn, mfn_t target_mfn);
-
+extern int
+SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, GUEST_LEVELS)
+     (struct vcpu *v, mfn_t sl1mfn, 
+      unsigned long begin_pfn,
+      unsigned long end_pfn,
+      int *removed);
 extern void
 SHADOW_INTERNAL_NAME(sh_clear_shadow_entry, GUEST_LEVELS)
     (struct vcpu *v, void *ep, mfn_t smfn);
diff --git a/xen/arch/x86/mm/shadow/types.h b/xen/arch/x86/mm/shadow/types.h
index 43ce1db..5b0f9f7 100644
--- a/xen/arch/x86/mm/shadow/types.h
+++ b/xen/arch/x86/mm/shadow/types.h
@@ -229,6 +229,7 @@ static inline shadow_l4e_t shadow_l4e_from_mfn(mfn_t mfn, u32 flags)
 #define sh_update_cr3              INTERNAL_NAME(sh_update_cr3)
 #define sh_rm_write_access_from_l1 INTERNAL_NAME(sh_rm_write_access_from_l1)
 #define sh_rm_mappings_from_l1     INTERNAL_NAME(sh_rm_mappings_from_l1)
+#define sh_find_vram_mappings_in_l1 INTERNAL_NAME(sh_find_vram_mappings_in_l1)
 #define sh_remove_l1_shadow        INTERNAL_NAME(sh_remove_l1_shadow)
 #define sh_remove_l2_shadow        INTERNAL_NAME(sh_remove_l2_shadow)
 #define sh_remove_l3_shadow        INTERNAL_NAME(sh_remove_l3_shadow)
diff --git a/xen/include/asm-x86/dirty_vram.h b/xen/include/asm-x86/dirty_vram.h
new file mode 100644
index 0000000..b107d0e
--- /dev/null
+++ b/xen/include/asm-x86/dirty_vram.h
@@ -0,0 +1,202 @@
+/****************************************************************************
+ * include/asm-x86/dirty_vram.h
+ *
+ * Interface for tracking dirty VRAM pages
+ *
+ * Copyright (c) 2012 Citrix Systems, Inc. (Robert Phillips)
+ * Parts of this code are Copyright (c) 2007 Advanced Micro Devices (Wei Huang)
+ * Parts of this code are Copyright (c) 2006 by XenSource Inc.
+ * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
+ * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#ifndef _DIRTY_VRAM_H
+#define _DIRTY_VRAM_H
+
+/*
+ * In shadow mode we need to bookkeep all the L1 page table entries that
+ * map a frame buffer page.  Struct dv_paddr_link does this by
+ * recording the address of an L1 page table entry for some frame buffer page.
+ * It also has a link to additional pl entries if the frame buffer page
+ * has multiple mappings.
+ * In practice very few pages have multiple mappings,
+ * but to rule out pathological situations we limit the number of
+ * mappings we're willing to bookkeep.
+ */
+
+#define DV_ADDR_LINK_LIST_LIMIT 20
+
+typedef struct dv_paddr_link {
+    paddr_t sl1ma;
+    struct dv_paddr_link *pl_next;
+} dv_paddr_link_t;
+
+typedef struct dv_pl_entry {
+    dv_paddr_link_t mapping;
+    bool_t stuck_dirty;
+} dv_pl_entry_t;
+
+/*
+ * This defines an extension page of pl entries for FB pages with multiple
+ * mappings. All such pages (of a domain) are linked together.
+ */
+typedef struct dv_paddr_link_ext {
+    struct list_head ext_link;
+    dv_paddr_link_t entries[ ( PAGE_SIZE - sizeof( struct list_head ) ) /
+                             sizeof( dv_paddr_link_t ) ];
+} dv_paddr_link_ext_t;
+
+/*
+ * This defines a single frame buffer range.  It bookkeeps all the
+ * level 1 PTEs that map guest pages within that range.
+ * All such ranges (of a domain) are linked together.
+ */
+typedef struct dv_range {
+    struct list_head range_link; /* the several ranges form a linked list */
+    unsigned long begin_pfn;
+    unsigned long end_pfn;
+    dv_pl_entry_t *pl_tab; /* table has 1 pl entry per pfn in range */
+    int nr_mappings;  /* total number of mappings in this range */
+    int mappings_hwm; /* high water mark of max mapping count */
+    unsigned int dirty_count;
+} dv_range_t;
+
+/*
+ * This contains all the data structures required by a domain to
+ * bookkeep the dirty pages within its frame buffers.
+ */
+typedef struct dv_dirty_vram {
+    struct list_head range_head; /* head of the linked list of ranges */
+    struct list_head ext_head; /* head of list of extension pages */
+    dv_paddr_link_t *pl_free; /* free list of pl's within extension pages */
+    int nr_ranges; /* bookkeeps number of ranges */
+    int ranges_hwm; /* high water mark of max number of ranges */
+} dv_dirty_vram_t;
+
+/* Allocates domain's dirty_vram structure */
+dv_dirty_vram_t *
+dirty_vram_alloc(struct domain *d);
+
+/*
+ * Returns domain's dirty_vram structure,
+ * allocating it if necessary
+ */
+dv_dirty_vram_t *
+dirty_vram_find_or_alloc(struct domain *d);
+
+/* Frees domain's dirty_vram structure */
+void dirty_vram_free(struct domain *d);
+
+/* Returns dirty vram range containing gfn, NULL if none */
+struct dv_range *
+dirty_vram_range_find_gfn(struct domain *d,
+                          unsigned long gfn);
+
+/*
+ * Returns dirty vram range matching [ begin_pfn .. begin_pfn+nr ),
+ * NULL if none
+ */
+dv_range_t *
+dirty_vram_range_find(struct domain *d,
+                      unsigned long begin_pfn,
+                      unsigned long nr);
+
+/*
+ * Allocate dirty vram range containing [ begin_pfn .. begin_pfn+nr ),
+ * freeing any existing range that overlaps the new range.
+ */
+dv_range_t *
+dirty_vram_range_alloc(struct domain *d,
+                       unsigned long begin_pfn,
+                       unsigned long nr);
+
+/*
+ * Returns dirty vram range matching [ begin_pfn .. begin_pfn+nr ),
+ * creating a range if none already exists and
+ * freeing any existing range that overlaps the new range.
+ */
+dv_range_t *
+dirty_vram_range_find_or_alloc(struct domain *d,
+                               unsigned long begin_pfn,
+                               unsigned long nr);
+
+void dirty_vram_range_free(struct domain *d,
+                           dv_range_t *range);
+
+/* Bookkeep PTE address of a frame buffer page */
+int dirty_vram_range_update(struct domain *d,
+                            unsigned long gfn,
+                            paddr_t sl1ma,
+                            int set);
+
+/*
+ * smfn is no longer a shadow page.  Remove it from any
+ * dirty vram range mapping.
+ */
+void
+dirty_vram_delete_shadow(struct vcpu *v,
+                         unsigned long gfn,
+                         unsigned int shadow_type,
+                         mfn_t smfn);
+
+
+/*
+ * Scan all the L1 tables looking for VRAM mappings.
+ * Record them in the domain's dv_dirty_vram structure
+ */
+void sh_find_all_vram_mappings(struct vcpu *v,
+                               dv_range_t *range);
+
+/*
+ * Free a paddr_link struct, given the address of its
+ * predecessor in the singly-linked list
+ */
+dv_paddr_link_t *
+free_paddr_link(struct domain *d,
+                dv_paddr_link_t **ppl,
+                dv_paddr_link_t *pl);
+
+
+/* Enable VRAM dirty tracking. */
+int
+shadow_track_dirty_vram(struct domain *d,
+			unsigned long first_pfn,
+			unsigned long nr,
+			XEN_GUEST_HANDLE_64(uint8) dirty_bitmap);
+
+int
+hap_track_dirty_vram(struct domain *d,
+		     unsigned long begin_pfn,
+		     unsigned long nr,
+		     XEN_GUEST_HANDLE_64(uint8) dirty_bitmap);
+
+void
+hap_clean_vram_tracking_range(struct domain *d,
+			      unsigned long begin_pfn,
+			      unsigned long nr,
+			      uint8_t *dirty_bitmap);
+
+#endif /* _DIRTY_VRAM_H */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
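
The pl_tab / dv_paddr_link bookkeeping declared above can be easier to see
in a stripped-down, standalone form.  The sketch below is illustrative only
-- plain C with calloc, hypothetical names, no locking and no
extension-page management -- and is not part of the patch:

#include <stdio.h>
#include <stdlib.h>

#define LINK_LIMIT    4
#define INVALID_ADDR  (~0UL)

struct link {                 /* cf. dv_paddr_link_t */
    unsigned long sl1ma;      /* address of one L1 PTE mapping this pfn */
    struct link *next;        /* further mappings, if any */
};

struct cell {                 /* cf. dv_pl_entry_t: one per pfn in a range */
    struct link mapping;      /* the common single mapping lives in-table */
    int stuck_dirty;          /* set when we give up tracking this pfn */
};

static struct link *free_list;    /* cf. dirty_vram->pl_free */

static struct link *get_link(void)
{
    struct link *l = free_list;

    if ( l != NULL )
        free_list = l->next;
    else
        l = calloc(1, sizeof(*l));
    if ( l != NULL )
    {
        l->sl1ma = INVALID_ADDR;
        l->next = NULL;
    }
    return l;
}

/* Record that the L1 PTE at sl1ma now maps this cell's pfn. */
static void add_mapping(struct cell *c, unsigned long sl1ma)
{
    struct link *l = &c->mapping;
    int len = 0;

    for ( ;; )
    {
        if ( l->sl1ma == INVALID_ADDR || l->sl1ma == sl1ma )
        {
            l->sl1ma = sl1ma;             /* free slot, or already recorded */
            return;
        }
        if ( l->next == NULL &&
             (len >= LINK_LIMIT || (l->next = get_link()) == NULL) )
        {
            c->stuck_dirty = 1;           /* give up: treat as always dirty */
            return;
        }
        l = l->next;
        len++;
    }
}

int main(void)
{
    struct cell c = { .mapping = { .sl1ma = INVALID_ADDR, .next = NULL } };

    add_mapping(&c, 0x1000);
    add_mapping(&c, 0x2008);              /* goes to an overflow link */
    printf("head:%lx next:%lx stuck:%d\n",
           c.mapping.sl1ma, c.mapping.next->sl1ma, c.stuck_dirty);
    return 0;
}
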
diff --git a/xen/include/asm-x86/hap.h b/xen/include/asm-x86/hap.h
index 916a35b..3e3a1f5 100644
--- a/xen/include/asm-x86/hap.h
+++ b/xen/include/asm-x86/hap.h
@@ -57,10 +57,6 @@ void  hap_final_teardown(struct domain *d);
 void  hap_teardown(struct domain *d);
 void  hap_vcpu_init(struct vcpu *v);
 void  hap_logdirty_init(struct domain *d);
-int   hap_track_dirty_vram(struct domain *d,
-                           unsigned long begin_pfn,
-                           unsigned long nr,
-                           XEN_GUEST_HANDLE_64(uint8) dirty_bitmap);
 
 extern const struct paging_mode *hap_paging_get_mode(struct vcpu *);
 
diff --git a/xen/include/asm-x86/hvm/domain.h b/xen/include/asm-x86/hvm/domain.h
index 27b3de5..6146542 100644
--- a/xen/include/asm-x86/hvm/domain.h
+++ b/xen/include/asm-x86/hvm/domain.h
@@ -74,7 +74,7 @@ struct hvm_domain {
     struct list_head       pinned_cacheattr_ranges;
 
     /* VRAM dirty support. */
-    struct sh_dirty_vram *dirty_vram;
+    struct dv_dirty_vram * dirty_vram;
 
     /* If one of vcpus of this domain is in no_fill_mode or
      * mtrr/pat between vcpus is not the same, set is_in_uc_mode
diff --git a/xen/include/asm-x86/paging.h b/xen/include/asm-x86/paging.h
index 9a40f2c..e7d4cb3 100644
--- a/xen/include/asm-x86/paging.h
+++ b/xen/include/asm-x86/paging.h
@@ -137,10 +137,10 @@ struct paging_mode {
 void paging_free_log_dirty_bitmap(struct domain *d);
 
 /* get the dirty bitmap for a specific range of pfns */
-int paging_log_dirty_range(struct domain *d,
-                           unsigned long begin_pfn,
-                           unsigned long nr,
-                           XEN_GUEST_HANDLE_64(uint8) dirty_bitmap);
+void paging_log_dirty_range(struct domain *d,
+                            unsigned long begin_pfn,
+                            unsigned long nr,
+                            uint8_t *dirty_bitmap);
 
 /* enable log dirty */
 int paging_log_dirty_enable(struct domain *d);
@@ -154,9 +154,13 @@ void paging_log_dirty_init(struct domain *d,
                            int  (*disable_log_dirty)(struct domain *d),
                            void (*clean_dirty_bitmap)(struct domain *d));
 
-/* mark a page as dirty */
+/* mark a page as dirty, a wrapper around mark a page as dirty */
 void paging_mark_dirty(struct domain *d, unsigned long guest_mfn);
 
+/* mark a page as dirty */
+void paging_mark_dirty_gpfn(struct domain *d, unsigned long gpfn);
+
+
 /* is this guest page dirty? 
  * This is called from inside paging code, with the paging lock held. */
 int paging_mfn_is_dirty(struct domain *d, mfn_t gmfn);
@@ -183,15 +187,6 @@ int paging_mfn_is_dirty(struct domain *d, mfn_t gmfn);
 #define L4_LOGDIRTY_IDX(pfn) 0
 #endif
 
-/* VRAM dirty tracking support */
-struct sh_dirty_vram {
-    unsigned long begin_pfn;
-    unsigned long end_pfn;
-    paddr_t *sl1ma;
-    uint8_t *dirty_bitmap;
-    s_time_t last_dirty;
-};
-
 /*****************************************************************************
  * Entry points into the paging-assistance code */
 
diff --git a/xen/include/asm-x86/shadow.h b/xen/include/asm-x86/shadow.h
index 2eb6efc..940d7fd 100644
--- a/xen/include/asm-x86/shadow.h
+++ b/xen/include/asm-x86/shadow.h
@@ -62,12 +62,6 @@ void shadow_vcpu_init(struct vcpu *v);
 /* Enable an arbitrary shadow mode.  Call once at domain creation. */
 int shadow_enable(struct domain *d, u32 mode);
 
-/* Enable VRAM dirty bit tracking. */
-int shadow_track_dirty_vram(struct domain *d,
-                            unsigned long first_pfn,
-                            unsigned long nr,
-                            XEN_GUEST_HANDLE_64(uint8) dirty_bitmap);
-
 /* Handler for shadow control ops: operations from user-space to enable
  * and disable ephemeral shadow modes (test mode and log-dirty mode) and
  * manipulate the log-dirty bitmap. */
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 35+ messages in thread

* Re: [PATCH] Provide support for multiple frame buffers in Xen
  2012-11-15 15:38 Fabio Fantoni
@ 2012-11-20  1:23 ` Ben Guthro
  0 siblings, 0 replies; 35+ messages in thread
From: Ben Guthro @ 2012-11-20  1:23 UTC (permalink / raw)
  To: fantonifabio; +Cc: xen-devel



On Thu, Nov 15, 2012 at 10:38 AM, Fabio Fantoni <fantonifabio@tiscali.it> wrote:

> Support is provided for both shadow and hardware assisted paging (HAP)
>> modes.
>> This code bookkeeps the set of video frame buffers (vram),
>> detects when the guest has modified any of those buffers and, upon
>> request,
>> returns a bitmap of the modified pages.
>> This lets other software components re-paint the portions of the monitor
>> (or
>> monitors) that have changed.
>> Each monitor has a frame buffer of some size at some position in guest
>> physical
>> memory.
>> The set of frame buffers being tracked can change over time as monitors
>> are
>> plugged and unplugged.
>> (Version 3 of this patch.)
>>
> Is this patch intended for managing multiple independent graphics cards
> on a Xen domU?
> If so, is it also intended for both PV and HVM domUs?
>
>
No, it is intended to track multiple vram regions, for multiple monitors.

See the original thread, where Robert includes a PDF describing some
background for this patch.
http://markmail.org/message/edw5g5ihhfmcwxov






^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH] Provide support for multiple frame buffers in Xen
@ 2012-11-15 15:38 Fabio Fantoni
  2012-11-20  1:23 ` Ben Guthro
  0 siblings, 1 reply; 35+ messages in thread
From: Fabio Fantoni @ 2012-11-15 15:38 UTC (permalink / raw)
  To: xen-devel



> Support is provided for both shadow and hardware assisted paging (HAP) modes.
> This code bookkeeps the set of video frame buffers (vram),
> detects when the guest has modified any of those buffers and, upon request,
> returns a bitmap of the modified pages.
> This lets other software components re-paint the portions of the monitor (or
> monitors) that have changed.
> Each monitor has a frame buffer of some size at some position in guest physical
> memory.
> The set of frame buffers being tracked can change over time as monitors are
> plugged and unplugged.
> (Version 3 of this patch.)
Is this patch intended for managing multiple independent graphics cards on
a Xen domU?
If so, is it also intended for both PV and HVM domUs?



^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH] Provide support for multiple frame buffers in Xen
  2012-11-12 21:31 Robert Phillips
@ 2012-11-15 13:22 ` Tim Deegan
  2012-11-27 15:58   ` Robert Phillips
  0 siblings, 1 reply; 35+ messages in thread
From: Tim Deegan @ 2012-11-15 13:22 UTC (permalink / raw)
  To: Robert Phillips; +Cc: xen-devel

Hi, 

We're very nearly there now.  I think I agree on almost all the
technical decisions but there are still a few things to tidy up (some of
which I mentioned before).

At 16:31 -0500 on 12 Nov (1352737913), Robert Phillips wrote:
> Support is provided for both shadow and hardware assisted paging (HAP) modes.
> This code bookkeeps the set of video frame buffers (vram),
> detects when the guest has modified any of those buffers and, upon request,
> returns a bitmap of the modified pages.
> This lets other software components re-paint the portions of the monitor (or monitors) that have changed.
> Each monitor has a frame buffer of some size at some position in guest physical memory.
> The set of frame buffers being tracked can change over time as monitors are plugged and unplugged.
> (Version 3 of this patch.)

Please linewrap at something less than 80 characters.

> diff --git a/xen/arch/x86/hvm/Makefile b/xen/arch/x86/hvm/Makefile
> index eea5555..e374aac 100644
> --- a/xen/arch/x86/hvm/Makefile
> +++ b/xen/arch/x86/hvm/Makefile
> @@ -22,4 +22,4 @@ obj-y += vlapic.o
>  obj-y += vmsi.o
>  obj-y += vpic.o
>  obj-y += vpt.o
> -obj-y += vpmu.o
> \ No newline at end of file
> +obj-y += vpmu.o

This is an unrelated fix, so doesn't belong in this changeset.

> diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
> index 34da2f5..3a3e5e4 100644
> --- a/xen/arch/x86/hvm/hvm.c
> +++ b/xen/arch/x86/hvm/hvm.c
> @@ -57,6 +57,7 @@
>  #include <asm/hvm/cacheattr.h>
>  #include <asm/hvm/trace.h>
>  #include <asm/hvm/nestedhvm.h>
> +#include <asm/dirty_vram.h>
>  #include <asm/mtrr.h>
>  #include <asm/apic.h>
>  #include <public/sched.h>
> @@ -66,6 +67,7 @@
>  #include <asm/mem_event.h>
>  #include <asm/mem_access.h>
>  #include <public/mem_event.h>
> +#include "../mm/mm-locks.h"
>  
>  bool_t __read_mostly hvm_enabled;
>  
> @@ -1433,8 +1435,20 @@ int hvm_hap_nested_page_fault(paddr_t gpa,
>           */
>          if ( access_w )
>          {
> +            p2m_type_t pt;
> +            pt = p2m_change_type(v->domain, gfn, p2m_ram_logdirty, p2m_ram_rw);
> +            
> +            paging_lock(v->domain);
> +            if ( pt == p2m_ram_logdirty )
> +            {
> +                dv_range_t *range;
> +                v->domain->arch.paging.log_dirty.dirty_count++;
> +                range = dirty_vram_range_find_gfn(v->domain, gfn);
> +                if ( range )
> +                    range->dirty_count++;
> +            }
>              paging_mark_dirty(v->domain, mfn_x(mfn));
> -            p2m_change_type(v->domain, gfn, p2m_ram_logdirty, p2m_ram_rw);
> +            paging_unlock(v->domain);

This is much nicer than the previous version, but I think it would be
even better if this bookkeeping went into paging_mark_dirty() so that
the other callers of paging_mark_dirty() also DTRT with the vram map.
That would avoid leaking mm-locks.h into this non-mm code, too.

Then this change becomes just swapping the order of the two lines (and
perhaps a comment to say why).
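
For concreteness, one possible shape for that suggestion is sketched below.
This is only a sketch of the direction, not code from the patch: it assumes
the gfn can be recovered from the mfn (e.g. via get_gpfn_from_mfn()) and
that the paging lock is taken inside the paging code rather than by each
caller.

void paging_mark_dirty(struct domain *d, unsigned long guest_mfn)
{
    unsigned long gfn = get_gpfn_from_mfn(guest_mfn);  /* assumed lookup */
    dv_range_t *range;

    paging_lock(d);

    /* vram bookkeeping moves here, so every caller gets it for free
     * (the log_dirty.dirty_count update from the hunk above would
     * move here too, along with its p2m-type check) */
    range = dirty_vram_range_find_gfn(d, gfn);
    if ( range )
        range->dirty_count++;

    /* ... existing log-dirty bitmap update, unchanged ... */

    paging_unlock(d);
}

With something like that in place, the hvm.c hunk would reduce to just the
reordering of the p2m_change_type() and paging_mark_dirty() calls, as
suggested.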

> diff --git a/xen/arch/x86/mm/dirty_vram.c b/xen/arch/x86/mm/dirty_vram.c
> new file mode 100644
> index 0000000..e3c7c1f
> --- /dev/null
> +++ b/xen/arch/x86/mm/dirty_vram.c
> @@ -0,0 +1,992 @@
> +/*
> + * arch/x86/mm/dirty_vram.c: Bookkeep/query dirty VRAM pages
> + * with support for multiple frame buffers.
> + *
> + * Copyright (c) 2012, Citrix Systems, Inc. (Robert Phillips)

Please bring in the copyright and authorship notices for the files you
copied code from.  That's at least mm/shadow/common.c and mm/hap/hap.c.

Apart from that this is looking good.  

Are you willing to take on maintainership of this feature (that is, to
respond to questions and fix bugs)?  If so, we should make an update to
the MAINTAINERS file for xen/arch/x86/mm/dirty_vram.c and
xen/include/asm-x86/dirty_vram.h.  That can happen separately, as it'll
need an ack from the other maintainers.

Cheers,

Tim.

^ permalink raw reply	[flat|nested] 35+ messages in thread

* [PATCH] Provide support for multiple frame buffers in Xen
@ 2012-11-12 21:31 Robert Phillips
  2012-11-15 13:22 ` Tim Deegan
  0 siblings, 1 reply; 35+ messages in thread
From: Robert Phillips @ 2012-11-12 21:31 UTC (permalink / raw)
  To: xen-devel; +Cc: Robert Phillips

Support is provided for both shadow and hardware assisted paging (HAP) modes.
This code bookkeeps the set of video frame buffers (vram),
detects when the guest has modified any of those buffers and, upon request,
returns a bitmap of the modified pages.
This lets other software components re-paint the portions of the monitor (or monitors) that have changed.
Each monitor has a frame buffer of some size at some position in guest physical memory.
The set of frame buffers being tracked can change over time as monitors are plugged and unplugged.
(Version 3 of this patch.)

Signed-Off-By: Robert Phillips <robert.phillips@citrix.com>
---
 tools/libxc/xenctrl.h            |   17 +-
 xen/arch/x86/hvm/Makefile        |    2 +-
 xen/arch/x86/hvm/hvm.c           |   16 +-
 xen/arch/x86/mm/Makefile         |    1 +
 xen/arch/x86/mm/dirty_vram.c     |  992 ++++++++++++++++++++++++++++++++++++++
 xen/arch/x86/mm/hap/hap.c        |  140 +-----
 xen/arch/x86/mm/paging.c         |  196 ++------
 xen/arch/x86/mm/shadow/common.c  |  335 +++++++------
 xen/arch/x86/mm/shadow/multi.c   |  174 +++----
 xen/arch/x86/mm/shadow/multi.h   |    7 +-
 xen/arch/x86/mm/shadow/types.h   |    1 +
 xen/include/asm-x86/dirty_vram.h |  201 ++++++++
 xen/include/asm-x86/hap.h        |    4 -
 xen/include/asm-x86/hvm/domain.h |    2 +-
 xen/include/asm-x86/paging.h     |   17 +-
 xen/include/asm-x86/shadow.h     |    6 -
 16 files changed, 1549 insertions(+), 562 deletions(-)
 create mode 100644 xen/arch/x86/mm/dirty_vram.c
 create mode 100644 xen/include/asm-x86/dirty_vram.h

diff --git a/tools/libxc/xenctrl.h b/tools/libxc/xenctrl.h
index 7eb5743..693d7fe 100644
--- a/tools/libxc/xenctrl.h
+++ b/tools/libxc/xenctrl.h
@@ -1552,15 +1552,20 @@ int xc_hvm_inject_msi(
     xc_interface *xch, domid_t dom, uint64_t addr, uint32_t data);
 
 /*
- * Track dirty bit changes in the VRAM area
+ * Track dirty bit changes in a VRAM region defined by
+ * [ first_pfn : first_pfn + nr - 1 ]
  *
  * All of this is done atomically:
- * - get the dirty bitmap since the last call
- * - set up dirty tracking area for period up to the next call
- * - clear the dirty tracking area.
+ * - gets the dirty bitmap since the last call, all zeroes for
+ *   the first call with some new region
+ * - sets up a dirty tracking region for period up to the next call
+ * - clears the specified dirty tracking region.
  *
- * Returns -ENODATA and does not fill bitmap if the area has changed since the
- * last call.
+ * Creating a new region causes any existing regions that it overlaps
+ * to be discarded.
+ *
+ * Specifying nr == 0 causes all regions to be discarded and
+ * disables dirty bit tracking.
  */
 int xc_hvm_track_dirty_vram(
     xc_interface *xch, domid_t dom,
diff --git a/xen/arch/x86/hvm/Makefile b/xen/arch/x86/hvm/Makefile
index eea5555..e374aac 100644
--- a/xen/arch/x86/hvm/Makefile
+++ b/xen/arch/x86/hvm/Makefile
@@ -22,4 +22,4 @@ obj-y += vlapic.o
 obj-y += vmsi.o
 obj-y += vpic.o
 obj-y += vpt.o
-obj-y += vpmu.o
\ No newline at end of file
+obj-y += vpmu.o
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 34da2f5..3a3e5e4 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -57,6 +57,7 @@
 #include <asm/hvm/cacheattr.h>
 #include <asm/hvm/trace.h>
 #include <asm/hvm/nestedhvm.h>
+#include <asm/dirty_vram.h>
 #include <asm/mtrr.h>
 #include <asm/apic.h>
 #include <public/sched.h>
@@ -66,6 +67,7 @@
 #include <asm/mem_event.h>
 #include <asm/mem_access.h>
 #include <public/mem_event.h>
+#include "../mm/mm-locks.h"
 
 bool_t __read_mostly hvm_enabled;
 
@@ -1433,8 +1435,20 @@ int hvm_hap_nested_page_fault(paddr_t gpa,
          */
         if ( access_w )
         {
+            p2m_type_t pt;
+            pt = p2m_change_type(v->domain, gfn, p2m_ram_logdirty, p2m_ram_rw);
+            
+            paging_lock(v->domain);
+            if ( pt == p2m_ram_logdirty )
+            {
+                dv_range_t *range;
+                v->domain->arch.paging.log_dirty.dirty_count++;
+                range = dirty_vram_range_find_gfn(v->domain, gfn);
+                if ( range )
+                    range->dirty_count++;
+            }
             paging_mark_dirty(v->domain, mfn_x(mfn));
-            p2m_change_type(v->domain, gfn, p2m_ram_logdirty, p2m_ram_rw);
+            paging_unlock(v->domain);
         }
         rc = 1;
         goto out_put_gfn;
diff --git a/xen/arch/x86/mm/Makefile b/xen/arch/x86/mm/Makefile
index 73dcdf4..becd0c9 100644
--- a/xen/arch/x86/mm/Makefile
+++ b/xen/arch/x86/mm/Makefile
@@ -5,6 +5,7 @@ obj-y += paging.o
 obj-y += p2m.o p2m-pt.o p2m-ept.o p2m-pod.o
 obj-y += guest_walk_2.o
 obj-y += guest_walk_3.o
+obj-y += dirty_vram.o
 obj-$(x86_64) += guest_walk_4.o
 obj-$(x86_64) += mem_event.o
 obj-$(x86_64) += mem_paging.o
diff --git a/xen/arch/x86/mm/dirty_vram.c b/xen/arch/x86/mm/dirty_vram.c
new file mode 100644
index 0000000..e3c7c1f
--- /dev/null
+++ b/xen/arch/x86/mm/dirty_vram.c
@@ -0,0 +1,992 @@
+/*
+ * arch/x86/mm/dirty_vram.c: Bookkeep/query dirty VRAM pages
+ * with support for multiple frame buffers.
+ *
+ * Copyright (c) 2012, Citrix Systems, Inc. (Robert Phillips)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+
+#include <xen/types.h>
+#include <xen/sched.h>
+#include <xen/guest_access.h>
+#include <asm/shadow.h>
+#include <asm/dirty_vram.h>
+#include "mm-locks.h"
+
+#define DEBUG_stop_tracking_all_vram          0
+#define DEBUG_allocating_dirty_vram_range     0
+#define DEBUG_high_water_mark_for_vram_ranges 0
+#define DEBUG_freeing_dirty_vram_range        0
+#define DEBUG_allocate_paddr_links_page       0
+#define DEBUG_update_vram_mapping             0
+#define DEBUG_alloc_paddr_inject_fault        0
+
+/* Allocates domain's dirty_vram structure */
+dv_dirty_vram_t *
+dirty_vram_alloc(struct domain *d)
+{
+    dv_dirty_vram_t *dirty_vram;
+    ASSERT( paging_locked_by_me(d) );
+    dirty_vram = d->arch.hvm_domain.dirty_vram = xzalloc(dv_dirty_vram_t);
+    if ( dirty_vram )
+    {
+        INIT_LIST_HEAD(&dirty_vram->range_head);
+        INIT_LIST_HEAD(&dirty_vram->ext_head);
+    }
+    return dirty_vram;
+}
+
+/*
+ * Returns domain's dirty_vram structure,
+ * allocating it if necessary
+ */
+dv_dirty_vram_t *
+dirty_vram_find_or_alloc(struct domain *d)
+{
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    ASSERT( paging_locked_by_me(d) );
+    if ( !dirty_vram )
+        dirty_vram = dirty_vram_alloc(d);
+    return dirty_vram;
+}
+
+
+/* Free domain's dirty_vram structure */
+void dirty_vram_free(struct domain *d)
+{
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    ASSERT( paging_locked_by_me(d) );
+    if ( dirty_vram )
+    {
+        struct list_head *curr, *next;
+        /* Free all the ranges */
+        list_for_each_safe(curr, next, &dirty_vram->range_head)
+        {
+            dv_range_t *range = list_entry(curr, dv_range_t, range_link);
+#if DEBUG_stop_tracking_all_vram
+            gdprintk(XENLOG_DEBUG, "[%05lx:%05lx] stop tracking all vram\n",
+                     range->begin_pfn, range->end_pfn);
+#endif
+            xfree(range->pl_tab);
+            xfree(range);
+        }
+        /* Free all the extension pages */
+        list_for_each_safe(curr, next, &dirty_vram->ext_head)
+        {
+            struct dv_paddr_link_ext *ext =
+                container_of(
+                    curr, struct dv_paddr_link_ext, ext_link);
+            struct page_info *pg = __virt_to_page(ext);
+            d->arch.paging.free_page(d, pg);
+        }
+
+        xfree(dirty_vram);
+        d->arch.hvm_domain.dirty_vram = NULL;
+    }
+}
+
+/* Returns dirty vram range containing gfn, NULL if none */
+struct dv_range *
+dirty_vram_range_find_gfn(struct domain *d,
+                          unsigned long gfn)
+{
+    struct dv_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    ASSERT( paging_locked_by_me(d) );
+    if ( dirty_vram )
+    {
+        struct list_head *curr;
+        list_for_each(curr, &dirty_vram->range_head)
+        {
+            dv_range_t *range = list_entry(curr, dv_range_t, range_link);
+            if ( gfn >= range->begin_pfn &&
+                 gfn <  range->end_pfn )
+                return range;
+        }
+    }
+    return NULL;
+}
+
+/*
+ * Returns pointer to dirty vram range matching [begin_pfn .. end_pfn ),
+ * NULL if none.
+ */
+dv_range_t *
+dirty_vram_range_find(struct domain *d,
+                      unsigned long begin_pfn,
+                      unsigned long nr)
+{
+    unsigned long end_pfn = begin_pfn + nr;
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    ASSERT( paging_locked_by_me(d) );
+    if ( dirty_vram )
+    {
+        struct list_head *curr;
+        list_for_each(curr, &dirty_vram->range_head)
+        {
+            dv_range_t *range = list_entry(curr, dv_range_t, range_link);
+            if ( begin_pfn == range->begin_pfn &&
+                 end_pfn   == range->end_pfn )
+                return range;
+        }
+    }
+    return NULL;
+}
+
+/* Allocate specified dirty_vram range */
+static dv_range_t *
+_dirty_vram_range_alloc(struct domain *d,
+                        unsigned long begin_pfn,
+                        unsigned long nr)
+{
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    dv_range_t *range = NULL;
+    unsigned long end_pfn = begin_pfn + nr;
+    dv_pl_entry_t *pl_tab = NULL;
+    int i;
+
+    ASSERT( paging_locked_by_me(d) );
+    ASSERT( dirty_vram != NULL );
+
+#if DEBUG_allocating_dirty_vram_range
+    gdprintk(XENLOG_DEBUG,
+             "[%05lx:%05lx] Allocating dirty vram range hap:%d\n",
+             begin_pfn, end_pfn,
+             d->arch.hvm_domain.hap_enabled);
+#endif
+
+    range = xzalloc(dv_range_t);
+    if ( range == NULL )
+        goto err_out;
+
+    INIT_LIST_HEAD(&range->range_link);
+
+    range->begin_pfn = begin_pfn;
+    range->end_pfn = end_pfn;
+
+    if ( !hap_enabled(d) )
+    {
+        if ( (pl_tab = xzalloc_array(dv_pl_entry_t, nr)) == NULL )
+            goto err_out;
+
+        for ( i = 0; i != nr; i++ )
+        {
+            pl_tab[i].mapping.sl1ma = INVALID_PADDR;
+        }
+    }
+
+    range->pl_tab = pl_tab;
+    range->mappings_hwm = 1;
+
+    list_add(&range->range_link, &dirty_vram->range_head);
+    if ( ++dirty_vram->nr_ranges > dirty_vram->ranges_hwm )
+    {
+        dirty_vram->ranges_hwm = dirty_vram->nr_ranges;
+#if DEBUG_high_water_mark_for_vram_ranges
+        gdprintk(XENLOG_DEBUG,
+                 "High water mark for number of vram ranges is now:%d\n",
+                 dirty_vram->ranges_hwm);
+#endif
+    }
+    return range;
+
+ err_out:
+    xfree(pl_tab);
+    xfree(range);
+    return NULL;
+}
+
+
+/* Frees specified dirty_vram range */
+void dirty_vram_range_free(struct domain *d,
+                           dv_range_t *range)
+{
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    ASSERT( paging_locked_by_me(d) );
+    if ( dirty_vram )
+    {
+        int i, nr = range->end_pfn - range->begin_pfn;
+
+#if DEBUG_freeing_dirty_vram_range
+        gdprintk(XENLOG_DEBUG,
+                 "[%05lx:%05lx] Freeing dirty vram range\n",
+                 range->begin_pfn, range->end_pfn);
+#endif
+
+        if ( range->pl_tab )
+        {
+            for ( i = 0; i != nr; i++ )
+            {
+                dv_paddr_link_t *plx;
+                plx = range->pl_tab[i].mapping.pl_next;
+                /* Does current FB page have multiple mappings? */
+                if ( plx ) /* yes */
+                {
+                    /* Find the last element in singly-linked list */
+                    while ( plx->pl_next != NULL )
+                        plx = plx->pl_next;
+                    
+                    /* Prepend whole list to the free list */
+                    plx->pl_next = dirty_vram->pl_free;
+                    dirty_vram->pl_free = range->pl_tab[i].mapping.pl_next;
+                }
+            }
+            xfree(range->pl_tab);
+            range->pl_tab = NULL;
+        }
+
+        /* Remove range from the linked list, free it, and adjust count*/
+        list_del(&range->range_link);
+        xfree(range);
+        dirty_vram->nr_ranges--;
+    }
+}
+
+/*
+ * dirty_vram_range_alloc()
+ * This function ensures that the new range does not overlap any existing
+ * ranges -- deleting them if necessary -- and then calls
+ * _dirty_vram_range_alloc to actually allocate the new range.
+ */
+dv_range_t *
+dirty_vram_range_alloc(struct domain *d,
+                        unsigned long begin_pfn,
+                        unsigned long nr)
+{
+    unsigned long end_pfn = begin_pfn + nr;
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    dv_range_t *range;
+    struct list_head *curr, *next;
+
+    ASSERT( paging_locked_by_me(d) );
+    ASSERT( dirty_vram != NULL );
+
+    /*
+     * Ranges cannot overlap so
+     * free any range that overlaps [ begin_pfn .. end_pfn )
+     */
+    list_for_each_safe(curr, next, &dirty_vram->range_head)
+    {
+        dv_range_t *rng = list_entry(curr, dv_range_t, range_link);
+        if ( ( ( rng->begin_pfn <= begin_pfn ) &&
+               ( begin_pfn <  rng->end_pfn   )
+                 ) ||
+             ( ( begin_pfn <= rng->begin_pfn ) &&
+               ( rng->begin_pfn < end_pfn    )
+                 ) )
+        {
+            /* Different tracking, tear the previous down. */
+            dirty_vram_range_free(d, rng);
+        }
+    }
+
+    range = _dirty_vram_range_alloc(d, begin_pfn, nr);
+    if ( !range )
+        goto out;
+
+ out:
+    return range;
+}
+
+/*
+ * dirty_vram_range_find_or_alloc()
+ * Find the range for [begin_pfn:begin_pfn+nr).
+ * If it doesn't exists, create it.
+ */
+dv_range_t *
+dirty_vram_range_find_or_alloc(struct domain *d,
+                                unsigned long begin_pfn,
+                                unsigned long nr)
+{
+    dv_range_t *range;
+    ASSERT( paging_locked_by_me(d) );
+    range = dirty_vram_range_find(d, begin_pfn, nr);
+    if ( !range )
+        range = dirty_vram_range_alloc(d, begin_pfn, nr);
+    
+    return range;
+}
+
+
+
+/* Allocate a dv_paddr_link struct */
+static dv_paddr_link_t *
+alloc_paddr_link(struct domain *d)
+{
+    dv_paddr_link_t * pl = NULL;
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
+
+    ASSERT( paging_locked_by_me(d) );
+    BUILD_BUG_ON(sizeof(dv_paddr_link_ext_t) > PAGE_SIZE);
+    /* Is the list of free pl's empty? */
+    if ( dirty_vram->pl_free == NULL ) /* yes */
+    {
+        /*
+         * Allocate another page of pl's.
+         * Link them all together and point the free list head at them
+         */
+        int i;
+        struct page_info *pg = d->arch.paging.alloc_page(d);
+        dv_paddr_link_ext_t *ext = __page_to_virt(pg);
+        if ( ext == NULL )
+            goto out;
+
+#if DEBUG_allocate_paddr_links_page
+        gdprintk(XENLOG_DEBUG, "Allocated another page of paddr_links\n");
+#endif
+        list_add(&ext->ext_link, &dirty_vram->ext_head);
+
+        /* initialize and link together the new pl entries */
+        for ( i = 0; i != ARRAY_SIZE(ext->entries); i++ )
+        {
+            ext->entries[i].sl1ma = INVALID_PADDR;
+            ext->entries[i].pl_next = &ext->entries[i+1];
+        }
+        ext->entries[ARRAY_SIZE(ext->entries) - 1].pl_next = NULL;
+        dirty_vram->pl_free = &ext->entries[0];
+    }
+    pl = dirty_vram->pl_free;
+    dirty_vram->pl_free = pl->pl_next;
+
+    pl->sl1ma = INVALID_PADDR;
+    pl->pl_next = NULL;
+ out:
+    return pl;
+}
+
+
+/*
+ * Free a paddr_link struct.
+ *
+ * The caller has walked the singly-linked list of elements
+ * that have, as their head, an element in a pl_tab cell.
+ * The list walk has reached the element to be freed.
+ * (Each element is a dv_paddr_link_t struct.)
+ *
+ * @pl points to the element to be freed.
+ * @ppl points to its predecessor element's next member.
+ *
+ * After linking the predecessor to the element's successor,
+ * we can free @pl by prepending it to the list of free
+ * elements.
+ *
+ * As a boundary case (which happens to be the common case),
+ * @pl points to a cell in the pl_tab rather than to some
+ * extension element dangling from that cell.
+ * We recognize this case because @ppl is NULL.
+ * In that case we promote the first extension element by
+ * copying it into the pl_tab cell and free it.
+ */
+
+dv_paddr_link_t *
+free_paddr_link(struct domain *d,
+                dv_paddr_link_t **ppl,
+                dv_paddr_link_t *pl)
+{
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    dv_paddr_link_t *npl; /* next pl */
+
+    ASSERT( paging_locked_by_me(d) );
+    /* extension mapping? */
+    if ( ppl ) /* yes. free it */
+    {
+        ASSERT(pl == (*ppl));
+        (*ppl) = npl = pl->pl_next;
+    }
+    else  /* main table */
+    {
+        /*
+         * Move the 2nd mapping into the main table
+         * and free the 2nd mapping's element.
+         */
+        dv_paddr_link_t * spl;
+        spl = pl->pl_next;
+        if ( spl == NULL )
+        {
+            pl->sl1ma = INVALID_PADDR;
+            return pl;
+        }
+        pl->sl1ma = spl->sl1ma;
+        pl->pl_next = spl->pl_next;
+        npl = pl; /* reprocess main table entry again */
+        pl = spl;
+    }
+    pl->sl1ma = INVALID_PADDR;
+    pl->pl_next = dirty_vram->pl_free;
+    dirty_vram->pl_free = pl;
+    return npl;
+}
+
+
+/*
+ * dirty_vram_range_update()
+ *
+ * This is called whenever a level 1 page table entry is modified.
+ * If the L1PTE is being cleared, the function removes any paddr_links
+ * that refer to it.
+ * If the L1PTE is being set to a frame buffer page, a paddr_link is
+ * created for that page's entry in pl_tab.
+ * Returns 1 iff entry found and set or cleared.
+ */
+int dirty_vram_range_update(struct domain *d,
+                            unsigned long gfn,
+                            paddr_t sl1ma,
+                            int set)
+{
+    int effective = 0;
+    dv_range_t *range;
+    unsigned long i;
+    dv_paddr_link_t *pl;
+    dv_paddr_link_t **ppl;
+    int len = 0;
+
+    ASSERT(paging_locked_by_me(d));
+    range = dirty_vram_range_find_gfn(d, gfn);
+    if ( !range )
+        return effective;
+
+    
+    i = gfn - range->begin_pfn;
+    pl = &range->pl_tab[ i ].mapping;
+    ppl = NULL;
+
+    /*
+     * find matching entry (pl), if any, and its predecessor
+     * in linked list (ppl)
+     */
+    while ( pl != NULL )
+    {
+        if ( pl->sl1ma == sl1ma || pl->sl1ma == INVALID_PADDR )
+            break;
+            
+        ppl = &pl->pl_next;
+        pl = *ppl;
+        len++;
+    }
+
+    if ( set )
+    {
+        /* Did we find sl1ma in either the main table or the linked list? */
+        if ( pl == NULL ) /* no, so we'll need to alloc a link */
+        {
+            ASSERT(ppl != NULL);
+            
+#if DEBUG_alloc_paddr_inject_fault
+            {
+                static int counter;
+                
+                /* Test stuck_dirty logic for some cases */
+                if ( (++counter) % 4 == 0 )
+                {
+                    /* Simply mark the frame buffer page as always dirty */
+                    range->pl_tab[ i ].stuck_dirty = 1;
+                    gdprintk(XENLOG_DEBUG,
+                             "[%lx] inject stuck dirty fault\n",
+                             gfn );
+                    goto out;
+                }
+            }
+#endif
+            /*
+             * Have we reached the limit of mappings we're willing
+             * to bookkeep?
+             */
+            if (len > DV_ADDR_LINK_LIST_LIMIT) /* yes */
+            {
+                /* Simply mark the frame buffer page as always dirty */
+                range->pl_tab[ i ].stuck_dirty = 1;
+                
+                gdprintk(XENLOG_DEBUG,
+                         "[%lx] link limit exceeded\n",
+                         gfn );
+                
+                goto out;
+            }
+
+            /* alloc link and append it to list */
+            (*ppl) = pl = alloc_paddr_link(d);
+            /* Were we able to allocate a link? */
+            if ( pl == NULL ) /* no */
+            {
+                /* Simply mark the frame buffer page as always dirty */
+                range->pl_tab[ i ].stuck_dirty = 1;
+                
+                gdprintk(XENLOG_DEBUG,
+                         "[%lx] alloc failure\n",
+                         gfn );
+                
+                goto out;
+            }
+        }
+        if ( pl->sl1ma != sl1ma )
+        {
+            ASSERT(pl->sl1ma == INVALID_PADDR);
+            pl->sl1ma = sl1ma;
+            range->nr_mappings++;
+        }
+        effective = 1;
+        if ( len > range->mappings_hwm )
+        {
+            range->mappings_hwm = len;
+#if DEBUG_update_vram_mapping
+            gdprintk(XENLOG_DEBUG,
+                     "[%lx] set      sl1ma:%lx hwm:%d mappings:%d freepages:%d\n",
+                     gfn, sl1ma,
+                     range->mappings_hwm,
+                     range->nr_mappings,
+                     d->arch.paging.shadow.free_pages);
+#endif
+        }
+    }
+    else /* clear */
+    {
+        if ( pl && pl->sl1ma == sl1ma )
+        {
+#if DEBUG_update_vram_mapping
+            gdprintk(XENLOG_DEBUG,
+                     "[%lx] clear    sl1ma:%lx mappings:%d\n",
+                     gfn, sl1ma,
+                     range->nr_mappings - 1);
+#endif
+            free_paddr_link(d, ppl, pl);
+            --range->nr_mappings;
+            effective = 1;
+        }
+    }
+ out:
+    return effective;
+}
+
+
+/*
+ * shadow_scan_dirty_flags()
+ * This produces a dirty bitmap for the range by examining every
+ * L1PTE referenced by some dv_paddr_link in the range's pl_tab table.
+ * It tests and clears each such L1PTE's dirty flag.
+ */
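+/*
+ * A worked instance of the bitmap indexing used below (illustrative numbers):
+ * page i of a range occupies bit (i & 7) of byte (i >> 3) in dirty_bitmap,
+ * so if the page at begin_pfn + 11 is dirty, byte (11 >> 3) == 1 is OR-ed
+ * with (1 << (11 & 7)) == 0x08.
+ */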
+static int shadow_scan_dirty_flags(struct domain *d,
+                                   dv_range_t *range,
+                                   uint8_t *dirty_bitmap)
+{
+    int flush_tlb = 0;
+    unsigned long i;
+    unsigned long nr = range->end_pfn - range->begin_pfn;
+
+    ASSERT( paging_locked_by_me(d) );
+    /* Iterate over VRAM to track dirty bits. */
+    for ( i = 0; i < nr; i++ )
+    {
+        int dirty = 0;
+        dv_paddr_link_t *pl;
+        /* Does the frame buffer have an incomplete set of mappings? */
+        if ( unlikely(range->pl_tab[i].stuck_dirty) ) /* yes */
+            dirty = 1;
+        else /* The frame buffer's set of mappings is complete.  Scan it. */
+            for ( pl = &range->pl_tab[i].mapping; pl; pl = pl->pl_next )
+            {
+                l1_pgentry_t *sl1e;
+                paddr_t sl1ma = pl->sl1ma;
+                if ( sl1ma == INVALID_PADDR ) /* FB page is unmapped */
+                    continue;
+                sl1e = maddr_to_virt(sl1ma);
+                if ( l1e_get_flags(*sl1e) & _PAGE_DIRTY )
+                {
+                    dirty = 1;
+                    /* Clear dirty so we can detect if page gets re-dirtied.
+                     * Note: this is atomic, so we may clear a
+                     * _PAGE_ACCESSED set by another processor.
+                     */
+                    l1e_remove_flags(*sl1e, _PAGE_DIRTY);
+                    flush_tlb = 1;
+                }
+            } /* for */
+        
+        if ( dirty )
+            dirty_bitmap[i >> 3] |= (1 << (i & 7));
+
+    }
+
+    return flush_tlb;
+}
+
+
+/*
+ * shadow_track_dirty_vram()
+ * This is the API called, via HVMOP_track_dirty_vram, to determine which
+ * pages in the range [begin_pfn:begin_pfn+nr) have been dirtied since the
+ * last call.
+ * It creates the domain's dv_dirty_vram on demand.
+ * It creates ranges on demand when some [begin_pfn:begin_pfn+nr) is first
+ * encountered.
+ * To collect the dirty bitmask it calls shadow_scan_dirty_flags().
+ * It copies the dirty bitmask into guest storage.
+ */
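+/*
+ * Sketch of a typical caller (not defined here): the HVMOP_track_dirty_vram
+ * handler is expected to dispatch on the domain's paging mode, along the
+ * lines of
+ *
+ *     rc = hap_enabled(d) ?
+ *         hap_track_dirty_vram(d, begin_pfn, nr, dirty_bitmap) :
+ *         shadow_track_dirty_vram(d, begin_pfn, nr, dirty_bitmap);
+ */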
+int shadow_track_dirty_vram(struct domain *d,
+                            unsigned long begin_pfn,
+                            unsigned long nr,
+                            XEN_GUEST_HANDLE_64(uint8) guest_dirty_bitmap)
+{
+    int rc = 0;
+    unsigned long end_pfn = begin_pfn + nr;
+    int flush_tlb = 0;
+    dv_range_t *range;
+    struct p2m_domain *p2m = p2m_get_hostp2m(d);
+
+    /*
+     * This range test is tricky.
+     *
+     * The range [begin_pfn..end_pfn) is an open interval, so end_pfn is a pfn
+     * beyond the end of the range.
+     *
+     * p2m->max_mapped_pfn is a valid PFN so p2m->max_mapped_pfn + 1 is an
+     * invalid PFN.
+     *
+     * If end_pfn is beyond *that* then the range is invalid.
+     */
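+    /*
+     * For example (illustrative numbers): if p2m->max_mapped_pfn is 0xfffff,
+     * a request with begin_pfn == 0xff800 and nr == 0x800 yields
+     * end_pfn == 0x100000 == max_mapped_pfn + 1, which is still accepted
+     * because end_pfn itself is never looked up.
+     */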
+    if ( end_pfn < begin_pfn
+         || begin_pfn > p2m->max_mapped_pfn
+         || end_pfn > p2m->max_mapped_pfn + 1 )
+        return -EINVAL;
+
+    paging_lock(d);
+
+    if ( !nr )
+    {
+        dirty_vram_free(d);
+        goto out;
+    }
+
+    if ( guest_handle_is_null(guest_dirty_bitmap) )
+        goto out;
+
+    if ( !dirty_vram_find_or_alloc(d) )
+    {
+        rc = -ENOMEM;
+        goto out;
+    }
+
+    range = dirty_vram_range_find(d, begin_pfn, nr);
+    if ( !range )
+    {
+        range = dirty_vram_range_alloc(d, begin_pfn, nr);
+        if ( range )
+            sh_find_all_vram_mappings(d->vcpu[0], range);
+    }
+    if ( range )
+    {
+        int size = (nr + BITS_PER_LONG - 1) / BITS_PER_LONG;
+        unsigned long dirty_bitmap[size];
+
+        memset(dirty_bitmap, 0x00, size * BYTES_PER_LONG);
+
+        flush_tlb |= shadow_scan_dirty_flags(d, range, (uint8_t*)dirty_bitmap);
+
+        rc = -EFAULT;
+        if ( copy_to_guest(guest_dirty_bitmap,
+                           (uint8_t*)dirty_bitmap,
+                           size * BYTES_PER_LONG) == 0 )
+            rc = 0;
+    }
+    
+    if ( flush_tlb )
+        flush_tlb_mask(d->domain_dirty_cpumask);
+
+out:
+    paging_unlock(d);
+    return rc;
+}
+
+
+/************************************************/
+/*          HAP VRAM TRACKING SUPPORT           */
+/************************************************/
+
+/*
+ * hap_enable_vram_tracking()
+ * For all ranges, mark all vram pages in range as logdirty read-only.
+ */
+static int hap_enable_vram_tracking(struct domain *d)
+{
+    int rc = 0;
+    dv_dirty_vram_t *dirty_vram;
+    struct list_head *curr;
+
+    /* turn on PG_log_dirty bit in paging mode */
+    paging_lock(d);
+    d->arch.paging.mode |= PG_log_dirty;
+    paging_unlock(d);
+
+    p2m_lock(p2m_get_hostp2m(d));
+    paging_lock(d);
+
+    dirty_vram = d->arch.hvm_domain.dirty_vram;
+
+    /*
+     * dirty_vram != NULL iff we're tracking dirty vram.
+     * If we start tracking dirty pages for all memory then
+     * the dirty_vram structure is freed.
+     */
+    if ( !dirty_vram )
+    {
+        rc = -EINVAL;
+        goto out;
+    }
+
+    /* set l1e entries of P2M table to be read-only. */
+    list_for_each(curr, &dirty_vram->range_head)
+    {
+        dv_range_t *range = list_entry(curr, dv_range_t, range_link);
+        gdprintk(XENLOG_DEBUG, "[%05lx:%05lx] enable  vram tracking\n",
+                 range->begin_pfn, range->end_pfn);
+        p2m_change_type_range(d, range->begin_pfn, range->end_pfn,
+                              p2m_ram_rw, p2m_ram_logdirty);
+    }
+
+    flush_tlb_mask(d->domain_dirty_cpumask);
+ out:
+    paging_unlock(d);
+    p2m_unlock(p2m_get_hostp2m(d));
+    if ( rc )
+    {
+        paging_lock(d);
+        d->arch.paging.mode &= ~PG_log_dirty;
+        paging_unlock(d);
+    }
+    return rc;
+}
+
+/*
+ * hap_disable_vram_tracking()
+ * For all ranges, return all vram pages in range to the normal read-write type.
+ */
+static int hap_disable_vram_tracking(struct domain *d)
+{
+    int rc = 0;
+    dv_dirty_vram_t *dirty_vram;
+    struct list_head *curr;
+
+    paging_lock(d);
+    d->arch.paging.mode &= ~PG_log_dirty;
+    paging_unlock(d);
+
+    p2m_lock(p2m_get_hostp2m(d));
+    paging_lock(d);
+
+    dirty_vram = d->arch.hvm_domain.dirty_vram;
+    if ( !dirty_vram )
+    {
+        rc = -EINVAL;
+        goto out;
+    }
+
+    /* set l1e entries of P2M table with normal mode */
+    list_for_each(curr, &dirty_vram->range_head)
+    {
+        dv_range_t *range = list_entry(curr, dv_range_t, range_link);
+        gdprintk(XENLOG_DEBUG, "[%05lx:%05lx] disable vram tracking\n",
+                 range->begin_pfn, range->end_pfn);
+        p2m_change_type_range(d, range->begin_pfn, range->end_pfn,
+                              p2m_ram_logdirty, p2m_ram_rw);
+    }
+    flush_tlb_mask(d->domain_dirty_cpumask);
+ out:
+    paging_unlock(d);
+    p2m_unlock(p2m_get_hostp2m(d));
+    if ( rc )
+    {
+        paging_lock(d);
+        d->arch.paging.mode |= PG_log_dirty;
+        paging_unlock(d);
+    }
+    return rc;
+}
+
+/*
+ * hap_clean_vram_tracking_range()
+ * For all the pages in the range [begin_pfn, begin_pfn+nr),
+ * note in the dirty bitmap any page that has been marked as read-write,
+ * which signifies that the page has been dirtied, and reset the page
+ * to ram_logdirty.
+ */
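+/*
+ * Informal sketch of the type cycle for a tracked page:
+ *
+ *   p2m_ram_logdirty --(guest write faults, page made writable)--> p2m_ram_rw
+ *   p2m_ram_rw --(this function: bit noted, type reset)--> p2m_ram_logdirty
+ */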
+void hap_clean_vram_tracking_range(struct domain *d,
+                                   unsigned long begin_pfn,
+                                   unsigned long nr,
+                                   uint8_t *dirty_bitmap)
+{
+    int i;
+    unsigned long pfn;
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    dv_range_t *range;
+
+    ASSERT(p2m_locked_by_me(p2m_get_hostp2m(d)));
+    ASSERT(paging_locked_by_me(d));
+
+    if ( !dirty_vram )
+    {
+        gdprintk(XENLOG_DEBUG,
+                 "Should only be called while tracking dirty vram.\n");
+        return;
+    }
+
+    range = dirty_vram_range_find(d, begin_pfn, nr);
+    if ( !range )
+        return;
+
+    /*
+     * Set the p2m entries in this range back to read-only (logdirty).
+     * Any page found still read-write has been written since the last call,
+     * so its bit is set in the dirty bitmap before the page is returned to
+     * logdirty.  The guest's next write to it will then fault, flip the page
+     * back to read-write, and succeed on retry.
+     */
+    for ( i = 0, pfn = range->begin_pfn; pfn < range->end_pfn; i++, pfn++ )
+    {
+        p2m_type_t pt;
+        pt = p2m_change_type(d, pfn, p2m_ram_rw, p2m_ram_logdirty);
+        if ( pt == p2m_ram_rw )
+            dirty_bitmap[i >> 3] |= (1 << (i & 7));
+    }
+    flush_tlb_mask(d->domain_dirty_cpumask);
+}
+
+static void hap_vram_tracking_init(struct domain *d)
+{
+    paging_log_dirty_init(d, hap_enable_vram_tracking,
+                          hap_disable_vram_tracking,
+                          NULL);
+}
+
+/*
+ * hap_track_dirty_vram()
+ * Create the domain's dv_dirty_vram struct on demand.
+ * Create a dirty vram range on demand when some [begin_pfn:begin_pfn+nr) is
+ * first encountered.
+ * Collect the guest_dirty bitmask, a bit mask of the dirty vram pages, by
+ * calling paging_log_dirty_range().
+ */
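+/*
+ * Usage note (informal): calling this with nr == 0 while tracking is active
+ * tears the tracking down again.  The first query for a newly created range
+ * reports every page in the range as dirty; later queries report only the
+ * pages written since the previous query.
+ */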
+int hap_track_dirty_vram(struct domain *d,
+                         unsigned long begin_pfn,
+                         unsigned long nr,
+                         XEN_GUEST_HANDLE_64(uint8) guest_dirty_bitmap)
+{
+    long rc = 0;
+    dv_dirty_vram_t *dirty_vram;
+
+    paging_lock(d);
+    dirty_vram = d->arch.hvm_domain.dirty_vram;
+    if ( nr )
+    {
+        dv_range_t *range = NULL;
+        int size =
+            ( ( nr + BITS_PER_LONG - 1 ) / BITS_PER_LONG ) * BYTES_PER_LONG;
+        uint8_t dirty_bitmap[size];
+        bool_t new_range = 0;
+
+        /* Already tracking dirty vram? */
+        if ( paging_mode_log_dirty(d) && dirty_vram ) /* yes */
+        {
+            /* Handle the addition of another range */
+            range = dirty_vram_range_find(d, begin_pfn, nr);
+            if ( !range )
+            {
+                rc = -ENOMEM;
+                if ( !(range = dirty_vram_range_alloc(d, begin_pfn, nr)) )
+                    goto param_fail;
+                new_range = 1;
+            }
+        }
+        /* Just starting to track dirty vram? */
+        else if ( !paging_mode_log_dirty(d) && !dirty_vram ) /* yes */
+        {
+            rc = -ENOMEM;
+            if ( !(dirty_vram = dirty_vram_alloc(d)) )
+                goto param_fail;
+
+            if ( !(range = dirty_vram_range_find_or_alloc(d, begin_pfn, nr)) )
+                goto param_fail;
+
+            new_range = 1;
+            
+            /* Initialize callbacks for vram tracking */
+            hap_vram_tracking_init(d);
+
+            /* Enable HAP vram tracking */
+            paging_unlock(d);
+            rc = paging_log_dirty_enable(d);
+            paging_lock(d);
+            
+            if ( rc != 0 )
+                goto param_fail;
+        }
+        else
+        {
+            /* Test for invalid combination */
+            if ( !paging_mode_log_dirty(d) && dirty_vram )
+                rc = -EINVAL;
+            else /* logging dirty of all memory, not tracking dirty vram */
+                rc = -ENODATA;
+            goto param_fail;
+        }
+
+        paging_unlock(d);
+        /* Is this query the very first for this range? */
+        if ( new_range ) /* yes */
+            memset(dirty_bitmap, 0xff, size); /* consider all pages dirty */
+        else
+        {
+            memset(dirty_bitmap, 0x00, size);
+            paging_log_dirty_range(d, begin_pfn, nr, dirty_bitmap);
+        }
+        rc = -EFAULT;
+        if ( copy_to_guest(guest_dirty_bitmap,
+                           dirty_bitmap,
+                           size) == 0 )
+        {
+            rc = 0;
+        }
+    }
+    else
+    {
+        /*
+         * If zero pages specified while already tracking dirty vram
+         * then stop tracking
+         */
+        if ( paging_mode_log_dirty(d) && dirty_vram )
+        {
+            /* Disable HAP vram tracking */
+            paging_unlock(d);
+            rc = paging_log_dirty_disable(d);
+            paging_lock(d);
+            
+            dirty_vram_free(d);
+        }
+        else /* benign no-op */
+        {
+            rc = 0;
+        }
+        paging_unlock(d);
+    }
+
+    return rc;
+
+param_fail:
+    dirty_vram_free(d);
+    paging_unlock(d);
+    return rc;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c
index fd99cde..09cdba2 100644
--- a/xen/arch/x86/mm/hap/hap.c
+++ b/xen/arch/x86/mm/hap/hap.c
@@ -41,6 +41,7 @@
 #include <asm/domain.h>
 #include <xen/numa.h>
 #include <asm/hvm/nestedhvm.h>
+#include <asm/dirty_vram.h>
 
 #include "private.h"
 
@@ -53,139 +54,6 @@
 #define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg))
 
 /************************************************/
-/*          HAP VRAM TRACKING SUPPORT           */
-/************************************************/
-
-static int hap_enable_vram_tracking(struct domain *d)
-{
-    struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
-
-    if ( !dirty_vram )
-        return -EINVAL;
-
-    /* turn on PG_log_dirty bit in paging mode */
-    paging_lock(d);
-    d->arch.paging.mode |= PG_log_dirty;
-    paging_unlock(d);
-
-    /* set l1e entries of P2M table to be read-only. */
-    p2m_change_type_range(d, dirty_vram->begin_pfn, dirty_vram->end_pfn, 
-                          p2m_ram_rw, p2m_ram_logdirty);
-
-    flush_tlb_mask(d->domain_dirty_cpumask);
-    return 0;
-}
-
-static int hap_disable_vram_tracking(struct domain *d)
-{
-    struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
-
-    if ( !dirty_vram )
-        return -EINVAL;
-
-    paging_lock(d);
-    d->arch.paging.mode &= ~PG_log_dirty;
-    paging_unlock(d);
-
-    /* set l1e entries of P2M table with normal mode */
-    p2m_change_type_range(d, dirty_vram->begin_pfn, dirty_vram->end_pfn, 
-                          p2m_ram_logdirty, p2m_ram_rw);
-
-    flush_tlb_mask(d->domain_dirty_cpumask);
-    return 0;
-}
-
-static void hap_clean_vram_tracking(struct domain *d)
-{
-    struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
-
-    if ( !dirty_vram )
-        return;
-
-    /* set l1e entries of P2M table to be read-only. */
-    p2m_change_type_range(d, dirty_vram->begin_pfn, dirty_vram->end_pfn, 
-                          p2m_ram_rw, p2m_ram_logdirty);
-
-    flush_tlb_mask(d->domain_dirty_cpumask);
-}
-
-static void hap_vram_tracking_init(struct domain *d)
-{
-    paging_log_dirty_init(d, hap_enable_vram_tracking,
-                          hap_disable_vram_tracking,
-                          hap_clean_vram_tracking);
-}
-
-int hap_track_dirty_vram(struct domain *d,
-                         unsigned long begin_pfn,
-                         unsigned long nr,
-                         XEN_GUEST_HANDLE_64(uint8) dirty_bitmap)
-{
-    long rc = 0;
-    struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
-
-    if ( nr )
-    {
-        if ( paging_mode_log_dirty(d) && dirty_vram )
-        {
-            if ( begin_pfn != dirty_vram->begin_pfn ||
-                 begin_pfn + nr != dirty_vram->end_pfn )
-            {
-                paging_log_dirty_disable(d);
-                dirty_vram->begin_pfn = begin_pfn;
-                dirty_vram->end_pfn = begin_pfn + nr;
-                rc = paging_log_dirty_enable(d);
-                if (rc != 0)
-                    goto param_fail;
-            }
-        }
-        else if ( !paging_mode_log_dirty(d) && !dirty_vram )
-        {
-            rc = -ENOMEM;
-            if ( (dirty_vram = xmalloc(struct sh_dirty_vram)) == NULL )
-                goto param_fail;
-
-            dirty_vram->begin_pfn = begin_pfn;
-            dirty_vram->end_pfn = begin_pfn + nr;
-            d->arch.hvm_domain.dirty_vram = dirty_vram;
-            hap_vram_tracking_init(d);
-            rc = paging_log_dirty_enable(d);
-            if (rc != 0)
-                goto param_fail;
-        }
-        else
-        {
-            if ( !paging_mode_log_dirty(d) && dirty_vram )
-                rc = -EINVAL;
-            else
-                rc = -ENODATA;
-            goto param_fail;
-        }
-        /* get the bitmap */
-        rc = paging_log_dirty_range(d, begin_pfn, nr, dirty_bitmap);
-    }
-    else
-    {
-        if ( paging_mode_log_dirty(d) && dirty_vram ) {
-            rc = paging_log_dirty_disable(d);
-            xfree(dirty_vram);
-            dirty_vram = d->arch.hvm_domain.dirty_vram = NULL;
-        } else
-            rc = 0;
-    }
-
-    return rc;
-
-param_fail:
-    if ( dirty_vram )
-    {
-        xfree(dirty_vram);
-        dirty_vram = d->arch.hvm_domain.dirty_vram = NULL;
-    }
-    return rc;
-}
-
-/************************************************/
 /*            HAP LOG DIRTY SUPPORT             */
 /************************************************/
 
@@ -223,14 +91,12 @@ static void hap_clean_dirty_bitmap(struct domain *d)
 
 void hap_logdirty_init(struct domain *d)
 {
-    struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    struct dv_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
     if ( paging_mode_log_dirty(d) && dirty_vram )
     {
         paging_log_dirty_disable(d);
-        xfree(dirty_vram);
-        dirty_vram = d->arch.hvm_domain.dirty_vram = NULL;
+        dirty_vram_free(d);
     }
-
     /* Reinitialize logdirty mechanism */
     paging_log_dirty_init(d, hap_enable_log_dirty,
                           hap_disable_log_dirty,
diff --git a/xen/arch/x86/mm/paging.c b/xen/arch/x86/mm/paging.c
index ea44e39..55bedde 100644
--- a/xen/arch/x86/mm/paging.c
+++ b/xen/arch/x86/mm/paging.c
@@ -27,6 +27,7 @@
 #include <asm/p2m.h>
 #include <asm/hap.h>
 #include <asm/hvm/nestedhvm.h>
+#include <asm/dirty_vram.h>
 #include <xen/numa.h>
 #include <xsm/xsm.h>
 
@@ -333,8 +334,11 @@ int paging_log_dirty_op(struct domain *d, struct xen_domctl_shadow_op *sc)
     mfn_t *l4, *l3, *l2;
     unsigned long *l1;
     int i4, i3, i2;
+    struct p2m_domain *p2m = p2m_get_hostp2m(d);
 
     domain_pause(d);
+    /* Locking hierarchy requires p2m lock to be taken first */
+    p2m_lock(p2m);
     paging_lock(d);
 
     clean = (sc->op == XEN_DOMCTL_SHADOW_OP_CLEAN);
@@ -345,6 +349,14 @@ int paging_log_dirty_op(struct domain *d, struct xen_domctl_shadow_op *sc)
                  d->arch.paging.log_dirty.fault_count,
                  d->arch.paging.log_dirty.dirty_count);
 
+    if ( hap_enabled(d) && d->arch.hvm_domain.dirty_vram )
+    {
+        /*
+         * If we're cleaning/peeking all guest memory, we should not be
+         * tracking dirty vram.
+         */
+        rv = -EINVAL;
+        goto out;
+    }
+
     sc->stats.fault_count = d->arch.paging.log_dirty.fault_count;
     sc->stats.dirty_count = d->arch.paging.log_dirty.dirty_count;
 
@@ -424,170 +436,64 @@ int paging_log_dirty_op(struct domain *d, struct xen_domctl_shadow_op *sc)
 
     if ( clean )
     {
-        /* We need to further call clean_dirty_bitmap() functions of specific
-         * paging modes (shadow or hap).  Safe because the domain is paused. */
-        d->arch.paging.log_dirty.clean_dirty_bitmap(d);
+        /* clean_dirty_bitmap is NULL while tracking dirty vram */
+        if ( d->arch.paging.log_dirty.clean_dirty_bitmap )
+        {
+            /*
+             * We need to call clean_dirty_bitmap() functions of specific
+             * paging modes (shadow or hap).
+             * Safe because the domain is paused.
+             */
+            d->arch.paging.log_dirty.clean_dirty_bitmap(d);
+        }
     }
     domain_unpause(d);
     return rv;
 
  out:
     paging_unlock(d);
+    p2m_unlock(p2m);
     domain_unpause(d);
     return rv;
 }
 
-int paging_log_dirty_range(struct domain *d,
-                            unsigned long begin_pfn,
-                            unsigned long nr,
-                            XEN_GUEST_HANDLE_64(uint8) dirty_bitmap)
+void paging_log_dirty_range(struct domain *d,
+                            unsigned long begin_pfn,
+                            unsigned long nr,
+                            uint8_t *dirty_bitmap)
 {
-    int rv = 0;
-    unsigned long pages = 0;
-    mfn_t *l4, *l3, *l2;
-    unsigned long *l1;
-    int b1, b2, b3, b4;
-    int i2, i3, i4;
-
-    d->arch.paging.log_dirty.clean_dirty_bitmap(d);
+    struct p2m_domain *p2m = p2m_get_hostp2m(d);
+    dv_range_t *range;
+    unsigned int range_dirty_count = 0;
+    
+    p2m_lock(p2m);
     paging_lock(d);
 
-    PAGING_DEBUG(LOGDIRTY, "log-dirty-range: dom %u faults=%u dirty=%u\n",
-                 d->domain_id,
-                 d->arch.paging.log_dirty.fault_count,
-                 d->arch.paging.log_dirty.dirty_count);
-
-    if ( unlikely(d->arch.paging.log_dirty.failed_allocs) ) {
-        printk("%s: %d failed page allocs while logging dirty pages\n",
-               __FUNCTION__, d->arch.paging.log_dirty.failed_allocs);
-        rv = -ENOMEM;
-        goto out;
-    }
-
-    if ( !d->arch.paging.log_dirty.fault_count &&
-         !d->arch.paging.log_dirty.dirty_count ) {
-        unsigned int size = BITS_TO_LONGS(nr);
-
-        if ( clear_guest(dirty_bitmap, size * BYTES_PER_LONG) != 0 )
-            rv = -EFAULT;
-        goto out;
-    }
-    d->arch.paging.log_dirty.fault_count = 0;
-    d->arch.paging.log_dirty.dirty_count = 0;
-
-    b1 = L1_LOGDIRTY_IDX(begin_pfn);
-    b2 = L2_LOGDIRTY_IDX(begin_pfn);
-    b3 = L3_LOGDIRTY_IDX(begin_pfn);
-    b4 = L4_LOGDIRTY_IDX(begin_pfn);
-    l4 = paging_map_log_dirty_bitmap(d);
-
-    for ( i4 = b4;
-          (pages < nr) && (i4 < LOGDIRTY_NODE_ENTRIES);
-          i4++ )
+    /* Only called when tracking dirty vram in HAP mode */
+    ASSERT(hap_enabled(d) && d->arch.hvm_domain.dirty_vram);
+    
+    range = dirty_vram_range_find_gfn(d, begin_pfn);
+    if ( range )
     {
-        l3 = (l4 && mfn_valid(l4[i4])) ? map_domain_page(mfn_x(l4[i4])) : NULL;
-        for ( i3 = b3;
-              (pages < nr) && (i3 < LOGDIRTY_NODE_ENTRIES);
-              i3++ )
-        {
-            l2 = ((l3 && mfn_valid(l3[i3])) ?
-                  map_domain_page(mfn_x(l3[i3])) : NULL);
-            for ( i2 = b2;
-                  (pages < nr) && (i2 < LOGDIRTY_NODE_ENTRIES);
-                  i2++ )
-            {
-                unsigned int bytes = PAGE_SIZE;
-                uint8_t *s;
-                l1 = ((l2 && mfn_valid(l2[i2])) ?
-                      map_domain_page(mfn_x(l2[i2])) : NULL);
-
-                s = ((uint8_t*)l1) + (b1 >> 3);
-                bytes -= b1 >> 3;
-
-                if ( likely(((nr - pages + 7) >> 3) < bytes) )
-                    bytes = (unsigned int)((nr - pages + 7) >> 3);
-
-                if ( !l1 )
-                {
-                    if ( clear_guest_offset(dirty_bitmap, pages >> 3,
-                                            bytes) != 0 )
-                    {
-                        rv = -EFAULT;
-                        goto out;
-                    }
-                }
-                /* begin_pfn is not 32K aligned, hence we have to bit
-                 * shift the bitmap */
-                else if ( b1 & 0x7 )
-                {
-                    int i, j;
-                    uint32_t *l = (uint32_t*) s;
-                    int bits = b1 & 0x7;
-                    int bitmask = (1 << bits) - 1;
-                    int size = (bytes + BYTES_PER_LONG - 1) / BYTES_PER_LONG;
-                    unsigned long bitmap[size];
-                    static unsigned long printed = 0;
-
-                    if ( printed != begin_pfn )
-                    {
-                        dprintk(XENLOG_DEBUG, "%s: begin_pfn %lx is not 32K aligned!\n",
-                                __FUNCTION__, begin_pfn);
-                        printed = begin_pfn;
-                    }
-
-                    for ( i = 0; i < size - 1; i++, l++ ) {
-                        bitmap[i] = ((*l) >> bits) |
-                            (((*((uint8_t*)(l + 1))) & bitmask) << (sizeof(*l) * 8 - bits));
-                    }
-                    s = (uint8_t*) l;
-                    size = BYTES_PER_LONG - ((b1 >> 3) & 0x3);
-                    bitmap[i] = 0;
-                    for ( j = 0; j < size; j++, s++ )
-                        bitmap[i] |= (*s) << (j * 8);
-                    bitmap[i] = (bitmap[i] >> bits) | (bitmask << (size * 8 - bits));
-                    if ( copy_to_guest_offset(dirty_bitmap, (pages >> 3),
-                                (uint8_t*) bitmap, bytes) != 0 )
-                    {
-                        rv = -EFAULT;
-                        goto out;
-                    }
-                }
-                else
-                {
-                    if ( copy_to_guest_offset(dirty_bitmap, pages >> 3,
-                                              s, bytes) != 0 )
-                    {
-                        rv = -EFAULT;
-                        goto out;
-                    }
-                }
-
-                pages += bytes << 3;
-                if ( l1 )
-                {
-                    clear_page(l1);
-                    unmap_domain_page(l1);
-                }
-                b1 = b1 & 0x7;
-            }
-            b2 = 0;
-            if ( l2 )
-                unmap_domain_page(l2);
-        }
-        b3 = 0;
-        if ( l3 )
-            unmap_domain_page(l3);
+        range_dirty_count = range->dirty_count;
+        range->dirty_count = 0;
     }
-    if ( l4 )
-        unmap_domain_page(l4);
-
-    paging_unlock(d);
+    
+    if ( !range_dirty_count )
+        goto out;
 
-    return rv;
+    PAGING_DEBUG(LOGDIRTY,
+                 "log-dirty-range: dom %u [%05lx:%05lx] range_dirty=%u\n",
+                 d->domain_id,
+                 begin_pfn,
+                 range->end_pfn,
+                 range_dirty_count);
 
+    hap_clean_vram_tracking_range(d, begin_pfn, nr, dirty_bitmap);
  out:
     paging_unlock(d);
-    return rv;
+    p2m_unlock(p2m);
+    return;
 }
 
 /* Note that this function takes three function pointers. Callers must supply
diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c
index ce79131..56afd16 100644
--- a/xen/arch/x86/mm/shadow/common.c
+++ b/xen/arch/x86/mm/shadow/common.c
@@ -36,6 +36,7 @@
 #include <asm/current.h>
 #include <asm/flushtlb.h>
 #include <asm/shadow.h>
+#include <asm/dirty_vram.h>
 #include <xen/numa.h>
 #include "private.h"
 
@@ -3121,12 +3122,7 @@ void shadow_teardown(struct domain *d)
      * calls now that we've torn down the bitmap */
     d->arch.paging.mode &= ~PG_log_dirty;
 
-    if (d->arch.hvm_domain.dirty_vram) {
-        xfree(d->arch.hvm_domain.dirty_vram->sl1ma);
-        xfree(d->arch.hvm_domain.dirty_vram->dirty_bitmap);
-        xfree(d->arch.hvm_domain.dirty_vram);
-        d->arch.hvm_domain.dirty_vram = NULL;
-    }
+    dirty_vram_free(d);
 
     paging_unlock(d);
 
@@ -3463,179 +3459,212 @@ void shadow_clean_dirty_bitmap(struct domain *d)
 
 
 /**************************************************************************/
-/* VRAM dirty tracking support */
-int shadow_track_dirty_vram(struct domain *d,
-                            unsigned long begin_pfn,
-                            unsigned long nr,
-                            XEN_GUEST_HANDLE_64(uint8) dirty_bitmap)
-{
-    int rc;
-    unsigned long end_pfn = begin_pfn + nr;
-    unsigned long dirty_size = (nr + 7) / 8;
-    int flush_tlb = 0;
-    unsigned long i;
-    p2m_type_t t;
-    struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
-    struct p2m_domain *p2m = p2m_get_hostp2m(d);
-
-    if (end_pfn < begin_pfn
-            || begin_pfn > p2m->max_mapped_pfn
-            || end_pfn >= p2m->max_mapped_pfn)
-        return -EINVAL;
-
-    /* We perform p2m lookups, so lock the p2m upfront to avoid deadlock */
-    p2m_lock(p2m_get_hostp2m(d));
-    paging_lock(d);
+/* Support functions for shadow-based dirty VRAM code */
 
-    if ( dirty_vram && (!nr ||
-             ( begin_pfn != dirty_vram->begin_pfn
-            || end_pfn   != dirty_vram->end_pfn )) )
-    {
-        /* Different tracking, tear the previous down. */
-        gdprintk(XENLOG_INFO, "stopping tracking VRAM %lx - %lx\n", dirty_vram->begin_pfn, dirty_vram->end_pfn);
-        xfree(dirty_vram->sl1ma);
-        xfree(dirty_vram->dirty_bitmap);
-        xfree(dirty_vram);
-        dirty_vram = d->arch.hvm_domain.dirty_vram = NULL;
-    }
+#define DEBUG_unshadow_sl1ma                  0          
+#define DEBUG_unshadow_sl1ma_detail           0
+#define DEBUG_count_initial_mappings          1
 
-    if ( !nr )
+/* smfn is no longer a shadow page.  Remove it from any
+ * dirty vram range mapping. */
+void
+dirty_vram_delete_shadow(struct vcpu *v,
+                         unsigned long gfn,
+                         unsigned int shadow_type, 
+                         mfn_t smfn)
+{
+    static unsigned int l1_shadow_mask = 
+          1 << SH_type_l1_32_shadow
+        | 1 << SH_type_fl1_32_shadow
+        | 1 << SH_type_l1_pae_shadow
+        | 1 << SH_type_fl1_pae_shadow
+        | 1 << SH_type_l1_64_shadow
+        | 1 << SH_type_fl1_64_shadow
+        ;
+    struct domain *d = v->domain;
+    dv_dirty_vram_t *dirty_vram;
+    struct list_head *curr, *next;
+    
+    ASSERT(paging_locked_by_me(d));
+    /* Ignore all but level 1 shadows */
+    
+    if ( (l1_shadow_mask & (1 << shadow_type)) == 0 )
     {
-        rc = 0;
         goto out;
     }
 
-    /* This should happen seldomly (Video mode change),
-     * no need to be careful. */
+    dirty_vram = d->arch.hvm_domain.dirty_vram;
     if ( !dirty_vram )
     {
-        /* Throw away all the shadows rather than walking through them 
-         * up to nr times getting rid of mappings of each pfn */
-        shadow_blow_tables(d);
-
-        gdprintk(XENLOG_INFO, "tracking VRAM %lx - %lx\n", begin_pfn, end_pfn);
-
-        rc = -ENOMEM;
-        if ( (dirty_vram = xmalloc(struct sh_dirty_vram)) == NULL )
-            goto out;
-        dirty_vram->begin_pfn = begin_pfn;
-        dirty_vram->end_pfn = end_pfn;
-        d->arch.hvm_domain.dirty_vram = dirty_vram;
-
-        if ( (dirty_vram->sl1ma = xmalloc_array(paddr_t, nr)) == NULL )
-            goto out_dirty_vram;
-        memset(dirty_vram->sl1ma, ~0, sizeof(paddr_t) * nr);
-
-        if ( (dirty_vram->dirty_bitmap = xzalloc_array(uint8_t, dirty_size)) == NULL )
-            goto out_sl1ma;
-
-        dirty_vram->last_dirty = NOW();
-
-        /* Tell the caller that this time we could not track dirty bits. */
-        rc = -ENODATA;
-    }
-    else if (dirty_vram->last_dirty == -1)
-    {
-        /* still completely clean, just copy our empty bitmap */
-        rc = -EFAULT;
-        if ( copy_to_guest(dirty_bitmap, dirty_vram->dirty_bitmap, dirty_size) == 0 )
-            rc = 0;
+        goto out;
     }
-    else
+        
+    list_for_each_safe(curr, next, &dirty_vram->range_head)
     {
-        /* Iterate over VRAM to track dirty bits. */
-        for ( i = 0; i < nr; i++ ) {
-            mfn_t mfn = get_gfn_query_unlocked(d, begin_pfn + i, &t);
-            struct page_info *page;
-            int dirty = 0;
-            paddr_t sl1ma = dirty_vram->sl1ma[i];
-
-            if (mfn_x(mfn) == INVALID_MFN)
-            {
-                dirty = 1;
-            }
-            else
+        dv_range_t *range = list_entry(curr, dv_range_t, range_link);
+        unsigned long i;
+        int max_mappings = 1, mappings = 0;
+        int unshadowed = 0;
+        for ( i = 0; i != range->end_pfn - range->begin_pfn; i++ )
+        {
+            dv_paddr_link_t *pl = &range->pl_tab[ i ].mapping;
+            dv_paddr_link_t **ppl = NULL;
+            mappings = 0;
+            
+            while ( pl != NULL )
             {
-                page = mfn_to_page(mfn);
-                switch (page->u.inuse.type_info & PGT_count_mask)
-                {
-                case 0:
-                    /* No guest reference, nothing to track. */
-                    break;
-                case 1:
-                    /* One guest reference. */
-                    if ( sl1ma == INVALID_PADDR )
-                    {
-                        /* We don't know which sl1e points to this, too bad. */
-                        dirty = 1;
-                        /* TODO: Heuristics for finding the single mapping of
-                         * this gmfn */
-                        flush_tlb |= sh_remove_all_mappings(d->vcpu[0], mfn);
-                    }
-                    else
-                    {
-                        /* Hopefully the most common case: only one mapping,
-                         * whose dirty bit we can use. */
-                        l1_pgentry_t *sl1e = maddr_to_virt(sl1ma);
-
-                        if ( l1e_get_flags(*sl1e) & _PAGE_DIRTY )
-                        {
-                            dirty = 1;
-                            /* Note: this is atomic, so we may clear a
-                             * _PAGE_ACCESSED set by another processor. */
-                            l1e_remove_flags(*sl1e, _PAGE_DIRTY);
-                            flush_tlb = 1;
-                        }
-                    }
-                    break;
-                default:
-                    /* More than one guest reference,
-                     * we don't afford tracking that. */
-                    dirty = 1;
+                paddr_t sl1ma = pl->sl1ma;
+                unsigned long sl1mn;
+                
+                if ( sl1ma == INVALID_PADDR )
                     break;
+                
+                sl1mn = sl1ma >> PAGE_SHIFT;
+                if ( sl1mn == mfn_x(smfn) ) {
+#if DEBUG_unshadow_sl1ma_detail
+                    gdprintk(XENLOG_DEBUG,
+                             "[%lx] gfn[%lx] unshadow sl1ma:%lx\n",
+                             mfn_x(smfn),
+                             range->begin_pfn + i,
+                             sl1ma);
+#endif
+                    unshadowed++;
+                    pl = free_paddr_link(d, ppl, pl);
+                    --range->nr_mappings;
+                }
+                else
+                {
+                    ppl = &pl->pl_next;
+                    pl = *ppl;
+                    mappings++;
                 }
             }
-
-            if ( dirty )
+        }
+        if ( mappings > max_mappings )
+            max_mappings = mappings;
+        
+        if ( unshadowed ) {
+#if DEBUG_unshadow_sl1ma
+            gdprintk(XENLOG_DEBUG,
+                     "[%lx] gfn[%05lx:%05lx] unshadowed:%d mappings:0x%x max_mappings:%d\n",
+                     mfn_x(smfn),
+                     range->begin_pfn, range->end_pfn,
+                     unshadowed, range->nr_mappings, max_mappings);
+#endif
+            if ( range->nr_mappings == 0 )
             {
-                dirty_vram->dirty_bitmap[i / 8] |= 1 << (i % 8);
-                dirty_vram->last_dirty = NOW();
+                dirty_vram_range_free(d, range);                    
             }
         }
+    }
+ out:
+    return;
+}
+
+
+typedef int (*hash_pfn_callback_t)(struct vcpu *v,
+                                   mfn_t smfn,
+                                   unsigned long begin_pfn,
+                                   unsigned long end_pfn,
+                                   int *removed);
+
+static int hash_pfn_foreach(struct vcpu *v, 
+                            unsigned int callback_mask, 
+                            hash_pfn_callback_t callbacks[], 
+                            unsigned long begin_pfn,
+                            unsigned long end_pfn)
+/* Walk the hash table looking at the types of the entries and 
+ * calling the appropriate callback function for each entry. 
+ * The mask determines which shadow types we call back for, and the array
+ * of callbacks tells us which function to call.
+ * Any callback may return non-zero to let us skip the rest of the scan. 
+ *
+ * WARNING: Callbacks MUST NOT add or remove hash entries unless they 
+ * then return non-zero to terminate the scan. */
+{
+    int i, done = 0, removed = 0;
+    struct domain *d = v->domain;
+    struct page_info *x;
+
+    /* Say we're here, to stop hash-lookups reordering the chains */
+    ASSERT(paging_locked_by_me(d));
+    ASSERT(d->arch.paging.shadow.hash_walking == 0);
+    d->arch.paging.shadow.hash_walking = 1;
 
-        rc = -EFAULT;
-        if ( copy_to_guest(dirty_bitmap, dirty_vram->dirty_bitmap, dirty_size) == 0 ) {
-            memset(dirty_vram->dirty_bitmap, 0, dirty_size);
-            if (dirty_vram->last_dirty + SECONDS(2) < NOW())
+    for ( i = 0; i < SHADOW_HASH_BUCKETS; i++ ) 
+    {
+        /* WARNING: This is not safe against changes to the hash table.
+         * The callback *must* return non-zero if it has inserted or
+         * deleted anything from the hash (lookups are OK, though). */
+        for ( x = d->arch.paging.shadow.hash_table[i]; x; x = next_shadow(x) )
+        {
+            if ( callback_mask & (1 << x->u.sh.type) )
             {
-                /* was clean for more than two seconds, try to disable guest
-                 * write access */
-                for ( i = begin_pfn; i < end_pfn; i++ ) {
-                    mfn_t mfn = get_gfn_query_unlocked(d, i, &t);
-                    if (mfn_x(mfn) != INVALID_MFN)
-                        flush_tlb |= sh_remove_write_access(d->vcpu[0], mfn, 1, 0);
-                }
-                dirty_vram->last_dirty = -1;
+                ASSERT(x->u.sh.type <= 15);
+                ASSERT(callbacks[x->u.sh.type] != NULL);
+                done = callbacks[x->u.sh.type](v, page_to_mfn(x), 
+                                               begin_pfn, end_pfn,
+                                               &removed);
+                if ( done ) break;
             }
-            rc = 0;
         }
+        if ( done ) break; 
     }
-    if ( flush_tlb )
-        flush_tlb_mask(d->domain_dirty_cpumask);
-    goto out;
+    d->arch.paging.shadow.hash_walking = 0;
+    return removed;
+}
 
-out_sl1ma:
-    xfree(dirty_vram->sl1ma);
-out_dirty_vram:
-    xfree(dirty_vram);
-    dirty_vram = d->arch.hvm_domain.dirty_vram = NULL;
+void sh_find_all_vram_mappings(struct vcpu *v,
+                               dv_range_t *range)
+{
+    /* Dispatch table for getting per-type functions */
+    static hash_pfn_callback_t callbacks[SH_type_unused] = {
+        NULL, /* none    */
+        SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, 2), /* l1_32   */
+        SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, 2), /* fl1_32  */
+        NULL, /* l2_32   */
+        SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, 3), /* l1_pae  */
+        SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, 3), /* fl1_pae */
+        NULL, /* l2_pae  */
+        NULL, /* l2h_pae */
+#if CONFIG_PAGING_LEVELS >= 4
+        SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, 4), /* l1_64   */
+        SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, 4), /* fl1_64  */
+#else
+        NULL, /* l1_64   */
+        NULL, /* fl1_64  */
+#endif
+        NULL, /* l2_64   */
+        NULL, /* l2h_64  */
+        NULL, /* l3_64   */
+        NULL, /* l4_64   */
+        NULL, /* p2m     */
+        NULL  /* unused  */
+    };
 
-out:
-    paging_unlock(d);
-    p2m_unlock(p2m_get_hostp2m(d));
-    return rc;
+    static unsigned int callback_mask = 
+          1 << SH_type_l1_32_shadow
+        | 1 << SH_type_fl1_32_shadow
+        | 1 << SH_type_l1_pae_shadow
+        | 1 << SH_type_fl1_pae_shadow
+        | 1 << SH_type_l1_64_shadow
+        | 1 << SH_type_fl1_64_shadow
+        ;
+
+    perfc_incr(shadow_mappings);
+
+    hash_pfn_foreach(v, callback_mask, callbacks,
+                     range->begin_pfn,
+                     range->end_pfn);
+
+#if DEBUG_count_initial_mappings
+    gdprintk(XENLOG_DEBUG, "[%05lx:%05lx] count of initial mappings:%d\n",
+             range->begin_pfn, range->end_pfn,
+             range->nr_mappings);
+#endif
 }
 
+
 /**************************************************************************/
 /* Shadow-control XEN_DOMCTL dispatcher */
 
diff --git a/xen/arch/x86/mm/shadow/multi.c b/xen/arch/x86/mm/shadow/multi.c
index b0e6d72..9c00574 100644
--- a/xen/arch/x86/mm/shadow/multi.c
+++ b/xen/arch/x86/mm/shadow/multi.c
@@ -35,6 +35,7 @@
 #include <asm/flushtlb.h>
 #include <asm/hvm/hvm.h>
 #include <asm/hvm/cacheattr.h>
+#include <asm/dirty_vram.h>
 #include <asm/mtrr.h>
 #include <asm/guest_pt.h>
 #include <public/sched.h>
@@ -149,6 +150,10 @@ delete_fl1_shadow_status(struct vcpu *v, gfn_t gfn, mfn_t smfn)
     SHADOW_PRINTK("gfn=%"SH_PRI_gfn", type=%08x, smfn=%05lx\n",
                    gfn_x(gfn), SH_type_fl1_shadow, mfn_x(smfn));
     ASSERT(mfn_to_page(smfn)->u.sh.head);
+
+    /* Remove any dv_paddr_links to the erstwhile shadow page */
+    dirty_vram_delete_shadow(v, gfn_x(gfn), SH_type_fl1_shadow, smfn);
+    
     shadow_hash_delete(v, gfn_x(gfn), SH_type_fl1_shadow, smfn);
 }
 
@@ -160,6 +165,10 @@ delete_shadow_status(struct vcpu *v, mfn_t gmfn, u32 shadow_type, mfn_t smfn)
                    v->domain->domain_id, v->vcpu_id,
                    mfn_x(gmfn), shadow_type, mfn_x(smfn));
     ASSERT(mfn_to_page(smfn)->u.sh.head);
+    
+    /* Remove any dv_paddr_links to the erstwhile shadow page */
+    dirty_vram_delete_shadow(v, mfn_x(gmfn), shadow_type, smfn);
+    
     shadow_hash_delete(v, mfn_x(gmfn), shadow_type, smfn);
     /* 32-on-64 PV guests don't own their l4 pages; see set_shadow_status */
     if ( !is_pv_32on64_vcpu(v) || shadow_type != SH_type_l4_64_shadow )
@@ -516,7 +525,6 @@ _sh_propagate(struct vcpu *v,
     guest_l1e_t guest_entry = { guest_intpte };
     shadow_l1e_t *sp = shadow_entry_ptr;
     struct domain *d = v->domain;
-    struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
     gfn_t target_gfn = guest_l1e_get_gfn(guest_entry);
     u32 pass_thru_flags;
     u32 gflags, sflags;
@@ -663,17 +671,6 @@ _sh_propagate(struct vcpu *v,
         }
     }
 
-    if ( unlikely((level == 1) && dirty_vram
-            && dirty_vram->last_dirty == -1
-            && gfn_x(target_gfn) >= dirty_vram->begin_pfn
-            && gfn_x(target_gfn) < dirty_vram->end_pfn) )
-    {
-        if ( ft & FETCH_TYPE_WRITE )
-            dirty_vram->last_dirty = NOW();
-        else
-            sflags &= ~_PAGE_RW;
-    }
-
     /* Read-only memory */
     if ( p2m_is_readonly(p2mt) ||
          (p2mt == p2m_mmio_direct &&
@@ -1072,101 +1069,60 @@ static int shadow_set_l2e(struct vcpu *v,
     return flags;
 }
 
-static inline void shadow_vram_get_l1e(shadow_l1e_t new_sl1e,
+/* shadow_vram_fix_l1e()
+ *
+ * Tests L1PTEs as they are modified, looking for when they start to
+ * (or cease to) point to frame buffer pages.  If the old and new gfns differ,
+ * calls dirty_vram_range_update() to update the dirty_vram structures.
+ */
+static inline void shadow_vram_fix_l1e(shadow_l1e_t old_sl1e,
+                                       shadow_l1e_t new_sl1e,
                                        shadow_l1e_t *sl1e,
                                        mfn_t sl1mfn,
                                        struct domain *d)
 { 
-    mfn_t mfn = shadow_l1e_get_mfn(new_sl1e);
-    int flags = shadow_l1e_get_flags(new_sl1e);
-    unsigned long gfn;
-    struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    mfn_t new_mfn, old_mfn;
+    unsigned long new_gfn = INVALID_M2P_ENTRY, old_gfn = INVALID_M2P_ENTRY;
+    paddr_t sl1ma;
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
 
-    if ( !dirty_vram         /* tracking disabled? */
-         || !(flags & _PAGE_RW) /* read-only mapping? */
-         || !mfn_valid(mfn) )   /* mfn can be invalid in mmio_direct */
+    if ( !dirty_vram )
         return;
 
-    gfn = mfn_to_gfn(d, mfn);
-    /* Page sharing not supported on shadow PTs */
-    BUG_ON(SHARED_M2P(gfn));
+    sl1ma = pfn_to_paddr(mfn_x(sl1mfn)) | ((unsigned long)sl1e & ~PAGE_MASK);
 
-    if ( (gfn >= dirty_vram->begin_pfn) && (gfn < dirty_vram->end_pfn) )
+    old_mfn = shadow_l1e_get_mfn(old_sl1e);
+
+    if ( !sh_l1e_is_magic(old_sl1e) &&
+         (l1e_get_flags(old_sl1e) & _PAGE_PRESENT) &&
+         mfn_valid(old_mfn))
     {
-        unsigned long i = gfn - dirty_vram->begin_pfn;
-        struct page_info *page = mfn_to_page(mfn);
-        
-        if ( (page->u.inuse.type_info & PGT_count_mask) == 1 )
-            /* Initial guest reference, record it */
-            dirty_vram->sl1ma[i] = pfn_to_paddr(mfn_x(sl1mfn))
-                | ((unsigned long)sl1e & ~PAGE_MASK);
+        old_gfn = mfn_to_gfn(d, old_mfn);
     }
-}
-
-static inline void shadow_vram_put_l1e(shadow_l1e_t old_sl1e,
-                                       shadow_l1e_t *sl1e,
-                                       mfn_t sl1mfn,
-                                       struct domain *d)
-{
-    mfn_t mfn = shadow_l1e_get_mfn(old_sl1e);
-    int flags = shadow_l1e_get_flags(old_sl1e);
-    unsigned long gfn;
-    struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
-
-    if ( !dirty_vram         /* tracking disabled? */
-         || !(flags & _PAGE_RW) /* read-only mapping? */
-         || !mfn_valid(mfn) )   /* mfn can be invalid in mmio_direct */
-        return;
-
-    gfn = mfn_to_gfn(d, mfn);
-    /* Page sharing not supported on shadow PTs */
-    BUG_ON(SHARED_M2P(gfn));
-
-    if ( (gfn >= dirty_vram->begin_pfn) && (gfn < dirty_vram->end_pfn) )
+    
+    new_mfn = shadow_l1e_get_mfn(new_sl1e);
+    if ( !sh_l1e_is_magic(new_sl1e) &&
+         (l1e_get_flags(new_sl1e) & _PAGE_PRESENT) &&
+         mfn_valid(new_mfn))
     {
-        unsigned long i = gfn - dirty_vram->begin_pfn;
-        struct page_info *page = mfn_to_page(mfn);
-        int dirty = 0;
-        paddr_t sl1ma = pfn_to_paddr(mfn_x(sl1mfn))
-            | ((unsigned long)sl1e & ~PAGE_MASK);
+        new_gfn = mfn_to_gfn(d, new_mfn);
+    }
 
-        if ( (page->u.inuse.type_info & PGT_count_mask) == 1 )
-        {
-            /* Last reference */
-            if ( dirty_vram->sl1ma[i] == INVALID_PADDR ) {
-                /* We didn't know it was that one, let's say it is dirty */
-                dirty = 1;
-            }
-            else
-            {
-                ASSERT(dirty_vram->sl1ma[i] == sl1ma);
-                dirty_vram->sl1ma[i] = INVALID_PADDR;
-                if ( flags & _PAGE_DIRTY )
-                    dirty = 1;
-            }
-        }
-        else
+    if ( old_gfn == new_gfn ) return;
+
+    if ( VALID_M2P(old_gfn) )
+        if ( dirty_vram_range_update(d, old_gfn, sl1ma, 0/*clear*/) )
         {
-            /* We had more than one reference, just consider the page dirty. */
-            dirty = 1;
-            /* Check that it's not the one we recorded. */
-            if ( dirty_vram->sl1ma[i] == sl1ma )
-            {
-                /* Too bad, we remembered the wrong one... */
-                dirty_vram->sl1ma[i] = INVALID_PADDR;
-            }
-            else
-            {
-                /* Ok, our recorded sl1e is still pointing to this page, let's
-                 * just hope it will remain. */
-            }
+            SHADOW_PRINTK("gfn %lx (mfn %lx) cleared vram pte\n",
+                          old_gfn, mfn_x(old_mfn));
         }
-        if ( dirty )
+
+    if ( VALID_M2P(new_gfn) )
+        if ( dirty_vram_range_update(d, new_gfn, sl1ma, 1/*set*/) )
         {
-            dirty_vram->dirty_bitmap[i / 8] |= 1 << (i % 8);
-            dirty_vram->last_dirty = NOW();
+            SHADOW_PRINTK("gfn %lx (mfn %lx) set vram pte\n",
+                          new_gfn, mfn_x(new_mfn));
         }
-    }
 }
 
 static int shadow_set_l1e(struct vcpu *v, 
@@ -1211,12 +1167,13 @@ static int shadow_set_l1e(struct vcpu *v,
                 shadow_l1e_remove_flags(new_sl1e, _PAGE_RW);
                 /* fall through */
             case 0:
-                shadow_vram_get_l1e(new_sl1e, sl1e, sl1mfn, d);
                 break;
             }
         }
     } 
 
+    shadow_vram_fix_l1e(old_sl1e, new_sl1e, sl1e, sl1mfn, d);
+
     /* Write the new entry */
     shadow_write_entries(sl1e, &new_sl1e, 1, sl1mfn);
     flags |= SHADOW_SET_CHANGED;
@@ -1231,7 +1188,6 @@ static int shadow_set_l1e(struct vcpu *v,
          * trigger a flush later. */
         if ( shadow_mode_refcounts(d) ) 
         {
-            shadow_vram_put_l1e(old_sl1e, sl1e, sl1mfn, d);
             shadow_put_page_from_l1e(old_sl1e, d);
             TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_SHADOW_L1_PUT_REF);
         } 
@@ -2018,7 +1974,6 @@ void sh_destroy_l1_shadow(struct vcpu *v, mfn_t smfn)
         SHADOW_FOREACH_L1E(sl1mfn, sl1e, 0, 0, {
             if ( (shadow_l1e_get_flags(*sl1e) & _PAGE_PRESENT)
                  && !sh_l1e_is_magic(*sl1e) ) {
-                shadow_vram_put_l1e(*sl1e, sl1e, sl1mfn, d);
                 shadow_put_page_from_l1e(*sl1e, d);
             }
         });
@@ -4336,6 +4291,37 @@ int sh_rm_mappings_from_l1(struct vcpu *v, mfn_t sl1mfn, mfn_t target_mfn)
     return done;
 }
 
+
+int sh_find_vram_mappings_in_l1(struct vcpu *v,
+                                mfn_t sl1mfn,
+                                unsigned long begin_pfn,
+                                unsigned long end_pfn,
+                                int *removed)
+/* Find all VRAM mappings in this shadow l1 table */
+{
+    struct domain *d = v->domain;
+    shadow_l1e_t *sl1e;
+    int done = 0;
+
+    /* only returns _PAGE_PRESENT entries */
+    SHADOW_FOREACH_L1E(sl1mfn, sl1e, 0, done, 
+    {
+        unsigned long gfn;
+        mfn_t gmfn = shadow_l1e_get_mfn(*sl1e);
+        if ( !mfn_valid(gmfn) )
+            continue;
+        gfn = mfn_to_gfn(d, gmfn);
+        if ( VALID_M2P(gfn) && (begin_pfn <= gfn) && (gfn < end_pfn) ) 
+        {
+            paddr_t sl1ma =
+                pfn_to_paddr(mfn_x(sl1mfn)) |
+                ( (unsigned long)sl1e & ~PAGE_MASK );
+            dirty_vram_range_update(v->domain, gfn, sl1ma, 1/*set*/);
+        }
+    });
+    return 0;
+}
+
 /**************************************************************************/
 /* Functions to excise all pointers to shadows from higher-level shadows. */
 
diff --git a/xen/arch/x86/mm/shadow/multi.h b/xen/arch/x86/mm/shadow/multi.h
index 835121e..436a4ac 100644
--- a/xen/arch/x86/mm/shadow/multi.h
+++ b/xen/arch/x86/mm/shadow/multi.h
@@ -66,7 +66,12 @@ SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1, GUEST_LEVELS)
 extern int
 SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1, GUEST_LEVELS)
     (struct vcpu *v, mfn_t sl1mfn, mfn_t target_mfn);
-
+extern int
+SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, GUEST_LEVELS)
+     (struct vcpu *v, mfn_t sl1mfn, 
+      unsigned long begin_pfn,
+      unsigned long end_pfn,
+      int *removed);
 extern void
 SHADOW_INTERNAL_NAME(sh_clear_shadow_entry, GUEST_LEVELS)
     (struct vcpu *v, void *ep, mfn_t smfn);
diff --git a/xen/arch/x86/mm/shadow/types.h b/xen/arch/x86/mm/shadow/types.h
index 43ce1db..5b0f9f7 100644
--- a/xen/arch/x86/mm/shadow/types.h
+++ b/xen/arch/x86/mm/shadow/types.h
@@ -229,6 +229,7 @@ static inline shadow_l4e_t shadow_l4e_from_mfn(mfn_t mfn, u32 flags)
 #define sh_update_cr3              INTERNAL_NAME(sh_update_cr3)
 #define sh_rm_write_access_from_l1 INTERNAL_NAME(sh_rm_write_access_from_l1)
 #define sh_rm_mappings_from_l1     INTERNAL_NAME(sh_rm_mappings_from_l1)
+#define sh_find_vram_mappings_in_l1 INTERNAL_NAME(sh_find_vram_mappings_in_l1)
 #define sh_remove_l1_shadow        INTERNAL_NAME(sh_remove_l1_shadow)
 #define sh_remove_l2_shadow        INTERNAL_NAME(sh_remove_l2_shadow)
 #define sh_remove_l3_shadow        INTERNAL_NAME(sh_remove_l3_shadow)
diff --git a/xen/include/asm-x86/dirty_vram.h b/xen/include/asm-x86/dirty_vram.h
new file mode 100644
index 0000000..95637de
--- /dev/null
+++ b/xen/include/asm-x86/dirty_vram.h
@@ -0,0 +1,201 @@
+/******************************************************************************
+ * include/asm-x86/dirty_vram.h
+ *
+ * Interface for tracking dirty VRAM pages
+ *
+ * Copyright (c) 2012 Citrix Systems, Inc. (Robert Phillips)
+ * Parts of this code are Copyright (c) 2006 by XenSource Inc.
+ * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
+ * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#ifndef _DIRTY_VRAM_H
+#define _DIRTY_VRAM_H
+
+/*
+ * In shadow mode we need to bookkeep all the L1 page table entries that
+ * map a frame buffer page.  Struct dv_paddr_link does this by recording
+ * the address of an L1 page table entry for some frame buffer page.
+ * It also links to additional pl entries if the frame buffer page
+ * has multiple mappings.
+ * In practice very few pages have multiple mappings, but to rule out
+ * pathological situations we limit the number of mappings we are willing
+ * to bookkeep.
+ */
+
+#define DV_ADDR_LINK_LIST_LIMIT 20
+
+typedef struct dv_paddr_link {
+    paddr_t sl1ma;
+    struct dv_paddr_link *pl_next;
+} dv_paddr_link_t;
+
+typedef struct dv_pl_entry {
+    dv_paddr_link_t mapping;
+    bool_t stuck_dirty;
+} dv_pl_entry_t;
+
+/*
+ * This defines an extension page of pl entries for FB pages with multiple
+ * mappings. All such pages (of a domain) are linked together.
+ */
+typedef struct dv_paddr_link_ext {
+    struct list_head ext_link;
+    dv_paddr_link_t entries[ ( PAGE_SIZE - sizeof( struct list_head ) ) /
+                             sizeof( dv_paddr_link_t ) ];
+} dv_paddr_link_ext_t;
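+
+/*
+ * Illustrative sizing (assuming a 64-bit build with 4096-byte pages): with
+ * sizeof(struct list_head) == 16 and sizeof(dv_paddr_link_t) == 16, each
+ * extension page holds (4096 - 16) / 16 == 255 pl entries.
+ */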
+
+/*
+ * This defines a single frame buffer range.  It bookkeeps all the level 1 PTEs
+ * that map guest pages within that range.
+ * All such ranges (of a domain) are linked together.
+ */
+typedef struct dv_range {
+    struct list_head range_link; /* the several ranges form a linked list */
+    unsigned long begin_pfn;
+    unsigned long end_pfn;
+    dv_pl_entry_t *pl_tab; /* table has 1 pl entry per pfn in range */
+    int nr_mappings;  /* total number of mappings in this range */
+    int mappings_hwm; /* high water mark of max mapping count */
+    unsigned int dirty_count;
+} dv_range_t;
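+
+/*
+ * Illustrative sketch (assumes a populated dv_range_t *r and an offset i
+ * with 0 <= i < r->end_pfn - r->begin_pfn): counting the live mappings of
+ * the frame buffer page at gfn r->begin_pfn + i.
+ *
+ *     dv_paddr_link_t *pl;
+ *     int n = 0;
+ *     for ( pl = &r->pl_tab[i].mapping; pl != NULL; pl = pl->pl_next )
+ *         if ( pl->sl1ma != INVALID_PADDR )
+ *             n++;
+ */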
+
+/*
+ * This contains all the data structures required by a domain to
+ * bookkeep the dirty pages within its frame buffers.
+ */
+typedef struct dv_dirty_vram {
+    struct list_head range_head; /* head of the linked list of ranges */
+    struct list_head ext_head; /* head of list of extension pages */
+    dv_paddr_link_t *pl_free; /* free list of pl's within extension pages */
+    int nr_ranges; /* bookkeeps number of ranges */
+    int ranges_hwm; /* high water mark of max number of ranges */
+} dv_dirty_vram_t;
+
+/* Allocates domain's dirty_vram structure */
+dv_dirty_vram_t *
+dirty_vram_alloc(struct domain *d);
+
+/*
+ * Returns domain's dirty_vram structure,
+ * allocating it if necessary
+ */
+dv_dirty_vram_t *
+dirty_vram_find_or_alloc(struct domain *d);
+
+/* Frees domain's dirty_vram structure */
+void dirty_vram_free(struct domain *d);
+
+/* Returns dirty vram range containing gfn, NULL if none */
+dv_range_t *
+dirty_vram_range_find_gfn(struct domain *d,
+                          unsigned long gfn);
+
+/*
+ * Returns dirty vram range matching [ begin_pfn .. begin_pfn+nr ),
+ * NULL if none
+ */
+dv_range_t *
+dirty_vram_range_find(struct domain *d,
+                      unsigned long begin_pfn,
+                      unsigned long nr);
+
+/*
+ * Allocate dirty vram range containing [ begin_pfn .. begin_pfn+nr ),
+ * freeing any existing range that overlaps the new range.
+ */
+dv_range_t *
+dirty_vram_range_alloc(struct domain *d,
+                       unsigned long begin_pfn,
+                       unsigned long nr);
+
+/*
+ * Returns dirty vram range matching [ begin_pfn .. begin_pfn+nr ),
+ * creating a range if none already exists and
+ * freeing any existing range that overlaps the new range.
+ */
+dv_range_t *
+dirty_vram_range_find_or_alloc(struct domain *d,
+                               unsigned long begin_pfn,
+                               unsigned long nr);
+
+void dirty_vram_range_free(struct domain *d,
+                           dv_range_t *range);
+
+/* Bookkeep PTE address of a frame buffer page */
+int dirty_vram_range_update(struct domain *d,
+                            unsigned long gfn,
+                            paddr_t sl1ma,
+                            int set);
+
+/*
+ * smfn is no longer a shadow page.  Remove it from any
+ * dirty vram range mapping.
+ */
+void
+dirty_vram_delete_shadow(struct vcpu *v,
+                         unsigned long gfn,
+                         unsigned int shadow_type,
+                         mfn_t smfn);
+
+
+/*
+ * Scan all the L1 tables looking for VRAM mappings.
+ * Record them in the domain's dv_dirty_vram structure
+ */
+void sh_find_all_vram_mappings(struct vcpu *v,
+                               dv_range_t *range);
+
+/*
+ * Free a paddr_link struct, given address of its
+ * predecessor in singly-linked list
+ */
+dv_paddr_link_t *
+free_paddr_link(struct domain *d,
+                dv_paddr_link_t **ppl,
+                dv_paddr_link_t *pl);
+
+
+/* Enable VRAM dirty tracking. */
+int
+shadow_track_dirty_vram(struct domain *d,
+                        unsigned long first_pfn,
+                        unsigned long nr,
+                        XEN_GUEST_HANDLE_64(uint8) dirty_bitmap);
+
+int
+hap_track_dirty_vram(struct domain *d,
+                     unsigned long begin_pfn,
+                     unsigned long nr,
+                     XEN_GUEST_HANDLE_64(uint8) dirty_bitmap);
+
+void
+hap_clean_vram_tracking_range(struct domain *d,
+                              unsigned long begin_pfn,
+                              unsigned long nr,
+                              uint8_t *dirty_bitmap);
+
+#endif /* _DIRTY_VRAM_H */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
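
To make the intended call sequence of the declarations above concrete, here
is a minimal sketch (illustration only, not part of the patch) of how a
caller holding the paging lock would set up tracking for one frame buffer;
example_track_one_range is a hypothetical wrapper:

/* Hypothetical wrapper, for illustration only. */
static void example_track_one_range(struct domain *d,
                                    unsigned long begin_pfn,
                                    unsigned long nr)
{
    dv_range_t *range;

    /* Per-domain bookkeeping state, created on first use. */
    if ( !dirty_vram_find_or_alloc(d) )
        return;

    /* Per-frame-buffer range; any overlapping range is torn down. */
    range = dirty_vram_range_find_or_alloc(d, begin_pfn, nr);
    if ( !range )
        return;

    /* From here on, dirty_vram_range_update() is called as the shadow
     * code installs or removes L1 PTEs mapping [ begin_pfn .. begin_pfn+nr ). */
}
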
diff --git a/xen/include/asm-x86/hap.h b/xen/include/asm-x86/hap.h
index 916a35b..3e3a1f5 100644
--- a/xen/include/asm-x86/hap.h
+++ b/xen/include/asm-x86/hap.h
@@ -57,10 +57,6 @@ void  hap_final_teardown(struct domain *d);
 void  hap_teardown(struct domain *d);
 void  hap_vcpu_init(struct vcpu *v);
 void  hap_logdirty_init(struct domain *d);
-int   hap_track_dirty_vram(struct domain *d,
-                           unsigned long begin_pfn,
-                           unsigned long nr,
-                           XEN_GUEST_HANDLE_64(uint8) dirty_bitmap);
 
 extern const struct paging_mode *hap_paging_get_mode(struct vcpu *);
 
diff --git a/xen/include/asm-x86/hvm/domain.h b/xen/include/asm-x86/hvm/domain.h
index 27b3de5..6146542 100644
--- a/xen/include/asm-x86/hvm/domain.h
+++ b/xen/include/asm-x86/hvm/domain.h
@@ -74,7 +74,7 @@ struct hvm_domain {
     struct list_head       pinned_cacheattr_ranges;
 
     /* VRAM dirty support. */
-    struct sh_dirty_vram *dirty_vram;
+    struct dv_dirty_vram *dirty_vram;
 
     /* If one of vcpus of this domain is in no_fill_mode or
      * mtrr/pat between vcpus is not the same, set is_in_uc_mode
diff --git a/xen/include/asm-x86/paging.h b/xen/include/asm-x86/paging.h
index 9a40f2c..f96ca3b 100644
--- a/xen/include/asm-x86/paging.h
+++ b/xen/include/asm-x86/paging.h
@@ -137,10 +137,10 @@ struct paging_mode {
 void paging_free_log_dirty_bitmap(struct domain *d);
 
 /* get the dirty bitmap for a specific range of pfns */
-int paging_log_dirty_range(struct domain *d,
-                           unsigned long begin_pfn,
-                           unsigned long nr,
-                           XEN_GUEST_HANDLE_64(uint8) dirty_bitmap);
+void paging_log_dirty_range(struct domain *d,
+                            unsigned long begin_pfn,
+                            unsigned long nr,
+                            uint8_t *dirty_bitmap);
 
 /* enable log dirty */
 int paging_log_dirty_enable(struct domain *d);
@@ -183,15 +183,6 @@ int paging_mfn_is_dirty(struct domain *d, mfn_t gmfn);
 #define L4_LOGDIRTY_IDX(pfn) 0
 #endif
 
-/* VRAM dirty tracking support */
-struct sh_dirty_vram {
-    unsigned long begin_pfn;
-    unsigned long end_pfn;
-    paddr_t *sl1ma;
-    uint8_t *dirty_bitmap;
-    s_time_t last_dirty;
-};
-
 /*****************************************************************************
  * Entry points into the paging-assistance code */
 
diff --git a/xen/include/asm-x86/shadow.h b/xen/include/asm-x86/shadow.h
index 2eb6efc..940d7fd 100644
--- a/xen/include/asm-x86/shadow.h
+++ b/xen/include/asm-x86/shadow.h
@@ -62,12 +62,6 @@ void shadow_vcpu_init(struct vcpu *v);
 /* Enable an arbitrary shadow mode.  Call once at domain creation. */
 int shadow_enable(struct domain *d, u32 mode);
 
-/* Enable VRAM dirty bit tracking. */
-int shadow_track_dirty_vram(struct domain *d,
-                            unsigned long first_pfn,
-                            unsigned long nr,
-                            XEN_GUEST_HANDLE_64(uint8) dirty_bitmap);
-
 /* Handler for shadow control ops: operations from user-space to enable
  * and disable ephemeral shadow modes (test mode and log-dirty mode) and
  * manipulate the log-dirty bitmap. */
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [PATCH] Provide support for multiple frame buffers in Xen
@ 2012-11-07 20:36 Robert Phillips
  0 siblings, 0 replies; 35+ messages in thread
From: Robert Phillips @ 2012-11-07 20:36 UTC (permalink / raw)
  To: xen-devel; +Cc: Robert Phillips

Support is provided for both shadow and hardware assisted paging (HAP) modes.
This code bookkeeps the set of video frame buffers (vram),
detects when the guest has modified any of those buffers and, upon request,
returns a bitmap of the modified pages.
This lets other software components re-paint the portions of the monitor (or monitors) that have changed.
Each monitor has a frame buffer of some size at some position in guest physical memory.
The set of frame buffers being tracked can change over time as monitors are plugged and unplugged.
(Version 2 of this patch.)

Signed-Off-By: Robert Phillips <robert.phillips@citrix.com>
---
 tools/libxc/xenctrl.h            |   17 +-
 xen/arch/x86/hvm/Makefile        |    2 +-
 xen/arch/x86/hvm/hvm.c           |    4 +-
 xen/arch/x86/mm/Makefile         |    1 +
 xen/arch/x86/mm/dirty_vram.c     |  951 ++++++++++++++++++++++++++++++++++++++
 xen/arch/x86/mm/hap/hap.c        |  140 +-----
 xen/arch/x86/mm/paging.c         |  236 ++++------
 xen/arch/x86/mm/shadow/common.c  |  335 ++++++++------
 xen/arch/x86/mm/shadow/multi.c   |  174 ++++---
 xen/arch/x86/mm/shadow/multi.h   |    7 +-
 xen/arch/x86/mm/shadow/types.h   |    1 +
 xen/include/asm-x86/dirty_vram.h |  196 ++++++++
 xen/include/asm-x86/hap.h        |    4 -
 xen/include/asm-x86/hvm/domain.h |    2 +-
 xen/include/asm-x86/paging.h     |   22 +-
 xen/include/asm-x86/shadow.h     |    6 -
 16 files changed, 1535 insertions(+), 563 deletions(-)
 create mode 100644 xen/arch/x86/mm/dirty_vram.c
 create mode 100644 xen/include/asm-x86/dirty_vram.h

diff --git a/tools/libxc/xenctrl.h b/tools/libxc/xenctrl.h
index 7eb5743..693d7fe 100644
--- a/tools/libxc/xenctrl.h
+++ b/tools/libxc/xenctrl.h
@@ -1552,15 +1552,20 @@ int xc_hvm_inject_msi(
     xc_interface *xch, domid_t dom, uint64_t addr, uint32_t data);
 
 /*
- * Track dirty bit changes in the VRAM area
+ * Track dirty bit changes in a VRAM region defined by
+ * [ first_pfn : first_pfn + nr - 1 ]
  *
  * All of this is done atomically:
- * - get the dirty bitmap since the last call
- * - set up dirty tracking area for period up to the next call
- * - clear the dirty tracking area.
+ * - gets the dirty bitmap since the last call, all zeroes for
+ *   the first call with some new region
+ * - sets up a dirty tracking region for period up to the next call
+ * - clears the specified dirty tracking region.
  *
- * Returns -ENODATA and does not fill bitmap if the area has changed since the
- * last call.
+ * Creating a new region causes any existing regions that it overlaps
+ * to be discarded.
+ *
+ * Specifying nr == 0 causes all regions to be discarded and
+ * disables dirty bit tracking.
  */
 int xc_hvm_track_dirty_vram(
     xc_interface *xch, domid_t dom,
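
As an illustration of the semantics described in the comment above, here is a
hedged sketch of how a device model might poll one frame buffer.  The
trailing parameters of xc_hvm_track_dirty_vram() (first_pfn, nr and the
bitmap buffer) are assumed from the existing libxc interface and are not
shown in this hunk; repaint_page() is a hypothetical hook:

#include <stdint.h>
#include <string.h>
#include <xenctrl.h>

#define BITS_PER_WORD (8 * sizeof(unsigned long))

static void repaint_page(uint64_t pfn) { (void)pfn; /* device-model specific */ }

static void poll_fb(xc_interface *xch, domid_t dom,
                    uint64_t fb_pfn, uint64_t fb_npages)
{
    unsigned long bitmap[(fb_npages + BITS_PER_WORD - 1) / BITS_PER_WORD];
    uint64_t i;

    memset(bitmap, 0, sizeof(bitmap));

    /* Atomically fetch the dirty bits accumulated since the last call and
     * re-arm tracking for [ fb_pfn .. fb_pfn + fb_npages ). */
    if ( xc_hvm_track_dirty_vram(xch, dom, fb_pfn, fb_npages, bitmap) )
        return;                               /* error from the hypercall */

    for ( i = 0; i < fb_npages; i++ )
        if ( bitmap[i / BITS_PER_WORD] & (1UL << (i % BITS_PER_WORD)) )
            repaint_page(fb_pfn + i);         /* only changed pages redrawn */
}
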
diff --git a/xen/arch/x86/hvm/Makefile b/xen/arch/x86/hvm/Makefile
index eea5555..e374aac 100644
--- a/xen/arch/x86/hvm/Makefile
+++ b/xen/arch/x86/hvm/Makefile
@@ -22,4 +22,4 @@ obj-y += vlapic.o
 obj-y += vmsi.o
 obj-y += vpic.o
 obj-y += vpt.o
-obj-y += vpmu.o
\ No newline at end of file
+obj-y += vpmu.o
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 34da2f5..b12e3b6 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -57,6 +57,7 @@
 #include <asm/hvm/cacheattr.h>
 #include <asm/hvm/trace.h>
 #include <asm/hvm/nestedhvm.h>
+#include <asm/dirty_vram.h>
 #include <asm/mtrr.h>
 #include <asm/apic.h>
 #include <public/sched.h>
@@ -1433,8 +1434,7 @@ int hvm_hap_nested_page_fault(paddr_t gpa,
          */
         if ( access_w )
         {
-            paging_mark_dirty(v->domain, mfn_x(mfn));
-            p2m_change_type(v->domain, gfn, p2m_ram_logdirty, p2m_ram_rw);
+            paging_mark_dirty_hap(v->domain, gfn, mfn_x(mfn));
         }
         rc = 1;
         goto out_put_gfn;
diff --git a/xen/arch/x86/mm/Makefile b/xen/arch/x86/mm/Makefile
index 73dcdf4..becd0c9 100644
--- a/xen/arch/x86/mm/Makefile
+++ b/xen/arch/x86/mm/Makefile
@@ -5,6 +5,7 @@ obj-y += paging.o
 obj-y += p2m.o p2m-pt.o p2m-ept.o p2m-pod.o
 obj-y += guest_walk_2.o
 obj-y += guest_walk_3.o
+obj-y += dirty_vram.o
 obj-$(x86_64) += guest_walk_4.o
 obj-$(x86_64) += mem_event.o
 obj-$(x86_64) += mem_paging.o
diff --git a/xen/arch/x86/mm/dirty_vram.c b/xen/arch/x86/mm/dirty_vram.c
new file mode 100644
index 0000000..72bcf4e
--- /dev/null
+++ b/xen/arch/x86/mm/dirty_vram.c
@@ -0,0 +1,951 @@
+/*
+ * arch/x86/mm/dirty_vram.c: Bookkeep/query dirty VRAM pages
+ * with support for multiple frame buffers.
+ *
+ * Copyright (c) 2012, Citrix Systems, Inc. (Robert Phillips)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+
+#include <xen/types.h>
+#include <xen/sched.h>
+#include <xen/guest_access.h>
+#include <asm/shadow.h>
+#include <asm/dirty_vram.h>
+#include "mm-locks.h"
+
+#define DEBUG_stop_tracking_all_vram          1
+#define DEBUG_allocating_dirty_vram_range     1
+#define DEBUG_high_water_mark_for_vram_ranges 1
+#define DEBUG_freeing_dirty_vram_range        1
+#define DEBUG_allocate_paddr_links_page       0
+#define DEBUG_update_vram_mapping             0
+
+/* Allocates domain's dirty_vram structure */
+dv_dirty_vram_t *
+dirty_vram_alloc(struct domain *d)
+{
+    dv_dirty_vram_t *dirty_vram;
+    ASSERT( paging_locked_by_me(d) );
+    dirty_vram = d->arch.hvm_domain.dirty_vram = xzalloc(dv_dirty_vram_t);
+    if ( dirty_vram )
+    {
+        INIT_LIST_HEAD(&dirty_vram->range_head);
+        INIT_LIST_HEAD(&dirty_vram->ext_head);
+    }
+    return dirty_vram;
+}
+
+/*
+ * Returns domain's dirty_vram structure,
+ * allocating it if necessary
+ */
+dv_dirty_vram_t *
+dirty_vram_find_or_alloc(struct domain *d)
+{
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    ASSERT( paging_locked_by_me(d) );
+    if ( !dirty_vram )
+        dirty_vram = dirty_vram_alloc(d);
+    return dirty_vram;
+}
+
+
+/* Free domain's dirty_vram structure */
+void dirty_vram_free(struct domain *d)
+{
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    ASSERT( paging_locked_by_me(d) );
+    if ( dirty_vram )
+    {
+        struct list_head *curr, *next;
+        /* Free all the ranges */
+        list_for_each_safe(curr, next, &dirty_vram->range_head)
+        {
+            dv_range_t *range = list_entry(curr, dv_range_t, range_link);
+#if DEBUG_stop_tracking_all_vram
+            gdprintk(XENLOG_DEBUG, "[%05lx:%05lx] stop tracking all vram\n",
+                     range->begin_pfn, range->end_pfn);
+#endif
+            xfree(range->pl_tab);
+            xfree(range);
+        }
+        /* Free all the extension pages */
+        list_for_each_safe(curr, next, &dirty_vram->ext_head)
+        {
+            struct dv_paddr_link_ext *ext =
+                container_of(
+                    curr, struct dv_paddr_link_ext, ext_link);
+            struct page_info *pg = __virt_to_page(ext);
+            d->arch.paging.free_page(d, pg);
+        }
+
+        xfree(dirty_vram);
+        d->arch.hvm_domain.dirty_vram = NULL;
+    }
+}
+
+/* Returns dirty vram range containing gfn, NULL if none */
+struct dv_range *
+dirty_vram_range_find_gfn(struct domain *d,
+                          unsigned long gfn)
+{
+    struct dv_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    ASSERT( paging_locked_by_me(d) );
+    if ( dirty_vram )
+    {
+        struct list_head *curr;
+        list_for_each(curr, &dirty_vram->range_head)
+        {
+            dv_range_t *range = list_entry(curr, dv_range_t, range_link);
+            if ( gfn >= range->begin_pfn &&
+                 gfn <  range->end_pfn )
+                return range;
+        }
+    }
+    return NULL;
+}
+
+/*
+ * Returns pointer to dirty vram range matching [begin_pfn .. end_pfn ),
+ * NULL if none.
+ */
+dv_range_t *
+dirty_vram_range_find(struct domain *d,
+                      unsigned long begin_pfn,
+                      unsigned long nr)
+{
+    unsigned long end_pfn = begin_pfn + nr;
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    ASSERT( paging_locked_by_me(d) );
+    if ( dirty_vram )
+    {
+        struct list_head *curr;
+        list_for_each(curr, &dirty_vram->range_head)
+        {
+            dv_range_t *range = list_entry(curr, dv_range_t, range_link);
+            if ( begin_pfn == range->begin_pfn &&
+                 end_pfn   == range->end_pfn )
+                return range;
+        }
+    }
+    return NULL;
+}
+
+/* Allocate specified dirty_vram range */
+static dv_range_t *
+_dirty_vram_range_alloc(struct domain *d,
+                        unsigned long begin_pfn,
+                        unsigned long nr)
+{
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    dv_range_t *range = NULL;
+    unsigned long end_pfn = begin_pfn + nr;
+    dv_paddr_link_t *pl_tab = NULL;
+    int i;
+
+    ASSERT( paging_locked_by_me(d) );
+    ASSERT( dirty_vram != NULL );
+
+#if DEBUG_allocating_dirty_vram_range
+    gdprintk(XENLOG_DEBUG,
+             "[%05lx:%05lx] Allocating dirty vram range hap:%d\n",
+             begin_pfn, end_pfn,
+             d->arch.hvm_domain.hap_enabled);
+#endif
+
+    range = xzalloc(dv_range_t);
+    if ( range == NULL )
+        goto err_out;
+
+    INIT_LIST_HEAD(&range->range_link);
+
+    range->begin_pfn = begin_pfn;
+    range->end_pfn = end_pfn;
+
+    if ( !hap_enabled(d) )
+    {
+        if ( (pl_tab = xzalloc_array(dv_paddr_link_t, nr)) == NULL )
+            goto err_out;
+
+        for ( i = 0; i != nr; i++ )
+        {
+            pl_tab[i].sl1ma = INVALID_PADDR;
+        }
+    }
+
+    range->pl_tab = pl_tab;
+    range->mappings_hwm = 1;
+
+    list_add(&range->range_link, &dirty_vram->range_head);
+    if ( ++dirty_vram->nr_ranges > dirty_vram->ranges_hwm )
+    {
+        dirty_vram->ranges_hwm = dirty_vram->nr_ranges;
+#if DEBUG_high_water_mark_for_vram_ranges
+        gdprintk(XENLOG_DEBUG,
+                 "High water mark for number of vram ranges is now:%d\n",
+                 dirty_vram->ranges_hwm);
+#endif
+    }
+    return range;
+
+ err_out:
+    xfree(pl_tab);
+    xfree(range);
+    return NULL;
+}
+
+
+/* Frees specified dirty_vram range */
+void dirty_vram_range_free(struct domain *d,
+                           dv_range_t *range)
+{
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    ASSERT( paging_locked_by_me(d) );
+    if ( dirty_vram )
+    {
+        int i, nr = range->end_pfn - range->begin_pfn;
+
+#if DEBUG_freeing_dirty_vram_range
+        gdprintk(XENLOG_DEBUG,
+                 "[%05lx:%05lx] Freeing dirty vram range\n",
+                 range->begin_pfn, range->end_pfn);
+#endif
+
+        if ( range->pl_tab )
+        {
+            for ( i = 0; i != nr; i++ )
+            {
+                dv_paddr_link_t *plx;
+                plx = range->pl_tab[i].pl_next;
+                /* Does current FB page have multiple mappings? */
+                if ( plx ) /* yes */
+                {
+                    /* Find the last element in singly-linked list */
+                    while ( plx->pl_next != NULL )
+                        plx = plx->pl_next;
+                    
+                    /* Prepend whole list to the free list */
+                    plx->pl_next = dirty_vram->pl_free;
+                    dirty_vram->pl_free = range->pl_tab[i].pl_next;
+                }
+            }
+            xfree(range->pl_tab);
+            range->pl_tab = NULL;
+        }
+
+        /* Remove range from the linked list, free it, and adjust count */
+        list_del(&range->range_link);
+        xfree(range);
+        dirty_vram->nr_ranges--;
+    }
+}
+
+/*
+ * dirty_vram_range_alloc()
+ * This function ensures that the new range does not overlap any existing
+ * ranges -- deleting them if necessary -- and then calls
+ * _dirty_vram_range_alloc to actually allocate the new range.
+ */
+dv_range_t *
+dirty_vram_range_alloc(struct domain *d,
+                        unsigned long begin_pfn,
+                        unsigned long nr)
+{
+    unsigned long end_pfn = begin_pfn + nr;
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    dv_range_t *range;
+    struct list_head *curr, *next;
+
+    ASSERT( paging_locked_by_me(d) );
+    ASSERT( dirty_vram != NULL );
+
+    /*
+     * Ranges cannot overlap, so free any existing range that
+     * overlaps [ begin_pfn .. end_pfn ).
+     */
+    list_for_each_safe(curr, next, &dirty_vram->range_head)
+    {
+        dv_range_t *rng = list_entry(curr, dv_range_t, range_link);
+        if ( ( ( rng->begin_pfn <= begin_pfn ) &&
+               ( begin_pfn <  rng->end_pfn   )
+                 ) ||
+             ( ( begin_pfn <= rng->begin_pfn ) &&
+               ( rng->begin_pfn < end_pfn    )
+                 ) )
+        {
+            /* Different tracking, tear the previous down. */
+            dirty_vram_range_free(d, rng);
+        }
+    }
+
+    range = _dirty_vram_range_alloc(d, begin_pfn, nr);
+
+    return range;
+}
+
+/*
+ * dirty_vram_range_find_or_alloc()
+ * Find the range for [begin_pfn:begin_pfn+nr).
+ * If it doesn't exists, create it.
+ */
+dv_range_t *
+dirty_vram_range_find_or_alloc(struct domain *d,
+                                unsigned long begin_pfn,
+                                unsigned long nr)
+{
+    dv_range_t *range;
+    ASSERT( paging_locked_by_me(d) );
+    range = dirty_vram_range_find(d, begin_pfn, nr);
+    if ( !range )
+        range = dirty_vram_range_alloc(d, begin_pfn, nr);
+    
+    return range;
+}
+
+
+
+/* Allocate a dv_paddr_link struct */
+static dv_paddr_link_t *
+alloc_paddr_link(struct domain *d)
+{
+    dv_paddr_link_t * pl = NULL;
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
+
+    ASSERT( paging_locked_by_me(d) );
+    BUILD_BUG_ON(sizeof(dv_paddr_link_ext_t) > PAGE_SIZE);
+    /* Is the list of free pl's empty? */
+    if ( dirty_vram->pl_free == NULL ) /* yes */
+    {
+        /*
+         * Allocate another page of pl's.
+         * Link them all together and point the free list head at them
+         */
+        int i;
+        struct page_info *pg = d->arch.paging.alloc_page(d);
+        dv_paddr_link_ext_t *ext;
+
+        if ( pg == NULL )
+            goto out;
+
+        ext = __page_to_virt(pg);
+
+#if DEBUG_allocate_paddr_links_page
+        gdprintk(XENLOG_DEBUG, "Allocated another page of paddr_links\n");
+#endif
+        list_add(&ext->ext_link, &dirty_vram->ext_head);
+
+        /* initialize and link together the new pl entries */
+        for ( i = 0; i != ARRAY_SIZE(ext->entries); i++ )
+        {
+            ext->entries[i].sl1ma = INVALID_PADDR;
+            ext->entries[i].pl_next = &ext->entries[i+1];
+        }
+        ext->entries[ARRAY_SIZE(ext->entries) - 1].pl_next = NULL;
+        dirty_vram->pl_free = &ext->entries[0];
+    }
+    pl = dirty_vram->pl_free;
+    dirty_vram->pl_free = pl->pl_next;
+
+    pl->sl1ma = INVALID_PADDR;
+    pl->pl_next = NULL;
+ out:
+    return pl;
+}
+
+
+/*
+ * Free a paddr_link struct.
+ *
+ * The caller has walked the singly-linked list of elements
+ * that have, as their head, an element in a pl_tab cell.
+ * The walk has reached the element to be freed.
+ * (Each element is a dv_paddr_link_t struct.)
+ *
+ * @pl points to the element to be freed.
+ * @ppl points to its predecessor element's next member.
+ *
+ * After linking the predecessor to the element's successor,
+ * we can free @pl by prepending it to the list of free
+ * elements.
+ *
+ * As a boundary case (which happens to be the common case),
+ * @pl points to a cell in the pl_tab rather than to some
+ * extension element dangling from that cell.
+ * We recognize this case because @ppl is NULL.
+ * In that case we promote the first extension element by
+ * copying it into the pl_tab cell, and then free that extension element.
+ */
+
+dv_paddr_link_t *
+free_paddr_link(struct domain *d,
+                dv_paddr_link_t **ppl,
+                dv_paddr_link_t *pl)
+{
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    dv_paddr_link_t *npl; /* next pl */
+
+    ASSERT( paging_locked_by_me(d) );
+    /* extension mapping? */
+    if ( ppl ) /* yes. free it */
+    {
+        ASSERT(pl == (*ppl));
+        (*ppl) = npl = pl->pl_next;
+    }
+    else  /* main table */
+    {
+        /*
+         * move 2nd mapping to main table.
+         * and free 2nd mapping
+         */
+        dv_paddr_link_t * spl;
+        spl = pl->pl_next;
+        if ( spl == NULL )
+        {
+            pl->sl1ma = INVALID_PADDR;
+            return pl;
+        }
+        pl->sl1ma = spl->sl1ma;
+        pl->pl_next = spl->pl_next;
+        npl = pl; /* reprocess main table entry again */
+        pl = spl;
+    }
+    pl->sl1ma = INVALID_PADDR;
+    pl->pl_next = dirty_vram->pl_free;
+    dirty_vram->pl_free = pl;
+    return npl;
+}
+
+
+/*
+ * dirty_vram_range_update()
+ *
+ * This is called whenever a level 1 page table entry is modified.
+ * If the L1PTE is being cleared, the function removes any paddr_links
+ * that refer to it.
+ * If the L1PTE is being set to a frame buffer page, a paddr_link is
+ * created for that page's entry in pl_tab.
+ * Returns 1 iff the entry was found and set or cleared.
+ */
+int dirty_vram_range_update(struct domain *d,
+                            unsigned long gfn,
+                            paddr_t sl1ma,
+                            int set)
+{
+    int effective = 0;
+    dv_range_t *range;
+    unsigned long i;
+    dv_paddr_link_t *pl;
+    dv_paddr_link_t **ppl;
+    int len = 0;
+
+    ASSERT(paging_locked_by_me(d));
+    range = dirty_vram_range_find_gfn(d, gfn);
+    if ( !range )
+        return effective;
+
+    
+    i = gfn - range->begin_pfn;
+    pl = &range->pl_tab[ i ];
+    ppl = NULL;
+
+    /*
+     * find matching entry (pl), if any, and its predecessor
+     * in linked list (ppl)
+     */
+    while ( pl != NULL )
+    {
+        if ( pl->sl1ma == sl1ma || pl->sl1ma == INVALID_PADDR )
+            break;
+            
+        ppl = &pl->pl_next;
+        pl = *ppl;
+        len++;
+    }
+
+    if ( set )
+    {
+        /* Did we find sl1ma in either the main table or the linked list? */
+        if ( pl == NULL ) /* no, so we'll need to alloc a link */
+        {
+            ASSERT(ppl != NULL);
+            /*
+             * Have we reached the limit of mappings we're willing
+             * to bookkeep?
+             */
+            if ( len > DV_ADDR_LINK_LIST_LIMIT ) /* yes */
+                /*
+                 * The side effect of failing to record this mapping
+                 * is that we will not detect any changes to the frame
+                 * buffer made through the mapping so
+                 * areas of a monitor may not get refreshed.
+                 * In practice this should never happen.
+                 */
+                goto out;
+
+            /* alloc link and append it to list */
+            (*ppl) = pl = alloc_paddr_link(d);
+            if ( pl == NULL )
+                goto out;
+        }
+        if ( pl->sl1ma != sl1ma )
+        {
+            ASSERT(pl->sl1ma == INVALID_PADDR);
+            pl->sl1ma = sl1ma;
+            range->nr_mappings++;
+        }
+        effective = 1;
+        if ( len > range->mappings_hwm )
+        {
+            range->mappings_hwm = len;
+#if DEBUG_update_vram_mapping
+            gdprintk(XENLOG_DEBUG,
+                     "[%lx] set      sl1ma:%lx hwm:%d mappings:%d freepages:%d\n",
+                     gfn, sl1ma,
+                     range->mappings_hwm,
+                     range->nr_mappings,
+                     d->arch.paging.shadow.free_pages);
+#endif
+        }
+    }
+    else /* clear */
+    {
+        if ( pl && pl->sl1ma == sl1ma )
+        {
+#if DEBUG_update_vram_mapping
+            gdprintk(XENLOG_DEBUG,
+                     "[%lx] clear    sl1ma:%lx mappings:%d\n",
+                     gfn, sl1ma,
+                     range->nr_mappings - 1);
+#endif
+            free_paddr_link(d, ppl, pl);
+            if ( --range->nr_mappings == 0 )
+            {
+                dirty_vram_range_free(d, range);
+            }
+            effective = 1;
+        }
+    }
+ out:
+    return effective;
+}
+
+
+/*
+ * shadow_scan_dirty_flags()
+ * This produces a dirty bitmap for the range by examining every
+ * L1PTE referenced by some dv_paddr_link in the range's pl_tab table.
+ * It tests and clears each such L1PTE's dirty flag.
+ */
+static int shadow_scan_dirty_flags(struct domain *d,
+                                   dv_range_t *range,
+                                   uint8_t *dirty_bitmap)
+{
+    int flush_tlb = 0;
+    unsigned long i;
+    unsigned long nr = range->end_pfn - range->begin_pfn;
+
+    ASSERT( paging_locked_by_me(d) );
+    /* Iterate over VRAM to track dirty bits. */
+    for ( i = 0; i < nr; i++ )
+    {
+        int dirty = 0, len = 1;
+        dv_paddr_link_t *pl;
+        for ( pl = &range->pl_tab[i]; pl; pl = pl->pl_next, len++ )
+        {
+            l1_pgentry_t *sl1e;
+            paddr_t sl1ma = pl->sl1ma;
+            if ( sl1ma == INVALID_PADDR ) /* FB page is unmapped */
+                continue;
+            sl1e = maddr_to_virt(sl1ma);
+            if ( l1e_get_flags(*sl1e) & _PAGE_DIRTY )
+            {
+                dirty = 1;
+                /* Clear dirty so we can detect if page gets re-dirtied.
+                 * Note: this is atomic, so we may clear a
+                 * _PAGE_ACCESSED set by another processor.
+                 */
+                l1e_remove_flags(*sl1e, _PAGE_DIRTY);
+                flush_tlb = 1;
+            }
+        } /* for */
+        
+        if ( dirty )
+            dirty_bitmap[i >> 3] |= (1 << (i & 7));
+
+    }
+
+    return flush_tlb;
+}
+
+
+/*
+ * shadow_track_dirty_vram()
+ * This is the entry point used by the toolstack/device model to determine
+ * which pages in [ begin_pfn .. begin_pfn+nr ) have been dirtied since the
+ * last call.
+ * It creates the domain's dv_dirty_vram on demand.
+ * It creates ranges on demand when some [begin_pfn:nr) is first encountered.
+ * To collect the dirty bitmask it calls shadow_scan_dirty_flags().
+ * It copies the dirty bitmask into guest storage.
+ */
+int shadow_track_dirty_vram(struct domain *d,
+                            unsigned long begin_pfn,
+                            unsigned long nr,
+                            XEN_GUEST_HANDLE_64(uint8) guest_dirty_bitmap)
+{
+    int rc = 0;
+    unsigned long end_pfn = begin_pfn + nr;
+    int flush_tlb = 0;
+    dv_range_t *range;
+    struct p2m_domain *p2m = p2m_get_hostp2m(d);
+
+    /*
+     * This range test is tricky.
+     *
+     * The range [begin_pfn..end_pfn) is an open interval, so end_pfn is a pfn
+     * beyond the end of the range.
+     *
+     * p2m->max_mapped_pfn is a valid PFN so p2m->max_mapped_pfn + 1 is an
+     * invalid PFN.
+     *
+     * If end_pfn is beyond *that* then the range is invalid.
+     */
+    if ( end_pfn < begin_pfn
+         || begin_pfn > p2m->max_mapped_pfn
+         || end_pfn > p2m->max_mapped_pfn + 1 )
+        return -EINVAL;
+
+    paging_lock(d);
+
+    if ( !nr )
+    {
+        dirty_vram_free(d);
+        goto out;
+    }
+
+    if ( guest_handle_is_null(guest_dirty_bitmap) )
+        goto out;
+
+    if ( !dirty_vram_find_or_alloc(d) )
+    {
+        rc = -ENOMEM;
+        goto out;
+    }
+
+    range = dirty_vram_range_find(d, begin_pfn, nr);
+    if ( !range )
+    {
+        range = dirty_vram_range_alloc(d, begin_pfn, nr);
+        if ( range )
+            sh_find_all_vram_mappings(d->vcpu[0], range);
+    }
+    if ( range )
+    {
+        int size = (nr + BITS_PER_LONG - 1) / BITS_PER_LONG;
+        unsigned long dirty_bitmap[size];
+
+        memset(dirty_bitmap, 0x00, size * BYTES_PER_LONG);
+
+        flush_tlb |= shadow_scan_dirty_flags(d, range, (uint8_t*)dirty_bitmap);
+
+        rc = -EFAULT;
+        if ( copy_to_guest(guest_dirty_bitmap,
+                           (uint8_t*)dirty_bitmap,
+                           size * BYTES_PER_LONG) == 0 )
+            rc = 0;
+    }
+    
+    if ( flush_tlb )
+        flush_tlb_mask(d->domain_dirty_cpumask);
+
+out:
+    paging_unlock(d);
+    return rc;
+}
+
+
+/************************************************/
+/*          HAP VRAM TRACKING SUPPORT           */
+/************************************************/
+
+/*
+ * hap_enable_vram_tracking()
+ * For all ranges, mark the vram pages in the range read-only (p2m_ram_logdirty).
+ */
+static int hap_enable_vram_tracking(struct domain *d)
+{
+    int rc = 0;
+    dv_dirty_vram_t *dirty_vram;
+    struct list_head *curr;
+
+    /* turn on PG_log_dirty bit in paging mode */
+    paging_lock(d);
+    d->arch.paging.mode |= PG_log_dirty;
+    paging_unlock(d);
+
+    p2m_lock(p2m_get_hostp2m(d));
+    paging_lock(d);
+
+    dirty_vram = d->arch.hvm_domain.dirty_vram;
+
+    /*
+     * dirty_vram != NULL iff we're tracking dirty vram.
+     * If we start tracking dirty pages for all memory then
+     * the dirty_vram structure is freed.
+     */
+    if ( !dirty_vram )
+    {
+        rc = -EINVAL;
+        goto out;
+    }
+
+    /* set l1e entries of P2M table to be read-only. */
+    list_for_each(curr, &dirty_vram->range_head)
+    {
+        dv_range_t *range = list_entry(curr, dv_range_t, range_link);
+        gdprintk(XENLOG_DEBUG, "[%05lx:%05lx] enable  vram tracking\n",
+                 range->begin_pfn, range->end_pfn);
+        p2m_change_type_range(d, range->begin_pfn, range->end_pfn,
+                              p2m_ram_rw, p2m_ram_logdirty);
+    }
+
+    flush_tlb_mask(d->domain_dirty_cpumask);
+ out:
+    paging_unlock(d);
+    p2m_unlock(p2m_get_hostp2m(d));
+    if ( rc )
+    {
+        paging_lock(d);
+        d->arch.paging.mode &= ~PG_log_dirty;
+        paging_unlock(d);
+    }
+    return rc;
+}
+
+/*
+ * hap_disable_vram_tracking()
+ * For all ranges, restore the vram pages in the range to read-write (p2m_ram_rw).
+ */
+static int hap_disable_vram_tracking(struct domain *d)
+{
+    int rc = 0;
+    dv_dirty_vram_t *dirty_vram;
+    struct list_head *curr;
+
+    paging_lock(d);
+    d->arch.paging.mode &= ~PG_log_dirty;
+    paging_unlock(d);
+
+    p2m_lock(p2m_get_hostp2m(d));
+    paging_lock(d);
+
+    dirty_vram = d->arch.hvm_domain.dirty_vram;
+    if ( !dirty_vram )
+    {
+        rc = -EINVAL;
+        goto out;
+    }
+
+    /* set l1e entries of the P2M table back to normal (read-write) mode */
+    list_for_each(curr, &dirty_vram->range_head)
+    {
+        dv_range_t *range = list_entry(curr, dv_range_t, range_link);
+        gdprintk(XENLOG_DEBUG, "[%05lx:%05lx] disable vram tracking\n",
+                 range->begin_pfn, range->end_pfn);
+        p2m_change_type_range(d, range->begin_pfn, range->end_pfn,
+                              p2m_ram_logdirty, p2m_ram_rw);
+    }
+    flush_tlb_mask(d->domain_dirty_cpumask);
+ out:
+    paging_unlock(d);
+    p2m_unlock(p2m_get_hostp2m(d));
+    if ( rc )
+    {
+        paging_lock(d);
+        d->arch.paging.mode |= PG_log_dirty;
+        paging_unlock(d);
+    }
+    return rc;
+}
+
+/*
+ * hap_clean_vram_tracking_range()
+ * For all the pages in the range [ begin_pfn .. begin_pfn+nr ),
+ * note in the dirty bitmap any page that has been marked read-write,
+ * which signifies that the page has been dirtied, and reset the page
+ * type to p2m_ram_logdirty.
+ */
+void hap_clean_vram_tracking_range(struct domain *d,
+                                   unsigned long begin_pfn,
+                                   unsigned long nr,
+                                   uint8_t *dirty_bitmap)
+{
+    int i;
+    unsigned long pfn;
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    dv_range_t *range;
+
+    ASSERT(p2m_locked_by_me(p2m_get_hostp2m(d)));
+    ASSERT(paging_locked_by_me(d));
+
+    if ( !dirty_vram )
+    {
+        gdprintk(XENLOG_DEBUG,
+                 "Should only be called while tracking dirty vram.\n");
+        return;
+    }
+
+    range = dirty_vram_range_find(d, begin_pfn, nr);
+    if ( !range )
+        return;
+
+    /*
+     * Set the l1e entries of the P2M table to be read-only.
+     * On the guest's first write a page fault occurs, the entry is changed
+     * back to read-write, its bit in the dirty bitmap is set, and on retry
+     * the write succeeds.
+     */
+    for ( i = 0, pfn = range->begin_pfn; pfn < range->end_pfn; i++, pfn++ )
+    {
+        p2m_type_t pt;
+        pt = p2m_change_type(d, pfn, p2m_ram_rw, p2m_ram_logdirty);
+        if ( pt == p2m_ram_rw )
+            dirty_bitmap[i >> 3] |= (1 << (i & 7));
+    }
+    flush_tlb_mask(d->domain_dirty_cpumask);
+}
+
+static void hap_vram_tracking_init(struct domain *d)
+{
+    paging_log_dirty_init(d, hap_enable_vram_tracking,
+                          hap_disable_vram_tracking,
+                          NULL);
+}
+
+/*
+ * hap_track_dirty_vram()
+ * Create the domain's dv_dirty_vram struct on demand.
+ * Create a dirty vram range on demand when some [ begin_pfn .. begin_pfn+nr )
+ * is first encountered.
+ * Collect the dirty bitmap, one bit per vram page, by calling
+ * paging_log_dirty_range(), and copy it to the guest.
+ */
+int hap_track_dirty_vram(struct domain *d,
+                         unsigned long begin_pfn,
+                         unsigned long nr,
+                         XEN_GUEST_HANDLE_64(uint8) guest_dirty_bitmap)
+{
+    long rc = 0;
+    dv_dirty_vram_t *dirty_vram;
+
+    paging_lock(d);
+    dirty_vram = d->arch.hvm_domain.dirty_vram;
+    if ( nr )
+    {
+        dv_range_t *range = NULL;
+        int size = ( nr + BITS_PER_LONG - 1 ) & ~( BITS_PER_LONG - 1 );
+        uint8_t dirty_bitmap[size];
+
+        /* Already tracking dirty vram? */
+        if ( paging_mode_log_dirty(d) && dirty_vram ) /* yes */
+        {
+            /* Handle the addition of another range */
+            range = dirty_vram_range_find(d, begin_pfn, nr);
+            if ( !range )
+            {
+                rc = -ENOMEM;
+                if ( !(range = dirty_vram_range_alloc(d, begin_pfn, nr)) )
+                    goto param_fail;
+            }
+        }
+        /* Just starting to track dirty vram? */
+        else if ( !paging_mode_log_dirty(d) && !dirty_vram ) /* yes */
+        {
+            rc = -ENOMEM;
+            if ( !(dirty_vram = dirty_vram_alloc(d)) )
+                goto param_fail;
+
+            if ( !(range = dirty_vram_range_find_or_alloc(d, begin_pfn, nr)) )
+                goto param_fail;
+
+            /* Initialize callbacks for vram tracking */
+            hap_vram_tracking_init(d);
+
+            /* Enable HAP vram tracking */
+            paging_unlock(d);
+            rc = paging_log_dirty_enable(d);
+            paging_lock(d);
+            
+            if ( rc != 0 )
+                goto param_fail;
+        }
+        else
+        {
+            /* Test for invalid combination */
+            if ( !paging_mode_log_dirty(d) && dirty_vram )
+                rc = -EINVAL;
+            else /* logging dirty of all memory, not tracking dirty vram */
+                rc = -ENODATA;
+            goto param_fail;
+        }
+
+        paging_unlock(d);
+        memset(dirty_bitmap, 0x00, size);
+        paging_log_dirty_range(d, begin_pfn, nr, dirty_bitmap);
+        rc = -EFAULT;
+        if ( copy_to_guest(guest_dirty_bitmap,
+                           dirty_bitmap,
+                           size) == 0 )
+        {
+            rc = 0;
+        }
+    }
+    else
+    {
+        /*
+         * If zero pages specified while already tracking dirty vram
+         * then stop tracking
+         */
+        if ( paging_mode_log_dirty(d) && dirty_vram )
+        {
+            /* Disable HAP vram tracking */
+            paging_unlock(d);
+            rc = paging_log_dirty_disable(d);
+            paging_lock(d);
+            
+            dirty_vram_free(d);
+        }
+        else /* benign no-op */
+        {
+            rc = 0;
+        }
+        paging_unlock(d);
+    }
+
+    return rc;
+
+param_fail:
+    dirty_vram_free(d);
+    paging_unlock(d);
+    return rc;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
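
For context, here is a hedged sketch of how the two tracking entry points are
expected to be selected per domain; the real dispatch site is assumed to be
the HVMOP_track_dirty_vram handler and is not shown in this file:

/* Illustration only: choose the tracking implementation per domain,
 * using the entry points declared in asm/dirty_vram.h. */
static int track_dirty_vram(struct domain *d,
                            unsigned long begin_pfn,
                            unsigned long nr,
                            XEN_GUEST_HANDLE_64(uint8) dirty_bitmap)
{
    return hap_enabled(d)
        ? hap_track_dirty_vram(d, begin_pfn, nr, dirty_bitmap)
        : shadow_track_dirty_vram(d, begin_pfn, nr, dirty_bitmap);
}
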
diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c
index fd99cde..09cdba2 100644
--- a/xen/arch/x86/mm/hap/hap.c
+++ b/xen/arch/x86/mm/hap/hap.c
@@ -41,6 +41,7 @@
 #include <asm/domain.h>
 #include <xen/numa.h>
 #include <asm/hvm/nestedhvm.h>
+#include <asm/dirty_vram.h>
 
 #include "private.h"
 
@@ -53,139 +54,6 @@
 #define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg))
 
 /************************************************/
-/*          HAP VRAM TRACKING SUPPORT           */
-/************************************************/
-
-static int hap_enable_vram_tracking(struct domain *d)
-{
-    struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
-
-    if ( !dirty_vram )
-        return -EINVAL;
-
-    /* turn on PG_log_dirty bit in paging mode */
-    paging_lock(d);
-    d->arch.paging.mode |= PG_log_dirty;
-    paging_unlock(d);
-
-    /* set l1e entries of P2M table to be read-only. */
-    p2m_change_type_range(d, dirty_vram->begin_pfn, dirty_vram->end_pfn, 
-                          p2m_ram_rw, p2m_ram_logdirty);
-
-    flush_tlb_mask(d->domain_dirty_cpumask);
-    return 0;
-}
-
-static int hap_disable_vram_tracking(struct domain *d)
-{
-    struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
-
-    if ( !dirty_vram )
-        return -EINVAL;
-
-    paging_lock(d);
-    d->arch.paging.mode &= ~PG_log_dirty;
-    paging_unlock(d);
-
-    /* set l1e entries of P2M table with normal mode */
-    p2m_change_type_range(d, dirty_vram->begin_pfn, dirty_vram->end_pfn, 
-                          p2m_ram_logdirty, p2m_ram_rw);
-
-    flush_tlb_mask(d->domain_dirty_cpumask);
-    return 0;
-}
-
-static void hap_clean_vram_tracking(struct domain *d)
-{
-    struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
-
-    if ( !dirty_vram )
-        return;
-
-    /* set l1e entries of P2M table to be read-only. */
-    p2m_change_type_range(d, dirty_vram->begin_pfn, dirty_vram->end_pfn, 
-                          p2m_ram_rw, p2m_ram_logdirty);
-
-    flush_tlb_mask(d->domain_dirty_cpumask);
-}
-
-static void hap_vram_tracking_init(struct domain *d)
-{
-    paging_log_dirty_init(d, hap_enable_vram_tracking,
-                          hap_disable_vram_tracking,
-                          hap_clean_vram_tracking);
-}
-
-int hap_track_dirty_vram(struct domain *d,
-                         unsigned long begin_pfn,
-                         unsigned long nr,
-                         XEN_GUEST_HANDLE_64(uint8) dirty_bitmap)
-{
-    long rc = 0;
-    struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
-
-    if ( nr )
-    {
-        if ( paging_mode_log_dirty(d) && dirty_vram )
-        {
-            if ( begin_pfn != dirty_vram->begin_pfn ||
-                 begin_pfn + nr != dirty_vram->end_pfn )
-            {
-                paging_log_dirty_disable(d);
-                dirty_vram->begin_pfn = begin_pfn;
-                dirty_vram->end_pfn = begin_pfn + nr;
-                rc = paging_log_dirty_enable(d);
-                if (rc != 0)
-                    goto param_fail;
-            }
-        }
-        else if ( !paging_mode_log_dirty(d) && !dirty_vram )
-        {
-            rc = -ENOMEM;
-            if ( (dirty_vram = xmalloc(struct sh_dirty_vram)) == NULL )
-                goto param_fail;
-
-            dirty_vram->begin_pfn = begin_pfn;
-            dirty_vram->end_pfn = begin_pfn + nr;
-            d->arch.hvm_domain.dirty_vram = dirty_vram;
-            hap_vram_tracking_init(d);
-            rc = paging_log_dirty_enable(d);
-            if (rc != 0)
-                goto param_fail;
-        }
-        else
-        {
-            if ( !paging_mode_log_dirty(d) && dirty_vram )
-                rc = -EINVAL;
-            else
-                rc = -ENODATA;
-            goto param_fail;
-        }
-        /* get the bitmap */
-        rc = paging_log_dirty_range(d, begin_pfn, nr, dirty_bitmap);
-    }
-    else
-    {
-        if ( paging_mode_log_dirty(d) && dirty_vram ) {
-            rc = paging_log_dirty_disable(d);
-            xfree(dirty_vram);
-            dirty_vram = d->arch.hvm_domain.dirty_vram = NULL;
-        } else
-            rc = 0;
-    }
-
-    return rc;
-
-param_fail:
-    if ( dirty_vram )
-    {
-        xfree(dirty_vram);
-        dirty_vram = d->arch.hvm_domain.dirty_vram = NULL;
-    }
-    return rc;
-}
-
-/************************************************/
 /*            HAP LOG DIRTY SUPPORT             */
 /************************************************/
 
@@ -223,14 +91,12 @@ static void hap_clean_dirty_bitmap(struct domain *d)
 
 void hap_logdirty_init(struct domain *d)
 {
-    struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    struct dv_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
     if ( paging_mode_log_dirty(d) && dirty_vram )
     {
         paging_log_dirty_disable(d);
-        xfree(dirty_vram);
-        dirty_vram = d->arch.hvm_domain.dirty_vram = NULL;
+        dirty_vram_free(d);
     }
-
     /* Reinitialize logdirty mechanism */
     paging_log_dirty_init(d, hap_enable_log_dirty,
                           hap_disable_log_dirty,
diff --git a/xen/arch/x86/mm/paging.c b/xen/arch/x86/mm/paging.c
index ea44e39..5e76609 100644
--- a/xen/arch/x86/mm/paging.c
+++ b/xen/arch/x86/mm/paging.c
@@ -27,6 +27,7 @@
 #include <asm/p2m.h>
 #include <asm/hap.h>
 #include <asm/hvm/nestedhvm.h>
+#include <asm/dirty_vram.h>
 #include <xen/numa.h>
 #include <xsm/xsm.h>
 
@@ -278,6 +279,46 @@ out:
 }
 
 
+/* paging_mark_dirty_hap()
+ * Make a hap page writeable and mark it as dirty.
+ * This is done atomically under the p2m and paging locks to avoid leaving
+ * a window where the page might be modified without being counted as dirty.
+ */
+void paging_mark_dirty_hap(struct domain *d,
+                           unsigned long pfn,
+                           unsigned long guest_mfn)
+{
+    mfn_t gmfn;
+    p2m_type_t pt;
+    struct p2m_domain *p2m = p2m_get_hostp2m(d);
+    
+    if ( !paging_mode_log_dirty(d) )
+        return;
+
+    gmfn = _mfn(guest_mfn);
+
+    ASSERT( mfn_valid(gmfn) &&
+            page_get_owner(mfn_to_page(gmfn)) == d );
+
+    p2m_lock(p2m);
+    pt = p2m_change_type(d, pfn, p2m_ram_logdirty, p2m_ram_rw);
+    paging_lock(d);
+    if ( pt == p2m_ram_logdirty )
+    {
+        dv_range_t *range;
+        PAGING_DEBUG(LOGDIRTY,
+                     "marked mfn %" PRI_mfn " (pfn=%lx), dom %d\n",
+                     mfn_x(gmfn), pfn, d->domain_id);
+        d->arch.paging.log_dirty.dirty_count++;
+        range = dirty_vram_range_find_gfn(d, pfn);
+        if ( range )
+            range->dirty_count++;
+    }
+    paging_mark_dirty(d, guest_mfn); 
+    paging_unlock(d);
+    p2m_unlock(p2m);
+}
+
 /* Is this guest page dirty? */
 int paging_mfn_is_dirty(struct domain *d, mfn_t gmfn)
 {
@@ -333,8 +374,11 @@ int paging_log_dirty_op(struct domain *d, struct xen_domctl_shadow_op *sc)
     mfn_t *l4, *l3, *l2;
     unsigned long *l1;
     int i4, i3, i2;
+    struct p2m_domain *p2m = p2m_get_hostp2m(d);
 
     domain_pause(d);
+    /* Locking hierarchy requires p2m lock to be taken first */
+    p2m_lock(p2m);
     paging_lock(d);
 
     clean = (sc->op == XEN_DOMCTL_SHADOW_OP_CLEAN);
@@ -345,6 +389,14 @@ int paging_log_dirty_op(struct domain *d, struct xen_domctl_shadow_op *sc)
                  d->arch.paging.log_dirty.fault_count,
                  d->arch.paging.log_dirty.dirty_count);
 
+    if ( hap_enabled(d) && d->arch.hvm_domain.dirty_vram )
+    {
+        /*
+         * If we're cleaning/peeking all guest memory, we should not be
+         * tracking dirty vram.
+         */
+        rv = -EINVAL;
+        goto out;
+    }
+
     sc->stats.fault_count = d->arch.paging.log_dirty.fault_count;
     sc->stats.dirty_count = d->arch.paging.log_dirty.dirty_count;
 
@@ -424,170 +476,64 @@ int paging_log_dirty_op(struct domain *d, struct xen_domctl_shadow_op *sc)
 
     if ( clean )
     {
-        /* We need to further call clean_dirty_bitmap() functions of specific
-         * paging modes (shadow or hap).  Safe because the domain is paused. */
-        d->arch.paging.log_dirty.clean_dirty_bitmap(d);
+        /* Is null if tracking dirty vram */
+        if ( d->arch.paging.log_dirty.clean_dirty_bitmap )
+        {
+            /*
+             * We need to call clean_dirty_bitmap() functions of specific
+             * paging modes (shadow or hap).
+             * Safe because the domain is paused.
+             */
+            d->arch.paging.log_dirty.clean_dirty_bitmap(d);
+        }
     }
     domain_unpause(d);
     return rv;
 
  out:
     paging_unlock(d);
+    p2m_unlock(p2m);
     domain_unpause(d);
     return rv;
 }
 
-int paging_log_dirty_range(struct domain *d,
-                            unsigned long begin_pfn,
-                            unsigned long nr,
-                            XEN_GUEST_HANDLE_64(uint8) dirty_bitmap)
+void paging_log_dirty_range(struct domain *d,
+                            unsigned long begin_pfn,
+                            unsigned long nr,
+                            uint8_t *dirty_bitmap)
 {
-    int rv = 0;
-    unsigned long pages = 0;
-    mfn_t *l4, *l3, *l2;
-    unsigned long *l1;
-    int b1, b2, b3, b4;
-    int i2, i3, i4;
-
-    d->arch.paging.log_dirty.clean_dirty_bitmap(d);
+    struct p2m_domain *p2m = p2m_get_hostp2m(d);
+    dv_range_t *range;
+    unsigned int range_dirty_count = 0;
+    
+    p2m_lock(p2m);
     paging_lock(d);
 
-    PAGING_DEBUG(LOGDIRTY, "log-dirty-range: dom %u faults=%u dirty=%u\n",
-                 d->domain_id,
-                 d->arch.paging.log_dirty.fault_count,
-                 d->arch.paging.log_dirty.dirty_count);
-
-    if ( unlikely(d->arch.paging.log_dirty.failed_allocs) ) {
-        printk("%s: %d failed page allocs while logging dirty pages\n",
-               __FUNCTION__, d->arch.paging.log_dirty.failed_allocs);
-        rv = -ENOMEM;
-        goto out;
-    }
-
-    if ( !d->arch.paging.log_dirty.fault_count &&
-         !d->arch.paging.log_dirty.dirty_count ) {
-        unsigned int size = BITS_TO_LONGS(nr);
-
-        if ( clear_guest(dirty_bitmap, size * BYTES_PER_LONG) != 0 )
-            rv = -EFAULT;
-        goto out;
-    }
-    d->arch.paging.log_dirty.fault_count = 0;
-    d->arch.paging.log_dirty.dirty_count = 0;
-
-    b1 = L1_LOGDIRTY_IDX(begin_pfn);
-    b2 = L2_LOGDIRTY_IDX(begin_pfn);
-    b3 = L3_LOGDIRTY_IDX(begin_pfn);
-    b4 = L4_LOGDIRTY_IDX(begin_pfn);
-    l4 = paging_map_log_dirty_bitmap(d);
-
-    for ( i4 = b4;
-          (pages < nr) && (i4 < LOGDIRTY_NODE_ENTRIES);
-          i4++ )
+    /* Only called when tracking dirty vram in HAP mode */
+    ASSERT(hap_enabled(d) && d->arch.hvm_domain.dirty_vram);
+    
+    range = dirty_vram_range_find_gfn(d, begin_pfn);
+    if ( range )
     {
-        l3 = (l4 && mfn_valid(l4[i4])) ? map_domain_page(mfn_x(l4[i4])) : NULL;
-        for ( i3 = b3;
-              (pages < nr) && (i3 < LOGDIRTY_NODE_ENTRIES);
-              i3++ )
-        {
-            l2 = ((l3 && mfn_valid(l3[i3])) ?
-                  map_domain_page(mfn_x(l3[i3])) : NULL);
-            for ( i2 = b2;
-                  (pages < nr) && (i2 < LOGDIRTY_NODE_ENTRIES);
-                  i2++ )
-            {
-                unsigned int bytes = PAGE_SIZE;
-                uint8_t *s;
-                l1 = ((l2 && mfn_valid(l2[i2])) ?
-                      map_domain_page(mfn_x(l2[i2])) : NULL);
-
-                s = ((uint8_t*)l1) + (b1 >> 3);
-                bytes -= b1 >> 3;
-
-                if ( likely(((nr - pages + 7) >> 3) < bytes) )
-                    bytes = (unsigned int)((nr - pages + 7) >> 3);
-
-                if ( !l1 )
-                {
-                    if ( clear_guest_offset(dirty_bitmap, pages >> 3,
-                                            bytes) != 0 )
-                    {
-                        rv = -EFAULT;
-                        goto out;
-                    }
-                }
-                /* begin_pfn is not 32K aligned, hence we have to bit
-                 * shift the bitmap */
-                else if ( b1 & 0x7 )
-                {
-                    int i, j;
-                    uint32_t *l = (uint32_t*) s;
-                    int bits = b1 & 0x7;
-                    int bitmask = (1 << bits) - 1;
-                    int size = (bytes + BYTES_PER_LONG - 1) / BYTES_PER_LONG;
-                    unsigned long bitmap[size];
-                    static unsigned long printed = 0;
-
-                    if ( printed != begin_pfn )
-                    {
-                        dprintk(XENLOG_DEBUG, "%s: begin_pfn %lx is not 32K aligned!\n",
-                                __FUNCTION__, begin_pfn);
-                        printed = begin_pfn;
-                    }
-
-                    for ( i = 0; i < size - 1; i++, l++ ) {
-                        bitmap[i] = ((*l) >> bits) |
-                            (((*((uint8_t*)(l + 1))) & bitmask) << (sizeof(*l) * 8 - bits));
-                    }
-                    s = (uint8_t*) l;
-                    size = BYTES_PER_LONG - ((b1 >> 3) & 0x3);
-                    bitmap[i] = 0;
-                    for ( j = 0; j < size; j++, s++ )
-                        bitmap[i] |= (*s) << (j * 8);
-                    bitmap[i] = (bitmap[i] >> bits) | (bitmask << (size * 8 - bits));
-                    if ( copy_to_guest_offset(dirty_bitmap, (pages >> 3),
-                                (uint8_t*) bitmap, bytes) != 0 )
-                    {
-                        rv = -EFAULT;
-                        goto out;
-                    }
-                }
-                else
-                {
-                    if ( copy_to_guest_offset(dirty_bitmap, pages >> 3,
-                                              s, bytes) != 0 )
-                    {
-                        rv = -EFAULT;
-                        goto out;
-                    }
-                }
-
-                pages += bytes << 3;
-                if ( l1 )
-                {
-                    clear_page(l1);
-                    unmap_domain_page(l1);
-                }
-                b1 = b1 & 0x7;
-            }
-            b2 = 0;
-            if ( l2 )
-                unmap_domain_page(l2);
-        }
-        b3 = 0;
-        if ( l3 )
-            unmap_domain_page(l3);
+        range_dirty_count = range->dirty_count;
+        range->dirty_count = 0;
     }
-    if ( l4 )
-        unmap_domain_page(l4);
-
-    paging_unlock(d);
+    
+    if ( !range_dirty_count )
+        goto out;
 
-    return rv;
+    PAGING_DEBUG(LOGDIRTY,
+                 "log-dirty-range: dom %u [%05lx:%05lx] range_dirty=%u\n",
+                 d->domain_id,
+                 begin_pfn,
+                 range->end_pfn,
+                 range_dirty_count);
 
+    hap_clean_vram_tracking_range(d, begin_pfn, nr, dirty_bitmap);
  out:
     paging_unlock(d);
-    return rv;
+    p2m_unlock(p2m);
+    return;
 }
 
 /* Note that this function takes three function pointers. Callers must supply
diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c
index ce79131..4e40f43 100644
--- a/xen/arch/x86/mm/shadow/common.c
+++ b/xen/arch/x86/mm/shadow/common.c
@@ -36,6 +36,7 @@
 #include <asm/current.h>
 #include <asm/flushtlb.h>
 #include <asm/shadow.h>
+#include <asm/dirty_vram.h>
 #include <xen/numa.h>
 #include "private.h"
 
@@ -3121,12 +3122,7 @@ void shadow_teardown(struct domain *d)
      * calls now that we've torn down the bitmap */
     d->arch.paging.mode &= ~PG_log_dirty;
 
-    if (d->arch.hvm_domain.dirty_vram) {
-        xfree(d->arch.hvm_domain.dirty_vram->sl1ma);
-        xfree(d->arch.hvm_domain.dirty_vram->dirty_bitmap);
-        xfree(d->arch.hvm_domain.dirty_vram);
-        d->arch.hvm_domain.dirty_vram = NULL;
-    }
+    dirty_vram_free(d);
 
     paging_unlock(d);
 
@@ -3463,179 +3459,212 @@ void shadow_clean_dirty_bitmap(struct domain *d)
 
 
 /**************************************************************************/
-/* VRAM dirty tracking support */
-int shadow_track_dirty_vram(struct domain *d,
-                            unsigned long begin_pfn,
-                            unsigned long nr,
-                            XEN_GUEST_HANDLE_64(uint8) dirty_bitmap)
-{
-    int rc;
-    unsigned long end_pfn = begin_pfn + nr;
-    unsigned long dirty_size = (nr + 7) / 8;
-    int flush_tlb = 0;
-    unsigned long i;
-    p2m_type_t t;
-    struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
-    struct p2m_domain *p2m = p2m_get_hostp2m(d);
-
-    if (end_pfn < begin_pfn
-            || begin_pfn > p2m->max_mapped_pfn
-            || end_pfn >= p2m->max_mapped_pfn)
-        return -EINVAL;
-
-    /* We perform p2m lookups, so lock the p2m upfront to avoid deadlock */
-    p2m_lock(p2m_get_hostp2m(d));
-    paging_lock(d);
+/* Support functions for shadow-based dirty VRAM code */
 
-    if ( dirty_vram && (!nr ||
-             ( begin_pfn != dirty_vram->begin_pfn
-            || end_pfn   != dirty_vram->end_pfn )) )
-    {
-        /* Different tracking, tear the previous down. */
-        gdprintk(XENLOG_INFO, "stopping tracking VRAM %lx - %lx\n", dirty_vram->begin_pfn, dirty_vram->end_pfn);
-        xfree(dirty_vram->sl1ma);
-        xfree(dirty_vram->dirty_bitmap);
-        xfree(dirty_vram);
-        dirty_vram = d->arch.hvm_domain.dirty_vram = NULL;
-    }
+#define DEBUG_unshadow_sl1ma                  0
+#define DEBUG_unshadow_sl1ma_detail           0
+#define DEBUG_count_initial_mappings          1
 
-    if ( !nr )
+/* smfn is no longer a shadow page.  Remove it from any
+ * dirty vram range mapping. */
+void
+dirty_vram_delete_shadow(struct vcpu *v,
+                         unsigned long gfn,
+                         unsigned int shadow_type, 
+                         mfn_t smfn)
+{
+    static unsigned int l1_shadow_mask = 
+          1 << SH_type_l1_32_shadow
+        | 1 << SH_type_fl1_32_shadow
+        | 1 << SH_type_l1_pae_shadow
+        | 1 << SH_type_fl1_pae_shadow
+        | 1 << SH_type_l1_64_shadow
+        | 1 << SH_type_fl1_64_shadow
+        ;
+    struct domain *d = v->domain;
+    dv_dirty_vram_t *dirty_vram;
+    struct list_head *curr, *next;
+    
+    ASSERT(paging_locked_by_me(d));
+    /* Ignore all but level 1 shadows */
+    
+    if ( (l1_shadow_mask & (1 << shadow_type)) == 0 )
     {
-        rc = 0;
         goto out;
     }
 
-    /* This should happen seldomly (Video mode change),
-     * no need to be careful. */
+    dirty_vram = d->arch.hvm_domain.dirty_vram;
     if ( !dirty_vram )
     {
-        /* Throw away all the shadows rather than walking through them 
-         * up to nr times getting rid of mappings of each pfn */
-        shadow_blow_tables(d);
-
-        gdprintk(XENLOG_INFO, "tracking VRAM %lx - %lx\n", begin_pfn, end_pfn);
-
-        rc = -ENOMEM;
-        if ( (dirty_vram = xmalloc(struct sh_dirty_vram)) == NULL )
-            goto out;
-        dirty_vram->begin_pfn = begin_pfn;
-        dirty_vram->end_pfn = end_pfn;
-        d->arch.hvm_domain.dirty_vram = dirty_vram;
-
-        if ( (dirty_vram->sl1ma = xmalloc_array(paddr_t, nr)) == NULL )
-            goto out_dirty_vram;
-        memset(dirty_vram->sl1ma, ~0, sizeof(paddr_t) * nr);
-
-        if ( (dirty_vram->dirty_bitmap = xzalloc_array(uint8_t, dirty_size)) == NULL )
-            goto out_sl1ma;
-
-        dirty_vram->last_dirty = NOW();
-
-        /* Tell the caller that this time we could not track dirty bits. */
-        rc = -ENODATA;
-    }
-    else if (dirty_vram->last_dirty == -1)
-    {
-        /* still completely clean, just copy our empty bitmap */
-        rc = -EFAULT;
-        if ( copy_to_guest(dirty_bitmap, dirty_vram->dirty_bitmap, dirty_size) == 0 )
-            rc = 0;
+        goto out;
     }
-    else
+        
+    list_for_each_safe(curr, next, &dirty_vram->range_head)
     {
-        /* Iterate over VRAM to track dirty bits. */
-        for ( i = 0; i < nr; i++ ) {
-            mfn_t mfn = get_gfn_query_unlocked(d, begin_pfn + i, &t);
-            struct page_info *page;
-            int dirty = 0;
-            paddr_t sl1ma = dirty_vram->sl1ma[i];
-
-            if (mfn_x(mfn) == INVALID_MFN)
-            {
-                dirty = 1;
-            }
-            else
+        dv_range_t *range = list_entry(curr, dv_range_t, range_link);
+        unsigned long i;
+        int max_mappings = 1, mappings = 0;
+        int unshadowed = 0;
+        for ( i = 0; i != range->end_pfn - range->begin_pfn; i++ )
+        {
+            dv_paddr_link_t *pl = &range->pl_tab[ i ];
+            dv_paddr_link_t **ppl = NULL;
+            mappings = 0;
+            
+            while ( pl != NULL )
             {
-                page = mfn_to_page(mfn);
-                switch (page->u.inuse.type_info & PGT_count_mask)
-                {
-                case 0:
-                    /* No guest reference, nothing to track. */
-                    break;
-                case 1:
-                    /* One guest reference. */
-                    if ( sl1ma == INVALID_PADDR )
-                    {
-                        /* We don't know which sl1e points to this, too bad. */
-                        dirty = 1;
-                        /* TODO: Heuristics for finding the single mapping of
-                         * this gmfn */
-                        flush_tlb |= sh_remove_all_mappings(d->vcpu[0], mfn);
-                    }
-                    else
-                    {
-                        /* Hopefully the most common case: only one mapping,
-                         * whose dirty bit we can use. */
-                        l1_pgentry_t *sl1e = maddr_to_virt(sl1ma);
-
-                        if ( l1e_get_flags(*sl1e) & _PAGE_DIRTY )
-                        {
-                            dirty = 1;
-                            /* Note: this is atomic, so we may clear a
-                             * _PAGE_ACCESSED set by another processor. */
-                            l1e_remove_flags(*sl1e, _PAGE_DIRTY);
-                            flush_tlb = 1;
-                        }
-                    }
-                    break;
-                default:
-                    /* More than one guest reference,
-                     * we don't afford tracking that. */
-                    dirty = 1;
+                paddr_t sl1ma = pl->sl1ma;
+                unsigned long sl1mn;
+                
+                if ( sl1ma == INVALID_PADDR )
                     break;
+                
+                sl1mn = sl1ma >> PAGE_SHIFT;
+                if ( sl1mn == mfn_x(smfn) ) {
+#if DEBUG_unshadow_sl1ma_detail
+                    gdprintk(XENLOG_DEBUG,
+                             "[%lx] gfn[%lx] unshadow sl1ma:%lx\n",
+                             mfn_x(smfn),
+                             range->begin_pfn + i,
+                             sl1ma);
+#endif
+                    unshadowed++;
+                    pl = free_paddr_link(d, ppl, pl);
+                    --range->nr_mappings;
+                }
+                else
+                {
+                    ppl = &pl->pl_next;
+                    pl = *ppl;
+                    mappings++;
                 }
             }
-
-            if ( dirty )
+        }
+        if ( mappings > max_mappings )
+            max_mappings = mappings;
+        
+        if ( unshadowed ) {
+#if DEBUG_unshadow_sl1ma
+            gdprintk(XENLOG_DEBUG,
+                     "[%lx] gfn[%05lx:%05lx] unshadowed:%d mappings:0x%x max_mappings:%d\n",
+                     mfn_x(smfn),
+                     range->begin_pfn, range->end_pfn,
+                     unshadowed, range->nr_mappings, max_mappings);
+#endif
+            if ( range->nr_mappings == 0 )
             {
-                dirty_vram->dirty_bitmap[i / 8] |= 1 << (i % 8);
-                dirty_vram->last_dirty = NOW();
+                dirty_vram_range_free(d, range);                    
             }
         }
+    }
+ out:
+    return;
+}
+
+
+typedef int (*hash_pfn_callback_t)(struct vcpu *v,
+                                   mfn_t smfn,
+                                   unsigned long begin_pfn,
+                                   unsigned long end_pfn,
+                                   int *removed);
+
+static int hash_pfn_foreach(struct vcpu *v, 
+                            unsigned int callback_mask, 
+                            hash_pfn_callback_t callbacks[], 
+                            unsigned long begin_pfn,
+                            unsigned long end_pfn)
+/* Walk the hash table looking at the types of the entries and 
+ * calling the appropriate callback function for each entry. 
+ * The mask determines which shadow types we call back for, and the array
+ * of callbacks tells us which function to call.
+ * Any callback may return non-zero to let us skip the rest of the scan. 
+ *
+ * WARNING: Callbacks MUST NOT add or remove hash entries unless they 
+ * then return non-zero to terminate the scan. */
+{
+    int i, done = 0, removed = 0;
+    struct domain *d = v->domain;
+    struct page_info *x;
+
+    /* Say we're here, to stop hash-lookups reordering the chains */
+    ASSERT(paging_locked_by_me(d));
+    ASSERT(d->arch.paging.shadow.hash_walking == 0);
+    d->arch.paging.shadow.hash_walking = 1;
 
-        rc = -EFAULT;
-        if ( copy_to_guest(dirty_bitmap, dirty_vram->dirty_bitmap, dirty_size) == 0 ) {
-            memset(dirty_vram->dirty_bitmap, 0, dirty_size);
-            if (dirty_vram->last_dirty + SECONDS(2) < NOW())
+    for ( i = 0; i < SHADOW_HASH_BUCKETS; i++ ) 
+    {
+        /* WARNING: This is not safe against changes to the hash table.
+         * The callback *must* return non-zero if it has inserted or
+         * deleted anything from the hash (lookups are OK, though). */
+        for ( x = d->arch.paging.shadow.hash_table[i]; x; x = next_shadow(x) )
+        {
+            if ( callback_mask & (1 << x->u.sh.type) )
             {
-                /* was clean for more than two seconds, try to disable guest
-                 * write access */
-                for ( i = begin_pfn; i < end_pfn; i++ ) {
-                    mfn_t mfn = get_gfn_query_unlocked(d, i, &t);
-                    if (mfn_x(mfn) != INVALID_MFN)
-                        flush_tlb |= sh_remove_write_access(d->vcpu[0], mfn, 1, 0);
-                }
-                dirty_vram->last_dirty = -1;
+                ASSERT(x->u.sh.type <= 15);
+                ASSERT(callbacks[x->u.sh.type] != NULL);
+                done = callbacks[x->u.sh.type](v, page_to_mfn(x), 
+                                               begin_pfn, end_pfn,
+                                               &removed);
+                if ( done ) break;
             }
-            rc = 0;
         }
+        if ( done ) break; 
     }
-    if ( flush_tlb )
-        flush_tlb_mask(d->domain_dirty_cpumask);
-    goto out;
+    d->arch.paging.shadow.hash_walking = 0;
+    return removed;
+}
 
-out_sl1ma:
-    xfree(dirty_vram->sl1ma);
-out_dirty_vram:
-    xfree(dirty_vram);
-    dirty_vram = d->arch.hvm_domain.dirty_vram = NULL;
+void sh_find_all_vram_mappings(struct vcpu *v,
+                               dv_range_t *range)
+{
+    /* Dispatch table for getting per-type functions */
+    static hash_pfn_callback_t callbacks[SH_type_unused] = {
+        NULL, /* none    */
+        SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, 2), /* l1_32   */
+        SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, 2), /* fl1_32  */
+        NULL, /* l2_32   */
+        SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, 3), /* l1_pae  */
+        SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, 3), /* fl1_pae */
+        NULL, /* l2_pae  */
+        NULL, /* l2h_pae */
+#if CONFIG_PAGING_LEVELS >= 4
+        SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, 4), /* l1_64   */
+        SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, 4), /* fl1_64  */
+#else
+        NULL, /* l1_64   */
+        NULL, /* fl1_64  */
+#endif
+        NULL, /* l2_64   */
+        NULL, /* l2h_64  */
+        NULL, /* l3_64   */
+        NULL, /* l4_64   */
+        NULL, /* p2m     */
+        NULL  /* unused  */
+    };
 
-out:
-    paging_unlock(d);
-    p2m_unlock(p2m_get_hostp2m(d));
-    return rc;
+    static unsigned int callback_mask = 
+          1 << SH_type_l1_32_shadow
+        | 1 << SH_type_fl1_32_shadow
+        | 1 << SH_type_l1_pae_shadow
+        | 1 << SH_type_fl1_pae_shadow
+        | 1 << SH_type_l1_64_shadow
+        | 1 << SH_type_fl1_64_shadow
+        ;
+
+    perfc_incr(shadow_mappings);
+
+    hash_pfn_foreach(v, callback_mask, callbacks,
+                     range->begin_pfn,
+                     range->end_pfn);
+
+#if DEBUG_count_initial_mappings
+    gdprintk(XENLOG_DEBUG, "[%05lx:%05lx] count of initial mappings:%d\n",
+             range->begin_pfn, range->end_pfn,
+             range->nr_mappings);
+#endif
 }
 
+
 /**************************************************************************/
 /* Shadow-control XEN_DOMCTL dispatcher */
 
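As an aside (not part of the patch itself): sh_find_all_vram_mappings() above reuses the shadow code's usual hash-walk idiom: a bitmask of shadow types selects which hash entries are of interest, a per-type dispatch table supplies the callback, and any callback can stop the walk by returning non-zero. A minimal, self-contained C sketch of that idiom, with hypothetical names and no Xen dependencies, is:

    #include <stdio.h>

    /* Hypothetical shadow types; only the L1 types get a callback, mirroring
     * the callback_mask / dispatch-table idiom of hash_pfn_foreach(). */
    enum sh_type { T_NONE, T_L1, T_FL1, T_L2, T_NR };

    struct entry {
        enum sh_type type;
        unsigned long smfn;
    };

    typedef int (*cb_t)(unsigned long smfn, unsigned long begin, unsigned long end);

    static int note_l1(unsigned long smfn, unsigned long begin, unsigned long end)
    {
        printf("L1 shadow %#lx scanned for pfns [%#lx,%#lx)\n", smfn, begin, end);
        return 0;                  /* non-zero would abort the walk */
    }

    static void walk(const struct entry *tab, int n, unsigned int mask,
                     cb_t cbs[], unsigned long begin, unsigned long end)
    {
        for ( int i = 0; i < n; i++ )
            if ( (mask & (1u << tab[i].type)) && cbs[tab[i].type] != NULL &&
                 cbs[tab[i].type](tab[i].smfn, begin, end) )
                break;             /* a callback asked to stop the scan */
    }

    int main(void)
    {
        struct entry tab[] = { { T_L1, 0x100 }, { T_L2, 0x200 }, { T_FL1, 0x300 } };
        cb_t cbs[T_NR] = { NULL, note_l1, note_l1, NULL };
        unsigned int mask = (1u << T_L1) | (1u << T_FL1);

        walk(tab, 3, mask, cbs, 0xa0, 0xc0);   /* pfn range of a VGA-ish buffer */
        return 0;
    }

Returning non-zero from a callback is also how the real walker protects itself against the hash table being modified mid-scan, as the warning comment above notes.
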
diff --git a/xen/arch/x86/mm/shadow/multi.c b/xen/arch/x86/mm/shadow/multi.c
index b0e6d72..9c00574 100644
--- a/xen/arch/x86/mm/shadow/multi.c
+++ b/xen/arch/x86/mm/shadow/multi.c
@@ -35,6 +35,7 @@
 #include <asm/flushtlb.h>
 #include <asm/hvm/hvm.h>
 #include <asm/hvm/cacheattr.h>
+#include <asm/dirty_vram.h>
 #include <asm/mtrr.h>
 #include <asm/guest_pt.h>
 #include <public/sched.h>
@@ -149,6 +150,10 @@ delete_fl1_shadow_status(struct vcpu *v, gfn_t gfn, mfn_t smfn)
     SHADOW_PRINTK("gfn=%"SH_PRI_gfn", type=%08x, smfn=%05lx\n",
                    gfn_x(gfn), SH_type_fl1_shadow, mfn_x(smfn));
     ASSERT(mfn_to_page(smfn)->u.sh.head);
+
+    /* Removing any dv_paddr_links to the erstwhile shadow page */
+    dirty_vram_delete_shadow(v, gfn_x(gfn), SH_type_fl1_shadow, smfn);
+    
     shadow_hash_delete(v, gfn_x(gfn), SH_type_fl1_shadow, smfn);
 }
 
@@ -160,6 +165,10 @@ delete_shadow_status(struct vcpu *v, mfn_t gmfn, u32 shadow_type, mfn_t smfn)
                    v->domain->domain_id, v->vcpu_id,
                    mfn_x(gmfn), shadow_type, mfn_x(smfn));
     ASSERT(mfn_to_page(smfn)->u.sh.head);
+    
+    /* Removing any dv_paddr_links to the erstwhile shadow page */
+    dirty_vram_delete_shadow(v, mfn_x(gmfn), shadow_type, smfn);
+    
     shadow_hash_delete(v, mfn_x(gmfn), shadow_type, smfn);
     /* 32-on-64 PV guests don't own their l4 pages; see set_shadow_status */
     if ( !is_pv_32on64_vcpu(v) || shadow_type != SH_type_l4_64_shadow )
@@ -516,7 +525,6 @@ _sh_propagate(struct vcpu *v,
     guest_l1e_t guest_entry = { guest_intpte };
     shadow_l1e_t *sp = shadow_entry_ptr;
     struct domain *d = v->domain;
-    struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
     gfn_t target_gfn = guest_l1e_get_gfn(guest_entry);
     u32 pass_thru_flags;
     u32 gflags, sflags;
@@ -663,17 +671,6 @@ _sh_propagate(struct vcpu *v,
         }
     }
 
-    if ( unlikely((level == 1) && dirty_vram
-            && dirty_vram->last_dirty == -1
-            && gfn_x(target_gfn) >= dirty_vram->begin_pfn
-            && gfn_x(target_gfn) < dirty_vram->end_pfn) )
-    {
-        if ( ft & FETCH_TYPE_WRITE )
-            dirty_vram->last_dirty = NOW();
-        else
-            sflags &= ~_PAGE_RW;
-    }
-
     /* Read-only memory */
     if ( p2m_is_readonly(p2mt) ||
          (p2mt == p2m_mmio_direct &&
@@ -1072,101 +1069,60 @@ static int shadow_set_l2e(struct vcpu *v,
     return flags;
 }
 
-static inline void shadow_vram_get_l1e(shadow_l1e_t new_sl1e,
+/* shadow_vram_fix_l1e()
+ *
+ * Tests L1PTEs as they are modified, looking for when they start to
+ * (or cease to) point to frame buffer pages.  If the old and new gfns differ,
+ * calls dirty_vram_range_update() to update the dirty_vram structures.
+ */
+static inline void shadow_vram_fix_l1e(shadow_l1e_t old_sl1e,
+                                       shadow_l1e_t new_sl1e,
                                        shadow_l1e_t *sl1e,
                                        mfn_t sl1mfn,
                                        struct domain *d)
 { 
-    mfn_t mfn = shadow_l1e_get_mfn(new_sl1e);
-    int flags = shadow_l1e_get_flags(new_sl1e);
-    unsigned long gfn;
-    struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
+    mfn_t new_mfn, old_mfn;
+    unsigned long new_gfn = INVALID_M2P_ENTRY, old_gfn = INVALID_M2P_ENTRY;
+    paddr_t sl1ma;
+    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
 
-    if ( !dirty_vram         /* tracking disabled? */
-         || !(flags & _PAGE_RW) /* read-only mapping? */
-         || !mfn_valid(mfn) )   /* mfn can be invalid in mmio_direct */
+    if ( !dirty_vram )
         return;
 
-    gfn = mfn_to_gfn(d, mfn);
-    /* Page sharing not supported on shadow PTs */
-    BUG_ON(SHARED_M2P(gfn));
+    sl1ma = pfn_to_paddr(mfn_x(sl1mfn)) | ((unsigned long)sl1e & ~PAGE_MASK);
 
-    if ( (gfn >= dirty_vram->begin_pfn) && (gfn < dirty_vram->end_pfn) )
+    old_mfn = shadow_l1e_get_mfn(old_sl1e);
+
+    if ( !sh_l1e_is_magic(old_sl1e) &&
+         (l1e_get_flags(old_sl1e) & _PAGE_PRESENT) &&
+         mfn_valid(old_mfn))
     {
-        unsigned long i = gfn - dirty_vram->begin_pfn;
-        struct page_info *page = mfn_to_page(mfn);
-        
-        if ( (page->u.inuse.type_info & PGT_count_mask) == 1 )
-            /* Initial guest reference, record it */
-            dirty_vram->sl1ma[i] = pfn_to_paddr(mfn_x(sl1mfn))
-                | ((unsigned long)sl1e & ~PAGE_MASK);
+        old_gfn = mfn_to_gfn(d, old_mfn);
     }
-}
-
-static inline void shadow_vram_put_l1e(shadow_l1e_t old_sl1e,
-                                       shadow_l1e_t *sl1e,
-                                       mfn_t sl1mfn,
-                                       struct domain *d)
-{
-    mfn_t mfn = shadow_l1e_get_mfn(old_sl1e);
-    int flags = shadow_l1e_get_flags(old_sl1e);
-    unsigned long gfn;
-    struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
-
-    if ( !dirty_vram         /* tracking disabled? */
-         || !(flags & _PAGE_RW) /* read-only mapping? */
-         || !mfn_valid(mfn) )   /* mfn can be invalid in mmio_direct */
-        return;
-
-    gfn = mfn_to_gfn(d, mfn);
-    /* Page sharing not supported on shadow PTs */
-    BUG_ON(SHARED_M2P(gfn));
-
-    if ( (gfn >= dirty_vram->begin_pfn) && (gfn < dirty_vram->end_pfn) )
+    
+    new_mfn = shadow_l1e_get_mfn(new_sl1e);
+    if ( !sh_l1e_is_magic(new_sl1e) &&
+         (l1e_get_flags(new_sl1e) & _PAGE_PRESENT) &&
+         mfn_valid(new_mfn))
     {
-        unsigned long i = gfn - dirty_vram->begin_pfn;
-        struct page_info *page = mfn_to_page(mfn);
-        int dirty = 0;
-        paddr_t sl1ma = pfn_to_paddr(mfn_x(sl1mfn))
-            | ((unsigned long)sl1e & ~PAGE_MASK);
+        new_gfn = mfn_to_gfn(d, new_mfn);
+    }
 
-        if ( (page->u.inuse.type_info & PGT_count_mask) == 1 )
-        {
-            /* Last reference */
-            if ( dirty_vram->sl1ma[i] == INVALID_PADDR ) {
-                /* We didn't know it was that one, let's say it is dirty */
-                dirty = 1;
-            }
-            else
-            {
-                ASSERT(dirty_vram->sl1ma[i] == sl1ma);
-                dirty_vram->sl1ma[i] = INVALID_PADDR;
-                if ( flags & _PAGE_DIRTY )
-                    dirty = 1;
-            }
-        }
-        else
+    if ( old_gfn == new_gfn ) return;
+
+    if ( VALID_M2P(old_gfn) )
+        if ( dirty_vram_range_update(d, old_gfn, sl1ma, 0/*clear*/) )
         {
-            /* We had more than one reference, just consider the page dirty. */
-            dirty = 1;
-            /* Check that it's not the one we recorded. */
-            if ( dirty_vram->sl1ma[i] == sl1ma )
-            {
-                /* Too bad, we remembered the wrong one... */
-                dirty_vram->sl1ma[i] = INVALID_PADDR;
-            }
-            else
-            {
-                /* Ok, our recorded sl1e is still pointing to this page, let's
-                 * just hope it will remain. */
-            }
+            SHADOW_PRINTK("gfn %lx (mfn %lx) cleared vram pte\n",
+                          old_gfn, mfn_x(old_mfn));
         }
-        if ( dirty )
+
+    if ( VALID_M2P(new_gfn) )
+        if ( dirty_vram_range_update(d, new_gfn, sl1ma, 1/*set*/) )
         {
-            dirty_vram->dirty_bitmap[i / 8] |= 1 << (i % 8);
-            dirty_vram->last_dirty = NOW();
+            SHADOW_PRINTK("gfn %lx (mfn %lx) set vram pte\n",
+                          new_gfn, mfn_x(new_mfn));
         }
-    }
 }
 
 static int shadow_set_l1e(struct vcpu *v, 
@@ -1211,12 +1167,13 @@ static int shadow_set_l1e(struct vcpu *v,
                 shadow_l1e_remove_flags(new_sl1e, _PAGE_RW);
                 /* fall through */
             case 0:
-                shadow_vram_get_l1e(new_sl1e, sl1e, sl1mfn, d);
                 break;
             }
         }
     } 
 
+    shadow_vram_fix_l1e(old_sl1e, new_sl1e, sl1e, sl1mfn, d);
+
     /* Write the new entry */
     shadow_write_entries(sl1e, &new_sl1e, 1, sl1mfn);
     flags |= SHADOW_SET_CHANGED;
@@ -1231,7 +1188,6 @@ static int shadow_set_l1e(struct vcpu *v,
          * trigger a flush later. */
         if ( shadow_mode_refcounts(d) ) 
         {
-            shadow_vram_put_l1e(old_sl1e, sl1e, sl1mfn, d);
             shadow_put_page_from_l1e(old_sl1e, d);
             TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_SHADOW_L1_PUT_REF);
         } 
@@ -2018,7 +1974,6 @@ void sh_destroy_l1_shadow(struct vcpu *v, mfn_t smfn)
         SHADOW_FOREACH_L1E(sl1mfn, sl1e, 0, 0, {
             if ( (shadow_l1e_get_flags(*sl1e) & _PAGE_PRESENT)
                  && !sh_l1e_is_magic(*sl1e) ) {
-                shadow_vram_put_l1e(*sl1e, sl1e, sl1mfn, d);
                 shadow_put_page_from_l1e(*sl1e, d);
             }
         });
@@ -4336,6 +4291,37 @@ int sh_rm_mappings_from_l1(struct vcpu *v, mfn_t sl1mfn, mfn_t target_mfn)
     return done;
 }
 
+
+int sh_find_vram_mappings_in_l1(struct vcpu *v,
+                                mfn_t sl1mfn,
+                                unsigned long begin_pfn,
+                                unsigned long end_pfn,
+                                int *removed)
+/* Find all VRAM mappings in this shadow l1 table */
+{
+    struct domain *d = v->domain;
+    shadow_l1e_t *sl1e;
+    int done = 0;
+
+    /* only returns _PAGE_PRESENT entries */
+    SHADOW_FOREACH_L1E(sl1mfn, sl1e, 0, done, 
+    {
+        unsigned long gfn;
+        mfn_t gmfn = shadow_l1e_get_mfn(*sl1e);
+        if ( !mfn_valid(gmfn) )
+            continue;
+        gfn = mfn_to_gfn(d, gmfn);
+        if ( VALID_M2P(gfn) && (begin_pfn <= gfn) && (gfn < end_pfn) ) 
+        {
+            paddr_t sl1ma =
+                pfn_to_paddr(mfn_x(sl1mfn)) |
+                ( (unsigned long)sl1e & ~PAGE_MASK );
+            dirty_vram_range_update(v->domain, gfn, sl1ma, 1/*set*/);
+        }
+    });
+    return 0;
+}
+
 /**************************************************************************/
 /* Functions to excise all pointers to shadows from higher-level shadows. */
 
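As an aside (not part of the patch itself): both shadow_vram_fix_l1e() and sh_find_vram_mappings_in_l1() identify a shadow L1 PTE by its machine address, computed as pfn_to_paddr(mfn_x(sl1mfn)) | ((unsigned long)sl1e & ~PAGE_MASK), i.e. the shadow page's physical address plus the entry's byte offset within that page. The standalone sketch below reproduces just that arithmetic with made-up values; the names and constants are illustrative, not Xen definitions.

    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SHIFT 12
    #define PAGE_SIZE  (1UL << PAGE_SHIFT)
    #define PAGE_MASK  (~(PAGE_SIZE - 1))

    /* Machine address of a PTE: the holding page's address (mfn << PAGE_SHIFT)
     * OR'd with the entry's byte offset inside that page. */
    static uint64_t sl1e_maddr(unsigned long sl1mfn, unsigned long entry_offset)
    {
        return ((uint64_t)sl1mfn << PAGE_SHIFT) | (entry_offset & ~PAGE_MASK);
    }

    int main(void)
    {
        unsigned long sl1mfn = 0xabcde;   /* hypothetical shadow L1 page MFN    */
        unsigned long offset = 3 * 8;     /* fourth 8-byte PTE slot in the page */

        printf("sl1ma = %#llx\n", (unsigned long long)sl1e_maddr(sl1mfn, offset));
        return 0;
    }

With an MFN of 0xabcde and the fourth 8-byte slot, the sketch prints sl1ma = 0xabcde018, which is the kind of value the patch records in pl->sl1ma and later compares against a torn-down shadow page.
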
diff --git a/xen/arch/x86/mm/shadow/multi.h b/xen/arch/x86/mm/shadow/multi.h
index 835121e..436a4ac 100644
--- a/xen/arch/x86/mm/shadow/multi.h
+++ b/xen/arch/x86/mm/shadow/multi.h
@@ -66,7 +66,12 @@ SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1, GUEST_LEVELS)
 extern int
 SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1, GUEST_LEVELS)
     (struct vcpu *v, mfn_t sl1mfn, mfn_t target_mfn);
-
+extern int
+SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, GUEST_LEVELS)
+     (struct vcpu *v, mfn_t sl1mfn, 
+      unsigned long begin_pfn,
+      unsigned long end_pfn,
+      int *removed);
 extern void
 SHADOW_INTERNAL_NAME(sh_clear_shadow_entry, GUEST_LEVELS)
     (struct vcpu *v, void *ep, mfn_t smfn);
diff --git a/xen/arch/x86/mm/shadow/types.h b/xen/arch/x86/mm/shadow/types.h
index 43ce1db..5b0f9f7 100644
--- a/xen/arch/x86/mm/shadow/types.h
+++ b/xen/arch/x86/mm/shadow/types.h
@@ -229,6 +229,7 @@ static inline shadow_l4e_t shadow_l4e_from_mfn(mfn_t mfn, u32 flags)
 #define sh_update_cr3              INTERNAL_NAME(sh_update_cr3)
 #define sh_rm_write_access_from_l1 INTERNAL_NAME(sh_rm_write_access_from_l1)
 #define sh_rm_mappings_from_l1     INTERNAL_NAME(sh_rm_mappings_from_l1)
+#define sh_find_vram_mappings_in_l1 INTERNAL_NAME(sh_find_vram_mappings_in_l1)
 #define sh_remove_l1_shadow        INTERNAL_NAME(sh_remove_l1_shadow)
 #define sh_remove_l2_shadow        INTERNAL_NAME(sh_remove_l2_shadow)
 #define sh_remove_l3_shadow        INTERNAL_NAME(sh_remove_l3_shadow)
diff --git a/xen/include/asm-x86/dirty_vram.h b/xen/include/asm-x86/dirty_vram.h
new file mode 100644
index 0000000..7f3ccf9
--- /dev/null
+++ b/xen/include/asm-x86/dirty_vram.h
@@ -0,0 +1,196 @@
+/******************************************************************************
+ * include/asm-x86/dirty_vram.h
+ *
+ * Interface for tracking dirty VRAM pages
+ *
+ * Copyright (c) 2012 Citrix Systems, Inc. (Robert Phillips)
+ * Parts of this code are Copyright (c) 2006 by XenSource Inc.
+ * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
+ * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#ifndef _DIRTY_VRAM_H
+#define _DIRTY_VRAM_H
+
+/*
+ * In shadow mode we need to bookkeep all the L1 page table entries that
+ * map a frame buffer page.  Struct dv_paddr_link does this
+ * by recording the address of an L1 page table entry for some frame buffer page.
+ * It also links to additional pl entries if the frame buffer page
+ * has multiple mappings.
+ * In practice very few pages have multiple mappings,
+ * but to rule out pathological situations we limit the number of
+ * mappings we're willing to bookkeep.
+ */
+
+#define DV_ADDR_LINK_LIST_LIMIT 20
+
+typedef struct dv_paddr_link {
+    paddr_t sl1ma;
+    struct dv_paddr_link *pl_next;
+} dv_paddr_link_t;
+
+/*
+ * This defines an extension page of pl entries for FB pages with multiple
+ * mappings. All such pages (of a domain) are linked together.
+ */
+typedef struct dv_paddr_link_ext {
+    struct list_head ext_link;
+    dv_paddr_link_t entries[ ( PAGE_SIZE - sizeof( struct list_head ) ) /
+                             sizeof( dv_paddr_link_t ) ];
+} dv_paddr_link_ext_t;
+
+/*
+ * This defines a single frame buffer range.  It bookkeeps all the level 1 PTEs
+ * that map guest pages within that range.
+ * All such ranges (of a domain) are linked together.
+ */
+typedef struct dv_range {
+    struct list_head range_link; /* the several ranges form a linked list */
+    unsigned long begin_pfn;
+    unsigned long end_pfn;
+    dv_paddr_link_t *pl_tab; /* table has 1 pl entry per pfn in range */
+    int nr_mappings;  /* total number of mappings in this range */
+    int mappings_hwm; /* high water mark of max mapping count */
+    unsigned int dirty_count;
+} dv_range_t;
+
+/*
+ * This contains all the data structures required by a domain to
+ * bookkeep the dirty pages within its frame buffers.
+ */
+typedef struct dv_dirty_vram {
+    struct list_head range_head; /* head of the linked list of ranges */
+    struct list_head ext_head; /* head of list of extension pages */
+    dv_paddr_link_t *pl_free; /* free list of pl's within extension pages */
+    int nr_ranges; /* bookkeeps number of ranges */
+    int ranges_hwm; /* high water mark of max number of ranges */
+} dv_dirty_vram_t;
+
+/* Allocates domain's dirty_vram structure */
+dv_dirty_vram_t *
+dirty_vram_alloc(struct domain *d);
+
+/*
+ * Returns domain's dirty_vram structure,
+ * allocating it if necessary
+ */
+dv_dirty_vram_t *
+dirty_vram_find_or_alloc(struct domain *d);
+
+/* Frees domain's dirty_vram structure */
+void dirty_vram_free(struct domain *d);
+
+/* Returns dirty vram range containing gfn, NULL if none */
+struct dv_range *
+dirty_vram_range_find_gfn(struct domain *d,
+                          unsigned long gfn);
+
+/*
+ * Returns dirty vram range matching [ begin_pfn .. begin_pfn+nr ),
+ * NULL if none
+ */
+dv_range_t *
+dirty_vram_range_find(struct domain *d,
+                      unsigned long begin_pfn,
+                      unsigned long nr);
+
+/*
+ * Allocate dirty vram range containing [ begin_pfn .. begin_pfn+nr ),
+ * freeing any existing range that overlaps the new range.
+ */
+dv_range_t *
+dirty_vram_range_alloc(struct domain *d,
+                       unsigned long begin_pfn,
+                       unsigned long nr);
+
+/*
+ * Returns dirty vram range matching [ begin_pfn .. begin_pfn+nr ),
+ * creating a range if none already exists and
+ * freeing any existing range that overlaps the new range.
+ */
+dv_range_t *
+dirty_vram_range_find_or_alloc(struct domain *d,
+                               unsigned long begin_pfn,
+                               unsigned long nr);
+
+void dirty_vram_range_free(struct domain *d,
+                           dv_range_t *range);
+
+/* Bookkeep PTE address of a frame buffer page */
+int dirty_vram_range_update(struct domain *d,
+                            unsigned long gfn,
+                            paddr_t sl1ma,
+                            int set);
+
+/*
+ * smfn is no longer a shadow page.  Remove it from any
+ * dirty vram range mapping.
+ */
+void
+dirty_vram_delete_shadow(struct vcpu *v,
+                         unsigned long gfn,
+                         unsigned int shadow_type,
+                         mfn_t smfn);
+
+
+/*
+ * Scan all the L1 tables looking for VRAM mappings.
+ * Record them in the domain's dv_dirty_vram structure
+ */
+void sh_find_all_vram_mappings(struct vcpu *v,
+                               dv_range_t *range);
+
+/*
+ * Free a paddr_link struct, given the address of its
+ * predecessor in the singly-linked list
+ */
+dv_paddr_link_t *
+free_paddr_link(struct domain *d,
+                dv_paddr_link_t **ppl,
+                dv_paddr_link_t *pl);
+
+
+/* Enable VRAM dirty tracking. */
+int
+shadow_track_dirty_vram(struct domain *d,
+                        unsigned long first_pfn,
+                        unsigned long nr,
+                        XEN_GUEST_HANDLE_64(uint8) dirty_bitmap);
+
+int
+hap_track_dirty_vram(struct domain *d,
+                     unsigned long begin_pfn,
+                     unsigned long nr,
+                     XEN_GUEST_HANDLE_64(uint8) dirty_bitmap);
+
+void
+hap_clean_vram_tracking_range(struct domain *d,
+                              unsigned long begin_pfn,
+                              unsigned long nr,
+                              uint8_t *dirty_bitmap);
+
+#endif /* _DIRTY_VRAM_H */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
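As an aside (not part of the patch itself): the bookkeeping these structures describe is one dv_range_t per frame buffer, whose pl_tab holds one dv_paddr_link_t head per pfn in the range; a pfn with more than one shadow L1 mapping chains extra nodes through pl_next, drawn from the extension pages on ext_head via the pl_free list. The user-space sketch below models only the per-pfn chaining, with simplified stand-in types and malloc() in place of the extension-page free list.

    #include <stdio.h>
    #include <stdlib.h>

    typedef unsigned long long paddr_t;
    #define INVALID_PADDR ((paddr_t)~0ULL)

    /* Minimal stand-in for dv_paddr_link_t: one node per sl1 mapping of a pfn. */
    struct pl {
        paddr_t sl1ma;
        struct pl *next;
    };

    /* Record another sl1 mapping for the pfn at index i of a range.  The head
     * node lives in the pl_tab array itself; additional mappings are chained
     * behind it. */
    static void pl_add(struct pl *tab, unsigned long i, paddr_t sl1ma)
    {
        struct pl *head = &tab[i];

        if ( head->sl1ma == INVALID_PADDR )     /* first mapping: use head slot */
        {
            head->sl1ma = sl1ma;
            return;
        }
        struct pl *extra = malloc(sizeof(*extra));  /* further mappings: chain  */
        extra->sl1ma = sl1ma;
        extra->next = head->next;
        head->next = extra;
    }

    int main(void)
    {
        unsigned long nr = 4;                       /* pfns in the range        */
        struct pl *tab = calloc(nr, sizeof(*tab));

        for ( unsigned long i = 0; i < nr; i++ )
            tab[i] = (struct pl){ INVALID_PADDR, NULL };

        pl_add(tab, 2, 0x12345000ULL | 0x18);       /* first mapping of pfn #2  */
        pl_add(tab, 2, 0x67890000ULL | 0x20);       /* second mapping, chained  */

        for ( struct pl *p = &tab[2]; p; p = p->next )
            printf("pfn #2 mapped by sl1e at %#llx\n", p->sl1ma);
        return 0;
    }

Walking a pl_tab slot and its pl_next chain in this way is what dirty_vram_delete_shadow() does when a shadow L1 page is torn down.
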
diff --git a/xen/include/asm-x86/hap.h b/xen/include/asm-x86/hap.h
index 916a35b..3e3a1f5 100644
--- a/xen/include/asm-x86/hap.h
+++ b/xen/include/asm-x86/hap.h
@@ -57,10 +57,6 @@ void  hap_final_teardown(struct domain *d);
 void  hap_teardown(struct domain *d);
 void  hap_vcpu_init(struct vcpu *v);
 void  hap_logdirty_init(struct domain *d);
-int   hap_track_dirty_vram(struct domain *d,
-                           unsigned long begin_pfn,
-                           unsigned long nr,
-                           XEN_GUEST_HANDLE_64(uint8) dirty_bitmap);
 
 extern const struct paging_mode *hap_paging_get_mode(struct vcpu *);
 
diff --git a/xen/include/asm-x86/hvm/domain.h b/xen/include/asm-x86/hvm/domain.h
index 27b3de5..6146542 100644
--- a/xen/include/asm-x86/hvm/domain.h
+++ b/xen/include/asm-x86/hvm/domain.h
@@ -74,7 +74,7 @@ struct hvm_domain {
     struct list_head       pinned_cacheattr_ranges;
 
     /* VRAM dirty support. */
-    struct sh_dirty_vram *dirty_vram;
+    struct dv_dirty_vram *dirty_vram;
 
     /* If one of vcpus of this domain is in no_fill_mode or
      * mtrr/pat between vcpus is not the same, set is_in_uc_mode
diff --git a/xen/include/asm-x86/paging.h b/xen/include/asm-x86/paging.h
index 9a40f2c..f804104 100644
--- a/xen/include/asm-x86/paging.h
+++ b/xen/include/asm-x86/paging.h
@@ -137,10 +137,10 @@ struct paging_mode {
 void paging_free_log_dirty_bitmap(struct domain *d);
 
 /* get the dirty bitmap for a specific range of pfns */
-int paging_log_dirty_range(struct domain *d,
-                           unsigned long begin_pfn,
-                           unsigned long nr,
-                           XEN_GUEST_HANDLE_64(uint8) dirty_bitmap);
+void paging_log_dirty_range(struct domain *d,
+                            unsigned long begin_pfn,
+                            unsigned long nr,
+                            uint8_t *dirty_bitmap);
 
 /* enable log dirty */
 int paging_log_dirty_enable(struct domain *d);
@@ -161,6 +161,11 @@ void paging_mark_dirty(struct domain *d, unsigned long guest_mfn);
  * This is called from inside paging code, with the paging lock held. */
 int paging_mfn_is_dirty(struct domain *d, mfn_t gmfn);
 
+/* mark a page as dirty, from the hap page fault handler */
+void paging_mark_dirty_hap(struct domain *d,
+                           unsigned long pfn,
+                           unsigned long guest_mfn);
+
 /*
  * Log-dirty radix tree indexing:
  *   All tree nodes are PAGE_SIZE bytes, mapped on-demand.
@@ -183,15 +188,6 @@ int paging_mfn_is_dirty(struct domain *d, mfn_t gmfn);
 #define L4_LOGDIRTY_IDX(pfn) 0
 #endif
 
-/* VRAM dirty tracking support */
-struct sh_dirty_vram {
-    unsigned long begin_pfn;
-    unsigned long end_pfn;
-    paddr_t *sl1ma;
-    uint8_t *dirty_bitmap;
-    s_time_t last_dirty;
-};
-
 /*****************************************************************************
  * Entry points into the paging-assistance code */
 
diff --git a/xen/include/asm-x86/shadow.h b/xen/include/asm-x86/shadow.h
index 2eb6efc..940d7fd 100644
--- a/xen/include/asm-x86/shadow.h
+++ b/xen/include/asm-x86/shadow.h
@@ -62,12 +62,6 @@ void shadow_vcpu_init(struct vcpu *v);
 /* Enable an arbitrary shadow mode.  Call once at domain creation. */
 int shadow_enable(struct domain *d, u32 mode);
 
-/* Enable VRAM dirty bit tracking. */
-int shadow_track_dirty_vram(struct domain *d,
-                            unsigned long first_pfn,
-                            unsigned long nr,
-                            XEN_GUEST_HANDLE_64(uint8) dirty_bitmap);
-
 /* Handler for shadow control ops: operations from user-space to enable
  * and disable ephemeral shadow modes (test mode and log-dirty mode) and
  * manipulate the log-dirty bitmap. */
-- 
1.7.9.5

Thread overview: 35+ messages
2012-10-16 18:15 [PATCH] Provide support for multiple frame buffers in Xen Robert Phillips
2012-10-16 18:21 ` Robert Phillips
2012-10-22 16:10 ` Tim Deegan
2012-10-22 17:45   ` Robert Phillips
2012-11-01 11:03     ` Tim Deegan
2012-11-01 11:07       ` Robert Phillips
2012-11-01 11:43         ` Ian Campbell
2012-11-01 14:24 ` Tim Deegan
2012-11-07 20:36   ` Robert Phillips
2012-11-08 13:25     ` Tim Deegan
2012-11-12 21:31       ` Robert Phillips
2012-11-07 20:36 Robert Phillips
2012-11-12 21:31 Robert Phillips
2012-11-15 13:22 ` Tim Deegan
2012-11-27 15:58   ` Robert Phillips
2012-11-15 15:38 Fabio Fantoni
2012-11-20  1:23 ` Ben Guthro
2012-11-27 15:52 Robert Phillips
2013-01-02 14:47 Robert Phillips
2013-01-10 12:29 ` Tim Deegan
2013-01-16 15:10   ` Robert Phillips
2013-01-16 15:11 Robert Phillips
2013-01-17 11:54 ` Tim Deegan
2013-01-21 19:28 Robert Phillips
2013-01-22  7:31 ` Pasi Kärkkäinen
2013-01-24 11:25 ` Tim Deegan
2013-02-05 11:38   ` Robert Phillips
2013-02-07 11:04     ` Tim Deegan
2013-03-01 20:52   ` Robert Phillips
2013-03-01 20:48 Robert Phillips
2013-03-02 11:20 ` Pasi Kärkkäinen
2013-03-07 12:05 ` Tim Deegan
2013-10-10 14:48 ` Wei Liu
2013-10-10 14:58   ` Ben Guthro
2013-10-10 15:07     ` Wei Liu
