All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] REPOST: Memory tracking for physical machine migration
@ 2011-06-10 23:19 Jim Paradis
  2011-06-11  7:55 ` Christoph Hellwig
  2011-06-20 14:55 ` Dave Hansen
  0 siblings, 2 replies; 5+ messages in thread
From: Jim Paradis @ 2011-06-10 23:19 UTC (permalink / raw)
  To: linux-mm; +Cc: Jim Paradis

[tried posting this a couple days ago... kept having formatting problems
with the exchange server.  Let's see how this works...]

This patch implements a system to track re-dirtied pages and modified
PTEs.  It is used by Stratus Technologies for both our ftLinux product and
our new GPL Live Kernel Self Migration project (lksm.sourceforge.net).
In both cases, we bring a backup server online by copying the primary
server's state while it is running.  We start by copying all of memory
top to bottom.  We then go back and re-copy any pages that were changed
during the first copy pass.  After several such passes we momentarily
suspend processing so we can copy the last few pages over and bring up
the secondary system.  This patch keeps track of which pages need to be
copied during these passes.

 arch/x86/Kconfig                      |   11 +++++++++++
 arch/x86/include/asm/hugetlb.h        |    3 +++
 arch/x86/include/asm/pgtable-2level.h |    4 ++++
 arch/x86/include/asm/pgtable-3level.h |   11 +++++++++++
 arch/x86/include/asm/pgtable.h        |    4 ++--
 arch/x86/include/asm/pgtable_32.h     |    1 +
 arch/x86/include/asm/pgtable_64.h     |    7 +++++++
 arch/x86/include/asm/pgtable_types.h  |    5 ++++-
 arch/x86/mm/Makefile                  |    2 ++
 mm/huge_memory.c                      |    4 ++--
 11 files changed, 48 insertions(+), 6 deletions(-)

Signed-off-by: "James Paradis" <james.paradis@stratus.com>

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index cc6c53a..cc778a4 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1146,6 +1146,17 @@ config DIRECT_GBPAGES
 	  support it. This can improve the kernel's performance a tiny bit by
 	  reducing TLB pressure. If in doubt, say "Y".
 
+config TRACK_DIRTY_PAGES
+	bool "Enable dirty page tracking"
+	default n
+	depends on !KMEMCHECK
+	---help---
+	  Turning this on enables tracking of re-dirtied and
+	  changed pages.  This is needed by the Live Kernel
+	  Self Migration project (lksm.sourceforge.net) to perform
+	  live copying of memory and system state to another system.
+	  Most users will say n here.
+
 # Common NUMA Features
 config NUMA
 	bool "Numa Memory Allocation and Scheduler Support"
diff --git a/arch/x86/include/asm/hugetlb.h b/arch/x86/include/asm/hugetlb.h
index 439a9ac..8266873 100644
--- a/arch/x86/include/asm/hugetlb.h
+++ b/arch/x86/include/asm/hugetlb.h
@@ -2,6 +2,7 @@
 #define _ASM_X86_HUGETLB_H
 
 #include <asm/page.h>
+#include <asm/mm_track.h>
 
 
 static inline int is_hugepage_only_range(struct mm_struct *mm,
@@ -39,12 +40,14 @@ static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb,
 static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
 				   pte_t *ptep, pte_t pte)
 {
+	mm_track_pmd((pmd_t *)ptep);
 	set_pte_at(mm, addr, ptep, pte);
 }
 
 static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
 					    unsigned long addr, pte_t *ptep)
 {
+	mm_track_pmd((pmd_t *)ptep);
 	return ptep_get_and_clear(mm, addr, ptep);
 }
 
diff --git a/arch/x86/include/asm/pgtable-2level.h b/arch/x86/include/asm/pgtable-2level.h
index 98391db..a59deb5 100644
--- a/arch/x86/include/asm/pgtable-2level.h
+++ b/arch/x86/include/asm/pgtable-2level.h
@@ -13,11 +13,13 @@
  */
 static inline void native_set_pte(pte_t *ptep , pte_t pte)
 {
+	mm_track_pte(ptep);
 	*ptep = pte;
 }
 
 static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd)
 {
+	mm_track_pmd(pmdp);
 	*pmdp = pmd;
 }
 
@@ -34,12 +36,14 @@ static inline void native_pmd_clear(pmd_t *pmdp)
 static inline void native_pte_clear(struct mm_struct *mm,
 				    unsigned long addr, pte_t *xp)
 {
+	mm_track_pte(xp);
 	*xp = native_make_pte(0);
 }
 
 #ifdef CONFIG_SMP
 static inline pte_t native_ptep_get_and_clear(pte_t *xp)
 {
+	mm_track_pte(xp);
 	return __pte(xchg(&xp->pte_low, 0));
 }
 #else
diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h
index effff47..b75d753 100644
--- a/arch/x86/include/asm/pgtable-3level.h
+++ b/arch/x86/include/asm/pgtable-3level.h
@@ -26,6 +26,7 @@
  */
 static inline void native_set_pte(pte_t *ptep, pte_t pte)
 {
+	mm_track_pte(ptep);
 	ptep->pte_high = pte.pte_high;
 	smp_wmb();
 	ptep->pte_low = pte.pte_low;
@@ -33,16 +34,19 @@ static inline void native_set_pte(pte_t *ptep, pte_t pte)
 
 static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte)
 {
+	mm_track_pte(ptep);
 	set_64bit((unsigned long long *)(ptep), native_pte_val(pte));
 }
 
 static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd)
 {
+	mm_track_pmd(pmdp);
 	set_64bit((unsigned long long *)(pmdp), native_pmd_val(pmd));
 }
 
 static inline void native_set_pud(pud_t *pudp, pud_t pud)
 {
+	mm_track_pud(pudp);
 	set_64bit((unsigned long long *)(pudp), native_pud_val(pud));
 }
 
@@ -54,6 +58,7 @@ static inline void native_set_pud(pud_t *pudp, pud_t pud)
 static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr,
 				    pte_t *ptep)
 {
+	mm_track_pte(ptep);
 	ptep->pte_low = 0;
 	smp_wmb();
 	ptep->pte_high = 0;
@@ -62,6 +67,9 @@ static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr,
 static inline void native_pmd_clear(pmd_t *pmd)
 {
 	u32 *tmp = (u32 *)pmd;
+
+	mm_track_pmd(pmd);
+
 	*tmp = 0;
 	smp_wmb();
 	*(tmp + 1) = 0;
@@ -69,6 +77,7 @@ static inline void native_pmd_clear(pmd_t *pmd)
 
 static inline void pud_clear(pud_t *pudp)
 {
+	mm_track_pud(pudp);
 	set_pud(pudp, __pud(0));
 
 	/*
@@ -88,6 +97,8 @@ static inline pte_t native_ptep_get_and_clear(pte_t *ptep)
 {
 	pte_t res;
 
+	mm_track_pte(ptep);
+
 	/* xchg acts as a barrier before the setting of the high bits */
 	res.pte_low = xchg(&ptep->pte_low, 0);
 	res.pte_high = ptep->pte_high;
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 18601c8..30bb916 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -89,7 +89,7 @@ extern struct mm_struct *pgd_page_get_mm(struct page *page);
  */
 static inline int pte_dirty(pte_t pte)
 {
-	return pte_flags(pte) & _PAGE_DIRTY;
+	return pte_flags(pte) & (_PAGE_DIRTY | _PAGE_SOFTDIRTY);
 }
 
 static inline int pte_young(pte_t pte)
@@ -183,7 +183,7 @@ static inline pte_t pte_clear_flags(pte_t pte, pteval_t clear)
 
 static inline pte_t pte_mkclean(pte_t pte)
 {
-	return pte_clear_flags(pte, _PAGE_DIRTY);
+	return pte_clear_flags(pte, (_PAGE_DIRTY | _PAGE_SOFTDIRTY));
 }
 
 static inline pte_t pte_mkold(pte_t pte)
diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h
index 0c92113..78415fb 100644
--- a/arch/x86/include/asm/pgtable_32.h
+++ b/arch/x86/include/asm/pgtable_32.h
@@ -21,6 +21,7 @@
 #include <linux/bitops.h>
 #include <linux/list.h>
 #include <linux/spinlock.h>
+#include <asm/mm_track.h>
 
 struct mm_struct;
 struct vm_area_struct;
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 975f709..0848e9e 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -13,6 +13,7 @@
 #include <asm/processor.h>
 #include <linux/bitops.h>
 #include <linux/threads.h>
+#include <asm/mm_track.h>
 
 extern pud_t level3_kernel_pgt[512];
 extern pud_t level3_ident_pgt[512];
@@ -46,11 +47,13 @@ void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte);
 static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr,
 				    pte_t *ptep)
 {
+	mm_track_pte(ptep);
 	*ptep = native_make_pte(0);
 }
 
 static inline void native_set_pte(pte_t *ptep, pte_t pte)
 {
+	mm_track_pte(ptep);
 	*ptep = pte;
 }
 
@@ -61,6 +64,7 @@ static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte)
 
 static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd)
 {
+	mm_track_pmd(pmdp);
 	*pmdp = pmd;
 }
 
@@ -71,6 +75,7 @@ static inline void native_pmd_clear(pmd_t *pmd)
 
 static inline pte_t native_ptep_get_and_clear(pte_t *xp)
 {
+	mm_track_pte(xp);
 #ifdef CONFIG_SMP
 	return native_make_pte(xchg(&xp->pte, 0));
 #else
@@ -97,6 +102,7 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp)
 
 static inline void native_set_pud(pud_t *pudp, pud_t pud)
 {
+	mm_track_pud(pudp);
 	*pudp = pud;
 }
 
@@ -107,6 +113,7 @@ static inline void native_pud_clear(pud_t *pud)
 
 static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd)
 {
+	mm_track_pgd(pgdp);
 	*pgdp = pgd;
 }
 
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index d56187c..7f366d0 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -23,6 +23,7 @@
 #define _PAGE_BIT_SPECIAL	_PAGE_BIT_UNUSED1
 #define _PAGE_BIT_CPA_TEST	_PAGE_BIT_UNUSED1
 #define _PAGE_BIT_SPLITTING	_PAGE_BIT_UNUSED1 /* only valid on a PSE pmd */
+#define _PAGE_BIT_SOFTDIRTY	_PAGE_BIT_HIDDEN
 #define _PAGE_BIT_NX           63       /* No execute: only valid after cpuid check */
 
 /* If _PAGE_BIT_PRESENT is clear, we use these: */
@@ -47,6 +48,7 @@
 #define _PAGE_SPECIAL	(_AT(pteval_t, 1) << _PAGE_BIT_SPECIAL)
 #define _PAGE_CPA_TEST	(_AT(pteval_t, 1) << _PAGE_BIT_CPA_TEST)
 #define _PAGE_SPLITTING	(_AT(pteval_t, 1) << _PAGE_BIT_SPLITTING)
+#define _PAGE_SOFTDIRTY	(_AT(pteval_t, 1) << _PAGE_BIT_SOFTDIRTY)
 #define __HAVE_ARCH_PTE_SPECIAL
 
 #ifdef CONFIG_KMEMCHECK
@@ -71,7 +73,8 @@
 
 /* Set of bits not changed in pte_modify */
 #define _PAGE_CHG_MASK	(PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT |		\
-			 _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY)
+			 _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY |	\
+			 _PAGE_SOFTDIRTY)
 #define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE)
 
 #define _PAGE_CACHE_MASK	(_PAGE_PCD | _PAGE_PWT)
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 3e608ed..a416317 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -30,3 +30,5 @@ obj-$(CONFIG_NUMA_EMU)		+= numa_emulation.o
 obj-$(CONFIG_HAVE_MEMBLOCK)		+= memblock.o
 
 obj-$(CONFIG_MEMTEST)		+= memtest.o
+
+obj-$(CONFIG_TRACK_DIRTY_PAGES)	+= track.o
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 83326ad..b94aad6 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -795,7 +795,7 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
 					unsigned long haddr)
 {
 	pgtable_t pgtable;
-	pmd_t _pmd;
+	pmd_t _pmd = {0};
 	int ret = 0, i;
 	struct page **pages;
 
@@ -1265,7 +1265,7 @@ static int __split_huge_page_map(struct page *page,
 				 unsigned long address)
 {
 	struct mm_struct *mm = vma->vm_mm;
-	pmd_t *pmd, _pmd;
+	pmd_t *pmd, _pmd = {0};
 	int ret = 0, i;
 	pgtable_t pgtable;
 	unsigned long haddr;

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH] REPOST: Memory tracking for physical machine migration
  2011-06-10 23:19 [PATCH] REPOST: Memory tracking for physical machine migration Jim Paradis
@ 2011-06-11  7:55 ` Christoph Hellwig
  2011-06-14 18:17   ` Paradis, James
  2011-06-20 14:55 ` Dave Hansen
  1 sibling, 1 reply; 5+ messages in thread
From: Christoph Hellwig @ 2011-06-11  7:55 UTC (permalink / raw)
  To: Jim Paradis; +Cc: linux-mm

On Fri, Jun 10, 2011 at 07:19:06PM -0400, Jim Paradis wrote:
> [tried posting this a couple days ago... kept having formatting problems
> with the exchange server.  Let's see how this works...]

Much more important is the problem that the patch is utterly useless
as-is.  It just adds exports, but no real functionality.  It's not
like I have told you exactly that a million times before, but given that
you don't want to listen it might just be easier to ignore your patches.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 5+ messages in thread

* RE: [PATCH] REPOST: Memory tracking for physical machine migration
  2011-06-11  7:55 ` Christoph Hellwig
@ 2011-06-14 18:17   ` Paradis, James
  2011-06-20 11:21     ` Christoph Hellwig
  0 siblings, 1 reply; 5+ messages in thread
From: Paradis, James @ 2011-06-14 18:17 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-mm

[-- Attachment #1: Type: text/plain, Size: 768 bytes --]




-----Original Message-----
> From: Christoph Hellwig [mailto:hch@infradead.org]
>  
> On Fri, Jun 10, 2011 at 07:19:06PM -0400, Jim Paradis wrote:
>> [tried posting this a couple days ago... kept having formatting problems
>> with the exchange server.  Let's see how this works...]
> 
> Much more important is the problem that the patch is utterly useless
> as-is.  It just adds exports, but no real functionality.  It's not
> like I have told you exactly that a million times before, but given that
> you don't want to listen it might just be easier to ignore your patches.

Okay, then, help me out here.  What would it take for this to be accepted?
Would you like us to incorporate the memory-harvesting code from LKSM as well?

--jim



[-- Attachment #2: Type: text/html, Size: 1359 bytes --]

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] REPOST: Memory tracking for physical machine migration
  2011-06-14 18:17   ` Paradis, James
@ 2011-06-20 11:21     ` Christoph Hellwig
  0 siblings, 0 replies; 5+ messages in thread
From: Christoph Hellwig @ 2011-06-20 11:21 UTC (permalink / raw)
  To: Paradis, James; +Cc: linux-mm

On Tue, Jun 14, 2011 at 02:17:49PM -0400, Paradis, James wrote:
> Okay, then, help me out here.  What would it take for this to be accepted?
> Would you like us to incorporate the memory-harvesting code from LKSM as well?

You'll need to actually submit useful code, not just exports that aren't
usable in-tree.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] REPOST: Memory tracking for physical machine migration
  2011-06-10 23:19 [PATCH] REPOST: Memory tracking for physical machine migration Jim Paradis
  2011-06-11  7:55 ` Christoph Hellwig
@ 2011-06-20 14:55 ` Dave Hansen
  1 sibling, 0 replies; 5+ messages in thread
From: Dave Hansen @ 2011-06-20 14:55 UTC (permalink / raw)
  To: Jim Paradis; +Cc: linux-mm

On Fri, 2011-06-10 at 19:19 -0400, Jim Paradis wrote:
> diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
> index 3e608ed..a416317 100644
> --- a/arch/x86/mm/Makefile
> +++ b/arch/x86/mm/Makefile
> @@ -30,3 +30,5 @@ obj-$(CONFIG_NUMA_EMU)                += numa_emulation.o
>  obj-$(CONFIG_HAVE_MEMBLOCK)            += memblock.o
> 
>  obj-$(CONFIG_MEMTEST)          += memtest.o
> +
> +obj-$(CONFIG_TRACK_DIRTY_PAGES)        += track.o 

FWIW, this is still having formatting problems.

You also forgot to include track.c, again.  Isn't that where the real
meat of this patch lies?

-- Dave

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2011-06-20 14:56 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-06-10 23:19 [PATCH] REPOST: Memory tracking for physical machine migration Jim Paradis
2011-06-11  7:55 ` Christoph Hellwig
2011-06-14 18:17   ` Paradis, James
2011-06-20 11:21     ` Christoph Hellwig
2011-06-20 14:55 ` Dave Hansen

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.