Re: Dirty/Access bits vs. page content

* Re: Dirty/Access bits vs. page content
       [not found]         ` <53558507.9050703@zytor.com>
@ 2014-04-21 22:29           ` Linus Torvalds
  2014-04-21 22:44             ` Dave Hansen
  0 siblings, 1 reply; 99+ messages in thread
From: Linus Torvalds @ 2014-04-21 22:29 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Benjamin Herrenschmidt, Peter Zijlstra, Dave Hansen, linux-arch

[-- Attachment #1: Type: text/plain, Size: 1258 bytes --]

[ Added linux-arch, for totally untested patch to prepare for some TLB
shootdown fixes ]

On Mon, Apr 21, 2014 at 1:52 PM, H. Peter Anvin <hpa@zytor.com> wrote:
>
> So yes, we need to (re-)mark the page dirty some way or another after
> the TLB shootdown has completed, or somehow hold the I/O until after
> that point.

So how about this preparatory patch.

NOTE! This doesn't actually fix anything, but it moves the dirty bit
into the mmu_gather domain, so that eventually things _can_ be fixed.

Right now every implementation of __tlb_remove_page() just ends up
doing that synchronous

       if (dirty)
               set_page_dirty(page);

without batching it up. But at least it should make it possible to
start fixing things.

Actually moving the dirty bit information into the batching is
somewhat painful, because free_pages_and_swap_cache() really wants an
array of 'struct page *', rather than some array of "page and dirty
info". And that in turn is mainly because it then passes it down to
release_pages(), rather than anything else. But it looks doable.

Comments? This compiles on x86, does it compile on other architectures
(in particular, the ones that don't use asm-generic/tlb.h and thus
HAVE_GENERIC_MMU_GATHER).

                 Linus

[-- Attachment #2: patch.diff --]
[-- Type: text/plain, Size: 7592 bytes --]

 arch/arm/include/asm/tlb.h  |  6 ++++--
 arch/ia64/include/asm/tlb.h |  6 ++++--
 arch/s390/include/asm/tlb.h |  4 +++-
 arch/sh/include/asm/tlb.h   |  6 ++++--
 arch/um/include/asm/tlb.h   |  6 ++++--
 include/asm-generic/tlb.h   |  4 ++--
 mm/hugetlb.c                |  4 +---
 mm/memory.c                 | 15 +++++++++------
 8 files changed, 31 insertions(+), 20 deletions(-)

diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h
index 0baf7f0d9394..ac9c16af8e63 100644
--- a/arch/arm/include/asm/tlb.h
+++ b/arch/arm/include/asm/tlb.h
@@ -165,8 +165,10 @@ tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma)
 		tlb_flush(tlb);
 }
 
-static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
+static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page, bool dirty)
 {
+	if (dirty)
+		set_page_dirty(page);
 	tlb->pages[tlb->nr++] = page;
 	VM_BUG_ON(tlb->nr > tlb->max);
 	return tlb->max - tlb->nr;
@@ -174,7 +176,7 @@ static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
 
 static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
 {
-	if (!__tlb_remove_page(tlb, page))
+	if (!__tlb_remove_page(tlb, page, 0))
 		tlb_flush_mmu(tlb);
 }
 
diff --git a/arch/ia64/include/asm/tlb.h b/arch/ia64/include/asm/tlb.h
index bc5efc7c3f3f..9049a7d6427d 100644
--- a/arch/ia64/include/asm/tlb.h
+++ b/arch/ia64/include/asm/tlb.h
@@ -193,8 +193,10 @@ tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
  * must be delayed until after the TLB has been flushed (see comments at the beginning of
  * this file).
  */
-static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
+static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page, bool dirty)
 {
+	if (dirty)
+		set_page_dirty(page);
 	tlb->need_flush = 1;
 
 	if (!tlb->nr && tlb->pages == tlb->local)
@@ -213,7 +215,7 @@ static inline void tlb_flush_mmu(struct mmu_gather *tlb)
 
 static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
 {
-	if (!__tlb_remove_page(tlb, page))
+	if (!__tlb_remove_page(tlb, page, 0))
 		tlb_flush_mmu(tlb);
 }
 
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index c544b6f05d95..8107a8c53985 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -76,8 +76,10 @@ static inline void tlb_finish_mmu(struct mmu_gather *tlb,
  * tlb_ptep_clear_flush. In both flush modes the tlb for a page cache page
  * has already been freed, so just do free_page_and_swap_cache.
  */
-static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
+static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page, bool dirty)
 {
+	if (dirty)
+		set_page_dirty(page);
 	free_page_and_swap_cache(page);
 	return 1; /* avoid calling tlb_flush_mmu */
 }
diff --git a/arch/sh/include/asm/tlb.h b/arch/sh/include/asm/tlb.h
index 362192ed12fe..428de7858d27 100644
--- a/arch/sh/include/asm/tlb.h
+++ b/arch/sh/include/asm/tlb.h
@@ -90,15 +90,17 @@ static inline void tlb_flush_mmu(struct mmu_gather *tlb)
 {
 }
 
-static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
+static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page, int dirty)
 {
+	if (dirty)
+		set_page_dirty(page);
 	free_page_and_swap_cache(page);
 	return 1; /* avoid calling tlb_flush_mmu */
 }
 
 static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
 {
-	__tlb_remove_page(tlb, page);
+	__tlb_remove_page(tlb, page, 0);
 }
 
 #define pte_free_tlb(tlb, ptep, addr)	pte_free((tlb)->mm, ptep)
diff --git a/arch/um/include/asm/tlb.h b/arch/um/include/asm/tlb.h
index 29b0301c18aa..df557f987819 100644
--- a/arch/um/include/asm/tlb.h
+++ b/arch/um/include/asm/tlb.h
@@ -86,8 +86,10 @@ tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
  *	while handling the additional races in SMP caused by other CPUs
  *	caching valid mappings in their TLBs.
  */
-static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
+static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page, bool dirty)
 {
+	if (dirty)
+		set_page_dirty(page);
 	tlb->need_flush = 1;
 	free_page_and_swap_cache(page);
 	return 1; /* avoid calling tlb_flush_mmu */
@@ -95,7 +97,7 @@ static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
 
 static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
 {
-	__tlb_remove_page(tlb, page);
+	__tlb_remove_page(tlb, page, 0);
 }
 
 /**
diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index 5672d7ea1fa0..541c563f0ff9 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -116,7 +116,7 @@ void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long
 void tlb_flush_mmu(struct mmu_gather *tlb);
 void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start,
 							unsigned long end);
-int __tlb_remove_page(struct mmu_gather *tlb, struct page *page);
+int __tlb_remove_page(struct mmu_gather *tlb, struct page *page, bool dirty);
 
 /* tlb_remove_page
  *	Similar to __tlb_remove_page but will call tlb_flush_mmu() itself when
@@ -124,7 +124,7 @@ int __tlb_remove_page(struct mmu_gather *tlb, struct page *page);
  */
 static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
 {
-	if (!__tlb_remove_page(tlb, page))
+	if (!__tlb_remove_page(tlb, page, 0))
 		tlb_flush_mmu(tlb);
 }
 
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 246192929a2d..dfbe23c0c200 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2516,11 +2516,9 @@ again:
 
 		pte = huge_ptep_get_and_clear(mm, address, ptep);
 		tlb_remove_tlb_entry(tlb, ptep, address);
-		if (huge_pte_dirty(pte))
-			set_page_dirty(page);
 
 		page_remove_rmap(page);
-		force_flush = !__tlb_remove_page(tlb, page);
+		force_flush = !__tlb_remove_page(tlb, page, huge_pte_dirty(pte));
 		if (force_flush) {
 			spin_unlock(ptl);
 			break;
diff --git a/mm/memory.c b/mm/memory.c
index d0f0bef3be48..62fdcd1995f4 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -277,12 +277,15 @@ void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long e
  *	mappings in their TLBs. Returns the number of free page slots left.
  *	When out of page slots we must call tlb_flush_mmu().
  */
-int __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
+int __tlb_remove_page(struct mmu_gather *tlb, struct page *page, bool dirty)
 {
 	struct mmu_gather_batch *batch;
 
 	VM_BUG_ON(!tlb->need_flush);
 
+	/* FIXME! This needs to be batched too */
+	if (dirty)
+		set_page_dirty(page);
 	batch = tlb->active;
 	batch->pages[batch->nr++] = page;
 	if (batch->nr == batch->max) {
@@ -1124,11 +1127,11 @@ again:
 					pte_file_mksoft_dirty(ptfile);
 				set_pte_at(mm, addr, pte, ptfile);
 			}
-			if (PageAnon(page))
+			if (PageAnon(page)) {
+				/* We don't bother saving dirty state for anonymous pages */
+				ptent = pte_mkclean(ptent);
 				rss[MM_ANONPAGES]--;
-			else {
-				if (pte_dirty(ptent))
-					set_page_dirty(page);
+			} else {
 				if (pte_young(ptent) &&
 				    likely(!(vma->vm_flags & VM_SEQ_READ)))
 					mark_page_accessed(page);
@@ -1137,7 +1140,7 @@ again:
 			page_remove_rmap(page);
 			if (unlikely(page_mapcount(page) < 0))
 				print_bad_pte(vma, addr, ptent, page);
-			force_flush = !__tlb_remove_page(tlb, page);
+			force_flush = !__tlb_remove_page(tlb, page, pte_dirty(ptent));
 			if (force_flush)
 				break;
 			continue;

^ permalink raw reply related	[flat|nested] 99+ messages in thread