linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [patch 0/4] mm: de-skew page_count
@ 2006-01-08  5:23 Nick Piggin
  2006-01-08  5:23 ` [patch 1/4] mm: page refcount use atomic primitives Nick Piggin
                   ` (4 more replies)
  0 siblings, 5 replies; 11+ messages in thread
From: Nick Piggin @ 2006-01-08  5:23 UTC (permalink / raw)
  To: linux-mm, linux-kernel; +Cc: Nick Piggin, Andrew Morton

The following patchset (against 2.6.15-rc5ish) uses the new atomic ops to
do away with the offset page refcounting, and simplify the race that it
was designed to cover.

This allows some nice optimisations, and we end up saving 2 atomic ops
including a spin_lock_irqsave in the !PageLRU case, and 1 or 2 atomic ops
in the PageLRU case in the page-release path.

It also happens to be a requirement for my lockless pagecache work, but
stands on its own as good patches.

Nick

-- 
SUSE Labs, Novell Inc.


Send instant messages to your online friends http://au.messenger.yahoo.com 

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [patch 1/4] mm: page refcount use atomic primitives
  2006-01-08  5:23 [patch 0/4] mm: de-skew page_count Nick Piggin
@ 2006-01-08  5:23 ` Nick Piggin
  2006-01-08  5:54   ` Andrew Morton
  2006-01-08  5:24 ` [patch 2/4] mm: PageLRU no testset Nick Piggin
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 11+ messages in thread
From: Nick Piggin @ 2006-01-08  5:23 UTC (permalink / raw)
  To: linux-mm, linux-kernel; +Cc: Nick Piggin, Andrew Morton

The VM has an interesting race where a page refcount can drop to zero, but
it is still on the LRU lists for a short time. This was solved by testing
a 0->1 refcount transition when picking up pages from the LRU, and dropping
the refcount in that case.

Instead, use atomic_inc_not_zero to ensure we never pick up a 0 refcount
page from the LRU (ie. we guarantee the page will not be touched).

This ensures we can test PageLRU without taking the lru_lock, and allows
further optimisations (in later patches) -- we end up saving 2 atomic ops
including a spin_lock_irqsave in the !PageLRU case, and 2 or 3 atomic ops
in the PageLRU case.

Signed-off-by: Nick Piggin <npiggin@suse.de>

Index: linux-2.6/include/linux/mm.h
===================================================================
--- linux-2.6.orig/include/linux/mm.h
+++ linux-2.6/include/linux/mm.h
@@ -281,49 +281,52 @@ struct page {
  *
  * Also, many kernel routines increase the page count before a critical
  * routine so they can be sure the page doesn't go away from under them.
- *
- * Since 2.6.6 (approx), a free page has ->_count = -1.  This is so that we
- * can use atomic_add_negative(-1, page->_count) to detect when the page
- * becomes free and so that we can also use atomic_inc_and_test to atomically
- * detect when we just tried to grab a ref on a page which some other CPU has
- * already deemed to be freeable.
- *
- * NO code should make assumptions about this internal detail!  Use the provided
- * macros which retain the old rules: page_count(page) == 0 is a free page.
  */
 
 /*
  * Drop a ref, return true if the logical refcount fell to zero (the page has
  * no users)
  */
-#define put_page_testzero(p)				\
-	({						\
-		BUG_ON(page_count(p) == 0);		\
-		atomic_add_negative(-1, &(p)->_count);	\
-	})
+static inline int put_page_testzero(struct page *page)
+{
+	BUG_ON(atomic_read(&page->_count) == 0);
+	return atomic_dec_and_test(&page->_count);
+}
 
 /*
  * Grab a ref, return true if the page previously had a logical refcount of
  * zero.  ie: returns true if we just grabbed an already-deemed-to-be-free page
  */
-#define get_page_testone(p)	atomic_inc_and_test(&(p)->_count)
+static inline int get_page_unless_zero(struct page *page)
+{
+	return atomic_inc_not_zero(&page->_count);
+}
 
-#define set_page_count(p,v) 	atomic_set(&(p)->_count, v - 1)
-#define __put_page(p)		atomic_dec(&(p)->_count)
+static inline void set_page_count(struct page *page, int v)
+{
+	atomic_set(&page->_count, v);
+}
+
+static inline void __put_page(struct page *page)
+{
+	BUG_ON(atomic_read(&page->_count) == 0);
+	atomic_dec(&page->_count);
+}
 
 extern void FASTCALL(__page_cache_release(struct page *));
 
 static inline int page_count(struct page *page)
 {
-	if (PageCompound(page))
+	if (unlikely(PageCompound(page)))
 		page = (struct page *)page_private(page);
-	return atomic_read(&page->_count) + 1;
+	return atomic_read(&page->_count);
 }
 
 static inline void get_page(struct page *page)
 {
 	if (unlikely(PageCompound(page)))
 		page = (struct page *)page_private(page);
+	BUG_ON(atomic_read(&page->_count) == 0);
 	atomic_inc(&page->_count);
 }
 
Index: linux-2.6/mm/vmscan.c
===================================================================
--- linux-2.6.orig/mm/vmscan.c
+++ linux-2.6/mm/vmscan.c
@@ -591,24 +591,20 @@ static int isolate_lru_pages(int nr_to_s
 	int scan = 0;
 
 	while (scan++ < nr_to_scan && !list_empty(src)) {
+		struct list_head *target;
 		page = lru_to_page(src);
 		prefetchw_prev_lru_page(page, src, flags);
 
-		if (!TestClearPageLRU(page))
-			BUG();
+		BUG_ON(!PageLRU(page));
 		list_del(&page->lru);
-		if (get_page_testone(page)) {
-			/*
-			 * It is being freed elsewhere
-			 */
-			__put_page(page);
-			SetPageLRU(page);
-			list_add(&page->lru, src);
-			continue;
-		} else {
-			list_add(&page->lru, dst);
+		target = src;
+		if (get_page_unless_zero(page)) {
+			ClearPageLRU(page);
+			target = dst;
 			nr_taken++;
-		}
+		} /* else it is being freed elsewhere */
+
+		list_add(&page->lru, target);
 	}
 
 	*scanned = scan;
Index: linux-2.6/mm/swap.c
===================================================================
--- linux-2.6.orig/mm/swap.c
+++ linux-2.6/mm/swap.c
@@ -180,17 +180,19 @@ void lru_add_drain(void)
  */
 void fastcall __page_cache_release(struct page *page)
 {
-	unsigned long flags;
-	struct zone *zone = page_zone(page);
+	if (PageLRU(page)) {
+		unsigned long flags;
 
-	spin_lock_irqsave(&zone->lru_lock, flags);
-	if (TestClearPageLRU(page))
+		struct zone *zone = page_zone(page);
+		spin_lock_irqsave(&zone->lru_lock, flags);
+		if (!TestClearPageLRU(page))
+			BUG();
 		del_page_from_lru(zone, page);
-	if (page_count(page) != 0)
-		page = NULL;
-	spin_unlock_irqrestore(&zone->lru_lock, flags);
-	if (page)
-		free_hot_page(page);
+		spin_unlock_irqrestore(&zone->lru_lock, flags);
+	}
+
+	BUG_ON(page_count(page) != 0);
+	free_hot_page(page);
 }
 
 EXPORT_SYMBOL(__page_cache_release);
@@ -216,27 +218,31 @@ void release_pages(struct page **pages, 
 	pagevec_init(&pages_to_free, cold);
 	for (i = 0; i < nr; i++) {
 		struct page *page = pages[i];
-		struct zone *pagezone;
 
 		if (!put_page_testzero(page))
 			continue;
 
-		pagezone = page_zone(page);
-		if (pagezone != zone) {
-			if (zone)
-				spin_unlock_irq(&zone->lru_lock);
-			zone = pagezone;
-			spin_lock_irq(&zone->lru_lock);
-		}
-		if (TestClearPageLRU(page))
+		if (PageLRU(page)) {
+			struct zone *pagezone = page_zone(page);
+			if (pagezone != zone) {
+				if (zone)
+					spin_unlock_irq(&zone->lru_lock);
+				zone = pagezone;
+				spin_lock_irq(&zone->lru_lock);
+			}
+			if (!TestClearPageLRU(page))
+				BUG();
 			del_page_from_lru(zone, page);
-		if (page_count(page) == 0) {
-			if (!pagevec_add(&pages_to_free, page)) {
+		}
+		BUG_ON(page_count(page));
+
+		if (!pagevec_add(&pages_to_free, page)) {
+			if (zone) {
 				spin_unlock_irq(&zone->lru_lock);
-				__pagevec_free(&pages_to_free);
-				pagevec_reinit(&pages_to_free);
-				zone = NULL;	/* No lock is held */
+				zone = NULL;
 			}
+			__pagevec_free(&pages_to_free);
+			pagevec_reinit(&pages_to_free);
 		}
 	}
 	if (zone)
Send instant messages to your online friends http://au.messenger.yahoo.com 

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [patch 2/4] mm: PageLRU no testset
  2006-01-08  5:23 [patch 0/4] mm: de-skew page_count Nick Piggin
  2006-01-08  5:23 ` [patch 1/4] mm: page refcount use atomic primitives Nick Piggin
@ 2006-01-08  5:24 ` Nick Piggin
  2006-01-08  5:24 ` [patch 3/4] mm: PageActive " Nick Piggin
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 11+ messages in thread
From: Nick Piggin @ 2006-01-08  5:24 UTC (permalink / raw)
  To: linux-mm, linux-kernel; +Cc: Nick Piggin, Andrew Morton

PG_lru is protected by zone->lru_lock. It does not need TestSet/TestClear
operations.

Signed-off-by: Nick Piggin <npiggin@suse.de>

Index: linux-2.6/mm/vmscan.c
===================================================================
--- linux-2.6.orig/mm/vmscan.c
+++ linux-2.6/mm/vmscan.c
@@ -657,8 +657,8 @@ static void shrink_cache(struct zone *zo
 		 */
 		while (!list_empty(&page_list)) {
 			page = lru_to_page(&page_list);
-			if (TestSetPageLRU(page))
-				BUG();
+			BUG_ON(PageLRU(page));
+			SetPageLRU(page);
 			list_del(&page->lru);
 			if (PageActive(page))
 				add_page_to_active_list(zone, page);
@@ -770,8 +770,8 @@ refill_inactive_zone(struct zone *zone, 
 	while (!list_empty(&l_inactive)) {
 		page = lru_to_page(&l_inactive);
 		prefetchw_prev_lru_page(page, &l_inactive, flags);
-		if (TestSetPageLRU(page))
-			BUG();
+		BUG_ON(PageLRU(page));
+		SetPageLRU(page);
 		if (!TestClearPageActive(page))
 			BUG();
 		list_move(&page->lru, &zone->inactive_list);
@@ -799,8 +799,8 @@ refill_inactive_zone(struct zone *zone, 
 	while (!list_empty(&l_active)) {
 		page = lru_to_page(&l_active);
 		prefetchw_prev_lru_page(page, &l_active, flags);
-		if (TestSetPageLRU(page))
-			BUG();
+		BUG_ON(PageLRU(page));
+		SetPageLRU(page);
 		BUG_ON(!PageActive(page));
 		list_move(&page->lru, &zone->active_list);
 		pgmoved++;
Index: linux-2.6/mm/swap.c
===================================================================
--- linux-2.6.orig/mm/swap.c
+++ linux-2.6/mm/swap.c
@@ -185,8 +185,8 @@ void fastcall __page_cache_release(struc
 
 		struct zone *zone = page_zone(page);
 		spin_lock_irqsave(&zone->lru_lock, flags);
-		if (!TestClearPageLRU(page))
-			BUG();
+		BUG_ON(!PageLRU(page));
+		ClearPageLRU(page);
 		del_page_from_lru(zone, page);
 		spin_unlock_irqrestore(&zone->lru_lock, flags);
 	}
@@ -230,8 +230,8 @@ void release_pages(struct page **pages, 
 				zone = pagezone;
 				spin_lock_irq(&zone->lru_lock);
 			}
-			if (!TestClearPageLRU(page))
-				BUG();
+			BUG_ON(!PageLRU(page));
+			ClearPageLRU(page);
 			del_page_from_lru(zone, page);
 		}
 		BUG_ON(page_count(page));
@@ -311,8 +311,8 @@ void __pagevec_lru_add(struct pagevec *p
 			zone = pagezone;
 			spin_lock_irq(&zone->lru_lock);
 		}
-		if (TestSetPageLRU(page))
-			BUG();
+		BUG_ON(PageLRU(page));
+		SetPageLRU(page);
 		add_page_to_inactive_list(zone, page);
 	}
 	if (zone)
@@ -338,8 +338,8 @@ void __pagevec_lru_add_active(struct pag
 			zone = pagezone;
 			spin_lock_irq(&zone->lru_lock);
 		}
-		if (TestSetPageLRU(page))
-			BUG();
+		BUG_ON(PageLRU(page));
+		SetPageLRU(page);
 		if (TestSetPageActive(page))
 			BUG();
 		add_page_to_active_list(zone, page);
Index: linux-2.6/include/linux/page-flags.h
===================================================================
--- linux-2.6.orig/include/linux/page-flags.h
+++ linux-2.6/include/linux/page-flags.h
@@ -244,10 +244,9 @@ extern void __mod_page_state_offset(unsi
 #define __ClearPageDirty(page)	__clear_bit(PG_dirty, &(page)->flags)
 #define TestClearPageDirty(page) test_and_clear_bit(PG_dirty, &(page)->flags)
 
-#define SetPageLRU(page)	set_bit(PG_lru, &(page)->flags)
 #define PageLRU(page)		test_bit(PG_lru, &(page)->flags)
-#define TestSetPageLRU(page)	test_and_set_bit(PG_lru, &(page)->flags)
-#define TestClearPageLRU(page)	test_and_clear_bit(PG_lru, &(page)->flags)
+#define SetPageLRU(page)	set_bit(PG_lru, &(page)->flags)
+#define ClearPageLRU(page)	clear_bit(PG_lru, &(page)->flags)
 
 #define PageActive(page)	test_bit(PG_active, &(page)->flags)
 #define SetPageActive(page)	set_bit(PG_active, &(page)->flags)
Send instant messages to your online friends http://au.messenger.yahoo.com 

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [patch 3/4] mm: PageActive no testset
  2006-01-08  5:23 [patch 0/4] mm: de-skew page_count Nick Piggin
  2006-01-08  5:23 ` [patch 1/4] mm: page refcount use atomic primitives Nick Piggin
  2006-01-08  5:24 ` [patch 2/4] mm: PageLRU no testset Nick Piggin
@ 2006-01-08  5:24 ` Nick Piggin
  2006-01-08  5:25 ` [patch 4/4] mm: less atomic ops Nick Piggin
  2006-01-08  5:42 ` [patch 0/4] mm: de-skew page_count Nick Piggin
  4 siblings, 0 replies; 11+ messages in thread
From: Nick Piggin @ 2006-01-08  5:24 UTC (permalink / raw)
  To: linux-mm, linux-kernel; +Cc: Nick Piggin, Andrew Morton

PG_active is protected by zone->lru_lock, it does not need TestSet/TestClear
operations.

Signed-off-by: Nick Piggin <npiggin@suse.de>

Index: linux-2.6/mm/vmscan.c
===================================================================
--- linux-2.6.orig/mm/vmscan.c
+++ linux-2.6/mm/vmscan.c
@@ -772,8 +772,9 @@ refill_inactive_zone(struct zone *zone, 
 		prefetchw_prev_lru_page(page, &l_inactive, flags);
 		BUG_ON(PageLRU(page));
 		SetPageLRU(page);
-		if (!TestClearPageActive(page))
-			BUG();
+		BUG_ON(!PageActive(page));
+		ClearPageActive(page);
+
 		list_move(&page->lru, &zone->inactive_list);
 		pgmoved++;
 		if (!pagevec_add(&pvec, page)) {
Index: linux-2.6/mm/swap.c
===================================================================
--- linux-2.6.orig/mm/swap.c
+++ linux-2.6/mm/swap.c
@@ -340,8 +340,8 @@ void __pagevec_lru_add_active(struct pag
 		}
 		BUG_ON(PageLRU(page));
 		SetPageLRU(page);
-		if (TestSetPageActive(page))
-			BUG();
+		BUG_ON(PageActive(page));
+		SetPageActive(page);
 		add_page_to_active_list(zone, page);
 	}
 	if (zone)
Index: linux-2.6/include/linux/page-flags.h
===================================================================
--- linux-2.6.orig/include/linux/page-flags.h
+++ linux-2.6/include/linux/page-flags.h
@@ -251,8 +251,6 @@ extern void __mod_page_state_offset(unsi
 #define PageActive(page)	test_bit(PG_active, &(page)->flags)
 #define SetPageActive(page)	set_bit(PG_active, &(page)->flags)
 #define ClearPageActive(page)	clear_bit(PG_active, &(page)->flags)
-#define TestClearPageActive(page) test_and_clear_bit(PG_active, &(page)->flags)
-#define TestSetPageActive(page) test_and_set_bit(PG_active, &(page)->flags)
 
 #define PageSlab(page)		test_bit(PG_slab, &(page)->flags)
 #define SetPageSlab(page)	set_bit(PG_slab, &(page)->flags)
Send instant messages to your online friends http://au.messenger.yahoo.com 

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [patch 4/4] mm: less atomic ops
  2006-01-08  5:23 [patch 0/4] mm: de-skew page_count Nick Piggin
                   ` (2 preceding siblings ...)
  2006-01-08  5:24 ` [patch 3/4] mm: PageActive " Nick Piggin
@ 2006-01-08  5:25 ` Nick Piggin
  2006-01-08  5:42 ` [patch 0/4] mm: de-skew page_count Nick Piggin
  4 siblings, 0 replies; 11+ messages in thread
From: Nick Piggin @ 2006-01-08  5:25 UTC (permalink / raw)
  To: linux-mm, linux-kernel; +Cc: Nick Piggin, Andrew Morton

In the page release paths, we can be sure that nobody will mess with our
page->flags because the refcount has dropped to 0. So no need for atomic
operations here.

Signed-off-by: Nick Piggin <npiggin@suse.de>

Index: linux-2.6/include/linux/page-flags.h
===================================================================
--- linux-2.6.orig/include/linux/page-flags.h
+++ linux-2.6/include/linux/page-flags.h
@@ -247,10 +247,12 @@ extern void __mod_page_state_offset(unsi
 #define PageLRU(page)		test_bit(PG_lru, &(page)->flags)
 #define SetPageLRU(page)	set_bit(PG_lru, &(page)->flags)
 #define ClearPageLRU(page)	clear_bit(PG_lru, &(page)->flags)
+#define __ClearPageLRU(page)	__clear_bit(PG_lru, &(page)->flags)
 
 #define PageActive(page)	test_bit(PG_active, &(page)->flags)
 #define SetPageActive(page)	set_bit(PG_active, &(page)->flags)
 #define ClearPageActive(page)	clear_bit(PG_active, &(page)->flags)
+#define __ClearPageActive(page)	__clear_bit(PG_active, &(page)->flags)
 
 #define PageSlab(page)		test_bit(PG_slab, &(page)->flags)
 #define SetPageSlab(page)	set_bit(PG_slab, &(page)->flags)
Index: linux-2.6/mm/swap.c
===================================================================
--- linux-2.6.orig/mm/swap.c
+++ linux-2.6/mm/swap.c
@@ -186,7 +186,7 @@ void fastcall __page_cache_release(struc
 		struct zone *zone = page_zone(page);
 		spin_lock_irqsave(&zone->lru_lock, flags);
 		BUG_ON(!PageLRU(page));
-		ClearPageLRU(page);
+		__ClearPageLRU(page);
 		del_page_from_lru(zone, page);
 		spin_unlock_irqrestore(&zone->lru_lock, flags);
 	}
@@ -231,7 +231,7 @@ void release_pages(struct page **pages, 
 				spin_lock_irq(&zone->lru_lock);
 			}
 			BUG_ON(!PageLRU(page));
-			ClearPageLRU(page);
+			__ClearPageLRU(page);
 			del_page_from_lru(zone, page);
 		}
 		BUG_ON(page_count(page));
Index: linux-2.6/include/linux/mm_inline.h
===================================================================
--- linux-2.6.orig/include/linux/mm_inline.h
+++ linux-2.6/include/linux/mm_inline.h
@@ -32,7 +32,7 @@ del_page_from_lru(struct zone *zone, str
 {
 	list_del(&page->lru);
 	if (PageActive(page)) {
-		ClearPageActive(page);
+		__ClearPageActive(page);
 		zone->nr_active--;
 	} else {
 		zone->nr_inactive--;
Send instant messages to your online friends http://au.messenger.yahoo.com 

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [patch 0/4] mm: de-skew page_count
  2006-01-08  5:23 [patch 0/4] mm: de-skew page_count Nick Piggin
                   ` (3 preceding siblings ...)
  2006-01-08  5:25 ` [patch 4/4] mm: less atomic ops Nick Piggin
@ 2006-01-08  5:42 ` Nick Piggin
  4 siblings, 0 replies; 11+ messages in thread
From: Nick Piggin @ 2006-01-08  5:42 UTC (permalink / raw)
  To: linux-mm, linux-kernel; +Cc: Andrew Morton

Nick Piggin wrote:
> The following patchset (against 2.6.15-rc5ish)

Sorry, that should read 2.6.15-git3 (latest git).

-- 
SUSE Labs, Novell Inc.

Send instant messages to your online friends http://au.messenger.yahoo.com 

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [patch 1/4] mm: page refcount use atomic primitives
  2006-01-08  5:23 ` [patch 1/4] mm: page refcount use atomic primitives Nick Piggin
@ 2006-01-08  5:54   ` Andrew Morton
  2006-01-08 20:40     ` Nick Piggin
  0 siblings, 1 reply; 11+ messages in thread
From: Andrew Morton @ 2006-01-08  5:54 UTC (permalink / raw)
  To: Nick Piggin; +Cc: linux-mm, linux-kernel, nickpiggin

Nick Piggin <nickpiggin@yahoo.com.au> wrote:
>
> The VM has an interesting race where a page refcount can drop to zero, but
> it is still on the LRU lists for a short time. This was solved by testing
> a 0->1 refcount transition when picking up pages from the LRU, and dropping
> the refcount in that case.

Tell me about it...

> Instead, use atomic_inc_not_zero to ensure we never pick up a 0 refcount
> page from the LRU (ie. we guarantee the page will not be touched).

atomic_inc_not_zero() looks rather bloaty, but a single call site is OK.

> This ensures we can test PageLRU without taking the lru_lock,

Let me write some changelog for you.

isolate_lru_pages() can remove live pages from the LRU at any time and
shrink_cache() can put them back at any time.  As we don't hold the
zone->lock we can race against that.

> void fastcall __page_cache_release(struct page *page)
> {
> 	if (PageLRU(page)) {
> 		unsigned long flags;

isolate_lru_pages() removes the page here.

> 		struct zone *zone = page_zone(page);
> 		spin_lock_irqsave(&zone->lru_lock, flags);
> 		if (!TestClearPageLRU(page))
> 			BUG();

blam.

> 		del_page_from_lru(zone, page);
> 		spin_unlock_irqrestore(&zone->lru_lock, flags);
> 	}
> 
> 	BUG_ON(page_count(page) != 0);
> 	free_hot_page(page);
> }
> 

But put_page() wouldn't have entered __page_cache_release() at all, because
isolate_lru_page() is changed by this patch to elevated the page refcount
prior to clearing PG_lru:

		BUG_ON(!PageLRU(page));
		list_del(&page->lru);
		target = src;
		if (get_page_unless_zero(page)) {
			ClearPageLRU(page);


So no blam.

That's from a two-minute-peek.  I haven't thought about this dreadfully
hard.  But I'd like to gain some confidence that you have, please.  This
stuff is tricky.

> and allows
> further optimisations (in later patches) -- we end up saving 2 atomic ops
> including a spin_lock_irqsave in the !PageLRU case, and 2 or 3 atomic ops
> in the PageLRU case.

Well yeah, but you've pretty much eliminated all those nice speedups by
adding several BUG_ON(atomic_op)s.  Everyone compiles with CONFIG_BUG.  So
I'd suggest that such new assertions be broken out into a separate -mm-only
patch.


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [patch 1/4] mm: page refcount use atomic primitives
  2006-01-08  5:54   ` Andrew Morton
@ 2006-01-08 20:40     ` Nick Piggin
  2006-01-08 21:43       ` Andrew Morton
  0 siblings, 1 reply; 11+ messages in thread
From: Nick Piggin @ 2006-01-08 20:40 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-mm, linux-kernel

Andrew Morton wrote:
> Nick Piggin <nickpiggin@yahoo.com.au> wrote:
> 
>>The VM has an interesting race where a page refcount can drop to zero, but
>>it is still on the LRU lists for a short time. This was solved by testing
>>a 0->1 refcount transition when picking up pages from the LRU, and dropping
>>the refcount in that case.
> 
> 
> Tell me about it...
> 
> 
>>Instead, use atomic_inc_not_zero to ensure we never pick up a 0 refcount
>>page from the LRU (ie. we guarantee the page will not be touched).
> 
> 
> atomic_inc_not_zero() looks rather bloaty, but a single call site is OK.
> 

A little. On generic cmpxchg/cas architectures it isn't too bad, and
LL/SC architectures presently implement it fairly stupidly with cmpxchg
but they can do a much better job using ll/sc directly.

> 
>>This ensures we can test PageLRU without taking the lru_lock,
> 
> 
> Let me write some changelog for you.
> 
> isolate_lru_pages() can remove live pages from the LRU at any time and
> shrink_cache() can put them back at any time.  As we don't hold the
> zone->lock we can race against that.
> 
> 
>>void fastcall __page_cache_release(struct page *page)
>>{
>>	if (PageLRU(page)) {
>>		unsigned long flags;
> 
> 
> isolate_lru_pages() removes the page here.
> 
> 
>>		struct zone *zone = page_zone(page);
>>		spin_lock_irqsave(&zone->lru_lock, flags);
>>		if (!TestClearPageLRU(page))
>>			BUG();
> 
> 
> blam.
> 
> 
>>		del_page_from_lru(zone, page);
>>		spin_unlock_irqrestore(&zone->lru_lock, flags);
>>	}
>>
>>	BUG_ON(page_count(page) != 0);
>>	free_hot_page(page);
>>}
>>
> 
> 
> But put_page() wouldn't have entered __page_cache_release() at all, because
> isolate_lru_page() is changed by this patch to elevated the page refcount
> prior to clearing PG_lru:
> 
> 		BUG_ON(!PageLRU(page));
> 		list_del(&page->lru);
> 		target = src;
> 		if (get_page_unless_zero(page)) {
> 			ClearPageLRU(page);
> 
> 
> So no blam.
> 
> That's from a two-minute-peek.  I haven't thought about this dreadfully
> hard.  But I'd like to gain some confidence that you have, please.  This
> stuff is tricky.
> 

Right, no blam. This is how I avoid taking the LRU lock.

  "Instead, use atomic_inc_not_zero to ensure we never **pick up a 0 refcount**
   page from the LRU (ie. we guarantee the page will not be touched)."

I don't understand what you're asking?

> 
>>and allows
>>further optimisations (in later patches) -- we end up saving 2 atomic ops
>>including a spin_lock_irqsave in the !PageLRU case, and 2 or 3 atomic ops
>>in the PageLRU case.
> 
> 
> Well yeah, but you've pretty much eliminated all those nice speedups by
> adding several BUG_ON(atomic_op)s.  Everyone compiles with CONFIG_BUG.  So
> I'd suggest that such new assertions be broken out into a separate -mm-only
> patch.
> 

Not quite eliminated because ClearPageXXX is an atomic RMW, __ClearPageXXX
is not. Also TestClearXXX includes memory barriers.

Anyway I wanted to keep it equivalent (ie. keep bugs there). I have a future
patch to move these assertions under CONFIG_DEBUG_VM.

-- 
SUSE Labs, Novell Inc.

Send instant messages to your online friends http://au.messenger.yahoo.com 

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [patch 1/4] mm: page refcount use atomic primitives
  2006-01-08 20:40     ` Nick Piggin
@ 2006-01-08 21:43       ` Andrew Morton
  0 siblings, 0 replies; 11+ messages in thread
From: Andrew Morton @ 2006-01-08 21:43 UTC (permalink / raw)
  To: Nick Piggin; +Cc: linux-mm, linux-kernel

Nick Piggin <nickpiggin@yahoo.com.au> wrote:
>
> > That's from a two-minute-peek.  I haven't thought about this dreadfully
>  > hard.  But I'd like to gain some confidence that you have, please.  This
>  > stuff is tricky.
>  > 
> 
>  Right, no blam. This is how I avoid taking the LRU lock.
> 
>    "Instead, use atomic_inc_not_zero to ensure we never **pick up a 0 refcount**
>     page from the LRU (ie. we guarantee the page will not be touched)."
> 
>  I don't understand what you're asking?

Well if you don't understand me, how do you expect me to?

Ho hum.  Please redo the patches into something which vaguely applies and
let's give them a spin.

I would prefer that the BUG_ONs be split into a separate patch tho.  Or at
least, let's take a real close look at whether they're really needed.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [patch 4/4] mm: less atomic ops
  2006-01-18 10:40 [patch 0/4] mm: de-skew page refcount Nick Piggin
@ 2006-01-18 10:41 ` Nick Piggin
  0 siblings, 0 replies; 11+ messages in thread
From: Nick Piggin @ 2006-01-18 10:41 UTC (permalink / raw)
  To: Linux Memory Management, Linux Kernel Mailing List
  Cc: Hugh Dickins, Andrew Morton, Andrea Arcangeli, Nick Piggin,
	Linus Torvalds, David Miller

In the page release paths, we can be sure that nobody will mess with our
page->flags because the refcount has dropped to 0. So no need for atomic
operations here.

Signed-off-by: Nick Piggin <npiggin@suse.de>

Index: linux-2.6/include/linux/page-flags.h
===================================================================
--- linux-2.6.orig/include/linux/page-flags.h
+++ linux-2.6/include/linux/page-flags.h
@@ -247,10 +247,12 @@ extern void __mod_page_state_offset(unsi
 #define PageLRU(page)		test_bit(PG_lru, &(page)->flags)
 #define SetPageLRU(page)	set_bit(PG_lru, &(page)->flags)
 #define ClearPageLRU(page)	clear_bit(PG_lru, &(page)->flags)
+#define __ClearPageLRU(page)	__clear_bit(PG_lru, &(page)->flags)
 
 #define PageActive(page)	test_bit(PG_active, &(page)->flags)
 #define SetPageActive(page)	set_bit(PG_active, &(page)->flags)
 #define ClearPageActive(page)	clear_bit(PG_active, &(page)->flags)
+#define __ClearPageActive(page)	__clear_bit(PG_active, &(page)->flags)
 
 #define PageSlab(page)		test_bit(PG_slab, &(page)->flags)
 #define SetPageSlab(page)	set_bit(PG_slab, &(page)->flags)
Index: linux-2.6/mm/swap.c
===================================================================
--- linux-2.6.orig/mm/swap.c
+++ linux-2.6/mm/swap.c
@@ -204,7 +204,7 @@ void fastcall __page_cache_release(struc
 		struct zone *zone = page_zone(page);
 		spin_lock_irqsave(&zone->lru_lock, flags);
 		BUG_ON(!PageLRU(page));
-		ClearPageLRU(page);
+		__ClearPageLRU(page);
 		del_page_from_lru(zone, page);
 		spin_unlock_irqrestore(&zone->lru_lock, flags);
 	}
@@ -248,7 +248,7 @@ void release_pages(struct page **pages, 
 				spin_lock_irq(&zone->lru_lock);
 			}
 			BUG_ON(!PageLRU(page));
-			ClearPageLRU(page);
+			__ClearPageLRU(page);
 			del_page_from_lru(zone, page);
 		}
 
Index: linux-2.6/include/linux/mm_inline.h
===================================================================
--- linux-2.6.orig/include/linux/mm_inline.h
+++ linux-2.6/include/linux/mm_inline.h
@@ -32,7 +32,7 @@ del_page_from_lru(struct zone *zone, str
 {
 	list_del(&page->lru);
 	if (PageActive(page)) {
-		ClearPageActive(page);
+		__ClearPageActive(page);
 		zone->nr_active--;
 	} else {
 		zone->nr_inactive--;

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [patch 4/4] mm: less atomic ops
  2005-12-05 11:59 [patch 0/4] mm: optimisations Nick Piggin
@ 2005-12-05 12:01 ` Nick Piggin
  0 siblings, 0 replies; 11+ messages in thread
From: Nick Piggin @ 2005-12-05 12:01 UTC (permalink / raw)
  To: linux-kernel; +Cc: Nick Piggin, Andrew Morton

In the page release paths, we can be sure that nobody will mess with our
page->flags because the refcount has dropped to 0. So no need for atomic
operations here.

Signed-off-by: Nick Piggin <npiggin@suse.de>

Index: linux-2.6/include/linux/page-flags.h
===================================================================
--- linux-2.6.orig/include/linux/page-flags.h
+++ linux-2.6/include/linux/page-flags.h
@@ -236,10 +236,12 @@ extern void __mod_page_state_offset(unsi
 #define PageLRU(page)		test_bit(PG_lru, &(page)->flags)
 #define SetPageLRU(page)	set_bit(PG_lru, &(page)->flags)
 #define ClearPageLRU(page)	clear_bit(PG_lru, &(page)->flags)
+#define __ClearPageLRU(page)	__clear_bit(PG_lru, &(page)->flags)
 
 #define PageActive(page)	test_bit(PG_active, &(page)->flags)
 #define SetPageActive(page)	set_bit(PG_active, &(page)->flags)
 #define ClearPageActive(page)	clear_bit(PG_active, &(page)->flags)
+#define __ClearPageActive(page)	__clear_bit(PG_active, &(page)->flags)
 
 #define PageSlab(page)		test_bit(PG_slab, &(page)->flags)
 #define SetPageSlab(page)	set_bit(PG_slab, &(page)->flags)
Index: linux-2.6/mm/swap.c
===================================================================
--- linux-2.6.orig/mm/swap.c
+++ linux-2.6/mm/swap.c
@@ -180,7 +180,7 @@ void fastcall __page_cache_release(struc
 		struct zone *zone = page_zone(page);
 		spin_lock_irqsave(&zone->lru_lock, flags);
 		BUG_ON(!PageLRU(page));
-		ClearPageLRU(page);
+		__ClearPageLRU(page);
 		del_page_from_lru(zone, page);
 		spin_unlock_irqrestore(&zone->lru_lock, flags);
 	}
@@ -225,7 +225,7 @@ void release_pages(struct page **pages, 
 				spin_lock_irq(&zone->lru_lock);
 			}
 			BUG_ON(!PageLRU(page));
-			ClearPageLRU(page);
+			__ClearPageLRU(page);
 			del_page_from_lru(zone, page);
 		}
 		BUG_ON(page_count(page));
Index: linux-2.6/include/linux/mm_inline.h
===================================================================
--- linux-2.6.orig/include/linux/mm_inline.h
+++ linux-2.6/include/linux/mm_inline.h
@@ -32,7 +32,7 @@ del_page_from_lru(struct zone *zone, str
 {
 	list_del(&page->lru);
 	if (PageActive(page)) {
-		ClearPageActive(page);
+		__ClearPageActive(page);
 		zone->nr_active--;
 	} else {
 		zone->nr_inactive--;
Send instant messages to your online friends http://au.messenger.yahoo.com 

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2006-01-18 10:41 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2006-01-08  5:23 [patch 0/4] mm: de-skew page_count Nick Piggin
2006-01-08  5:23 ` [patch 1/4] mm: page refcount use atomic primitives Nick Piggin
2006-01-08  5:54   ` Andrew Morton
2006-01-08 20:40     ` Nick Piggin
2006-01-08 21:43       ` Andrew Morton
2006-01-08  5:24 ` [patch 2/4] mm: PageLRU no testset Nick Piggin
2006-01-08  5:24 ` [patch 3/4] mm: PageActive " Nick Piggin
2006-01-08  5:25 ` [patch 4/4] mm: less atomic ops Nick Piggin
2006-01-08  5:42 ` [patch 0/4] mm: de-skew page_count Nick Piggin
  -- strict thread matches above, loose matches on Subject: below --
2006-01-18 10:40 [patch 0/4] mm: de-skew page refcount Nick Piggin
2006-01-18 10:41 ` [patch 4/4] mm: less atomic ops Nick Piggin
2005-12-05 11:59 [patch 0/4] mm: optimisations Nick Piggin
2005-12-05 12:01 ` [patch 4/4] mm: less atomic ops Nick Piggin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).