* [rfc][patch 0/14] mm: performance improvements
@ 2005-11-06  8:11 Nick Piggin
  2005-11-06  8:20 ` [patch 1/14] mm: opt rmqueue Nick Piggin
  2005-11-07  1:39 ` [rfc][patch 0/14] mm: performance improvements Christoph Hellwig
  0 siblings, 2 replies; 33+ messages in thread
From: Nick Piggin @ 2005-11-06  8:11 UTC (permalink / raw)
  To: Linux Kernel Mailing List

The following patchset is a set of performance optimisations
for the mm subsystem. They mainly focus on the page allocator
because that is a very hot path for kbuild, which is my target
workload.

The performance improvements are not fully documented yet, so the
patches are not intended for merging yet. They also include some rmap
optimisations that Hugh probably won't have time to ACK for a while.

However, a slightly older patchset was able to decrease kernel
residency by about 5% for UP, and 7.5% for SMP on a dual Xeon
doing kbuild.

-- 
SUSE Labs, Novell Inc.

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [patch 1/14] mm: opt rmqueue
  2005-11-06  8:11 [rfc][patch 0/14] mm: performance improvements Nick Piggin
@ 2005-11-06  8:20 ` Nick Piggin
  2005-11-06  8:20   ` [patch 2/14] mm: Nick Piggin
                     ` (2 more replies)
  2005-11-07  1:39 ` [rfc][patch 0/14] mm: performance improvements Christoph Hellwig
  1 sibling, 3 replies; 33+ messages in thread
From: Nick Piggin @ 2005-11-06  8:20 UTC (permalink / raw)
  To: Linux Kernel Mailing List

[-- Attachment #1: Type: text/plain, Size: 34 bytes --]

1/14

-- 
SUSE Labs, Novell Inc.


[-- Attachment #2: mm-pagealloc-opt.patch --]
[-- Type: text/plain, Size: 2906 bytes --]

Slightly optimise some page allocation and freeing functions by
taking advantage of knowing whether or not interrupts are disabled.

Index: linux-2.6/mm/page_alloc.c
===================================================================
--- linux-2.6.orig/mm/page_alloc.c
+++ linux-2.6/mm/page_alloc.c
@@ -373,11 +373,10 @@ static int
 free_pages_bulk(struct zone *zone, int count,
 		struct list_head *list, unsigned int order)
 {
-	unsigned long flags;
 	struct page *page = NULL;
 	int ret = 0;
 
-	spin_lock_irqsave(&zone->lock, flags);
+	spin_lock(&zone->lock);
 	zone->all_unreclaimable = 0;
 	zone->pages_scanned = 0;
 	while (!list_empty(list) && count--) {
@@ -387,12 +386,13 @@ free_pages_bulk(struct zone *zone, int c
 		__free_pages_bulk(page, zone, order);
 		ret++;
 	}
-	spin_unlock_irqrestore(&zone->lock, flags);
+	spin_unlock(&zone->lock);
 	return ret;
 }
 
 void __free_pages_ok(struct page *page, unsigned int order)
 {
+	unsigned long flags;
 	LIST_HEAD(list);
 	int i;
 
@@ -410,7 +410,9 @@ void __free_pages_ok(struct page *page, 
 		free_pages_check(__FUNCTION__, page + i);
 	list_add(&page->lru, &list);
 	kernel_map_pages(page, 1<<order, 0);
+	local_irq_save(flags);
 	free_pages_bulk(page_zone(page), 1, &list, order);
+	local_irq_restore(flags);
 }
 
 
@@ -526,12 +528,11 @@ static struct page *__rmqueue(struct zon
 static int rmqueue_bulk(struct zone *zone, unsigned int order, 
 			unsigned long count, struct list_head *list)
 {
-	unsigned long flags;
 	int i;
 	int allocated = 0;
 	struct page *page;
 	
-	spin_lock_irqsave(&zone->lock, flags);
+	spin_lock(&zone->lock);
 	for (i = 0; i < count; ++i) {
 		page = __rmqueue(zone, order);
 		if (page == NULL)
@@ -539,7 +540,7 @@ static int rmqueue_bulk(struct zone *zon
 		allocated++;
 		list_add_tail(&page->lru, list);
 	}
-	spin_unlock_irqrestore(&zone->lock, flags);
+	spin_unlock(&zone->lock);
 	return allocated;
 }
 
@@ -576,6 +577,7 @@ void drain_remote_pages(void)
 #if defined(CONFIG_PM) || defined(CONFIG_HOTPLUG_CPU)
 static void __drain_pages(unsigned int cpu)
 {
+	unsigned long flags;
 	struct zone *zone;
 	int i;
 
@@ -587,8 +589,10 @@ static void __drain_pages(unsigned int c
 			struct per_cpu_pages *pcp;
 
 			pcp = &pset->pcp[i];
+			local_irq_save(flags);
 			pcp->count -= free_pages_bulk(zone, pcp->count,
 						&pcp->list, 0);
+			local_irq_restore(flags);
 		}
 	}
 }
@@ -726,16 +730,14 @@ buffered_rmqueue(struct zone *zone, int 
 		if (pcp->count <= pcp->low)
 			pcp->count += rmqueue_bulk(zone, 0,
 						pcp->batch, &pcp->list);
-		if (pcp->count) {
+		if (likely(pcp->count)) {
 			page = list_entry(pcp->list.next, struct page, lru);
 			list_del(&page->lru);
 			pcp->count--;
 		}
 		local_irq_restore(flags);
 		put_cpu();
-	}
-
-	if (page == NULL) {
+	} else {
 		spin_lock_irqsave(&zone->lock, flags);
 		page = __rmqueue(zone, order);
 		spin_unlock_irqrestore(&zone->lock, flags);
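
The change relies on a simple contract: free_pages_bulk() and
rmqueue_bulk() now assume that their caller has already disabled
interrupts, so the plain spin_lock() on zone->lock is enough. Below is
a tiny userspace model of that contract (not kernel code; the *_model
names and the bool standing in for the CPU interrupt flag are purely
illustrative):

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

static bool irqs_off;                           /* models the CPU irq flag */
static void local_irq_save_model(void)    { irqs_off = true; }
static void local_irq_restore_model(void) { irqs_off = false; }

/* Leaf helper: takes only the plain lock, relies on the caller for irqs. */
static int free_pages_bulk_model(int count)
{
	assert(irqs_off);	/* contract: interrupts are already disabled */
	/* spin_lock(&zone->lock); ... free count pages ... spin_unlock(); */
	return count;
}

int main(void)
{
	local_irq_save_model();		/* caller pays the irq cost once */
	printf("freed %d pages\n", free_pages_bulk_model(8));
	local_irq_restore_model();
	return 0;
}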

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [patch 2/14] mm:
  2005-11-06  8:20 ` [patch 1/14] mm: opt rmqueue Nick Piggin
@ 2005-11-06  8:20   ` Nick Piggin
  2005-11-06  8:20   ` [patch 2/14] mm: pte prefetch Nick Piggin
  2005-11-06 17:37   ` [patch 1/14] mm: opt rmqueue Andi Kleen
  2 siblings, 0 replies; 33+ messages in thread
From: Nick Piggin @ 2005-11-06  8:20 UTC (permalink / raw)
  To: Linux Kernel Mailing List

[-- Attachment #1: Type: text/plain, Size: 34 bytes --]

2/14

-- 
SUSE Labs, Novell Inc.


[-- Attachment #2: mm-pte-prefetch.patch --]
[-- Type: text/plain, Size: 2648 bytes --]

Prefetch ptes a line ahead. Worth 25% on ia64 when doing big forks.

Index: linux-2.6/include/asm-generic/pgtable.h
===================================================================
--- linux-2.6.orig/include/asm-generic/pgtable.h
+++ linux-2.6/include/asm-generic/pgtable.h
@@ -196,6 +196,33 @@ static inline void ptep_set_wrprotect(st
 })
 #endif
 
+#ifndef __HAVE_ARCH_PTE_PREFETCH
+#define PTES_PER_LINE (L1_CACHE_BYTES / sizeof(pte_t))
+#define PTE_LINE_MASK (~(PTES_PER_LINE - 1))
+#define ADDR_PER_LINE (PTES_PER_LINE << PAGE_SHIFT)
+#define ADDR_LINE_MASK (~(ADDR_PER_LINE - 1))
+
+#define pte_prefetch(pte, addr, end)					\
+({									\
+	unsigned long __nextline = ((addr) + ADDR_PER_LINE) & ADDR_LINE_MASK; \
+	if (__nextline < (end))						\
+		prefetch(pte + PTES_PER_LINE);				\
+})
+
+#define pte_prefetch_start(pte, addr, end)				\
+({									\
+ 	prefetch(pte);							\
+ 	pte_prefetch(pte, addr, end);					\
+})
+
+#define pte_prefetch_next(pte, addr, end)				\
+({									\
+	unsigned long __addr = (addr);					\
+	if (!(__addr & ~ADDR_LINE_MASK)) /* We hit a new cacheline */	\
+		pte_prefetch(pte, __addr, end);				\
+})
+#endif
+
 #ifndef __ASSEMBLY__
 /*
  * When walking page tables, we usually want to skip any p?d_none entries;
Index: linux-2.6/mm/memory.c
===================================================================
--- linux-2.6.orig/mm/memory.c
+++ linux-2.6/mm/memory.c
@@ -437,6 +437,8 @@ again:
 	if (!dst_pte)
 		return -ENOMEM;
 	src_pte = pte_offset_map_nested(src_pmd, addr);
+	pte_prefetch_start(src_pte, addr, end);
+
 	src_ptl = pte_lockptr(src_mm, src_pmd);
 	spin_lock(src_ptl);
 
@@ -458,7 +460,8 @@ again:
 		}
 		copy_one_pte(dst_mm, src_mm, dst_pte, src_pte, vma, addr, rss);
 		progress += 8;
-	} while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end);
+	} while (dst_pte++, src_pte++, addr += PAGE_SIZE,
+			pte_prefetch_next(src_pte, addr, end), addr != end);
 
 	spin_unlock(src_ptl);
 	pte_unmap_nested(src_pte - 1);
@@ -561,6 +564,7 @@ static unsigned long zap_pte_range(struc
 	int anon_rss = 0;
 
 	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
+	pte_prefetch_start(pte, addr, end);
 	do {
 		pte_t ptent = *pte;
 		if (pte_none(ptent)) {
@@ -629,7 +633,8 @@ static unsigned long zap_pte_range(struc
 		if (!pte_file(ptent))
 			free_swap_and_cache(pte_to_swp_entry(ptent));
 		pte_clear_full(mm, addr, pte, tlb->fullmm);
-	} while (pte++, addr += PAGE_SIZE, (addr != end && *zap_work > 0));
+	} while (pte++, addr += PAGE_SIZE, pte_prefetch_next(pte, addr, end),
+			(addr != end && *zap_work > 0));
 
 	add_mm_rss(mm, file_rss, anon_rss);
 	pte_unmap_unlock(pte - 1, ptl);

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [patch 2/14] mm: pte prefetch
  2005-11-06  8:20 ` [patch 1/14] mm: opt rmqueue Nick Piggin
  2005-11-06  8:20   ` [patch 2/14] mm: Nick Piggin
@ 2005-11-06  8:20   ` Nick Piggin
  2005-11-06  8:21     ` [patch 3/14] mm: release opt Nick Piggin
  2005-11-06  8:35     ` [patch 2/14] mm: pte prefetch Arjan van de Ven
  2005-11-06 17:37   ` [patch 1/14] mm: opt rmqueue Andi Kleen
  2 siblings, 2 replies; 33+ messages in thread
From: Nick Piggin @ 2005-11-06  8:20 UTC (permalink / raw)
  To: Linux Kernel Mailing List

[-- Attachment #1: Type: text/plain, Size: 34 bytes --]

2/14

-- 
SUSE Labs, Novell Inc.


[-- Attachment #2: mm-pte-prefetch.patch --]
[-- Type: text/plain, Size: 2648 bytes --]

Prefetch ptes a line ahead. Worth 25% on ia64 when doing big forks.

Index: linux-2.6/include/asm-generic/pgtable.h
===================================================================
--- linux-2.6.orig/include/asm-generic/pgtable.h
+++ linux-2.6/include/asm-generic/pgtable.h
@@ -196,6 +196,33 @@ static inline void ptep_set_wrprotect(st
 })
 #endif
 
+#ifndef __HAVE_ARCH_PTE_PREFETCH
+#define PTES_PER_LINE (L1_CACHE_BYTES / sizeof(pte_t))
+#define PTE_LINE_MASK (~(PTES_PER_LINE - 1))
+#define ADDR_PER_LINE (PTES_PER_LINE << PAGE_SHIFT)
+#define ADDR_LINE_MASK (~(ADDR_PER_LINE - 1))
+
+#define pte_prefetch(pte, addr, end)					\
+({									\
+	unsigned long __nextline = ((addr) + ADDR_PER_LINE) & ADDR_LINE_MASK; \
+	if (__nextline < (end))						\
+		prefetch(pte + PTES_PER_LINE);				\
+})
+
+#define pte_prefetch_start(pte, addr, end)				\
+({									\
+ 	prefetch(pte);							\
+ 	pte_prefetch(pte, addr, end);					\
+})
+
+#define pte_prefetch_next(pte, addr, end)				\
+({									\
+	unsigned long __addr = (addr);					\
+	if (!(__addr & ~ADDR_LINE_MASK)) /* We hit a new cacheline */	\
+		pte_prefetch(pte, __addr, end);				\
+})
+#endif
+
 #ifndef __ASSEMBLY__
 /*
  * When walking page tables, we usually want to skip any p?d_none entries;
Index: linux-2.6/mm/memory.c
===================================================================
--- linux-2.6.orig/mm/memory.c
+++ linux-2.6/mm/memory.c
@@ -437,6 +437,8 @@ again:
 	if (!dst_pte)
 		return -ENOMEM;
 	src_pte = pte_offset_map_nested(src_pmd, addr);
+	pte_prefetch_start(src_pte, addr, end);
+
 	src_ptl = pte_lockptr(src_mm, src_pmd);
 	spin_lock(src_ptl);
 
@@ -458,7 +460,8 @@ again:
 		}
 		copy_one_pte(dst_mm, src_mm, dst_pte, src_pte, vma, addr, rss);
 		progress += 8;
-	} while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end);
+	} while (dst_pte++, src_pte++, addr += PAGE_SIZE,
+			pte_prefetch_next(src_pte, addr, end), addr != end);
 
 	spin_unlock(src_ptl);
 	pte_unmap_nested(src_pte - 1);
@@ -561,6 +564,7 @@ static unsigned long zap_pte_range(struc
 	int anon_rss = 0;
 
 	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
+	pte_prefetch_start(pte, addr, end);
 	do {
 		pte_t ptent = *pte;
 		if (pte_none(ptent)) {
@@ -629,7 +633,8 @@ static unsigned long zap_pte_range(struc
 		if (!pte_file(ptent))
 			free_swap_and_cache(pte_to_swp_entry(ptent));
 		pte_clear_full(mm, addr, pte, tlb->fullmm);
-	} while (pte++, addr += PAGE_SIZE, (addr != end && *zap_work > 0));
+	} while (pte++, addr += PAGE_SIZE, pte_prefetch_next(pte, addr, end),
+			(addr != end && *zap_work > 0));
 
 	add_mm_rss(mm, file_rss, anon_rss);
 	pte_unmap_unlock(pte - 1, ptl);
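
To make the macro arithmetic concrete, here is a small standalone
program that walks an address range the way copy_pte_range() does and
reports where pte_prefetch_next() would issue a prefetch. The constants
(64-byte cache line, 8-byte pte_t, 4K pages) and the address range are
example values only:

#include <stdio.h>

#define L1_CACHE_BYTES	64UL
#define PTE_SIZE	8UL				/* example sizeof(pte_t) */
#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)

#define PTES_PER_LINE	(L1_CACHE_BYTES / PTE_SIZE)	/* 8 ptes per line */
#define ADDR_PER_LINE	(PTES_PER_LINE << PAGE_SHIFT)	/* 32K of address */
#define ADDR_LINE_MASK	(~(ADDR_PER_LINE - 1))

int main(void)
{
	unsigned long start = 0x10000, end = 0x30000, addr;

	for (addr = start; addr != end; addr += PAGE_SIZE) {
		/* pte_prefetch_next(): act only when we land on a new line */
		if (!(addr & ~ADDR_LINE_MASK)) {
			unsigned long next = (addr + ADDR_PER_LINE) & ADDR_LINE_MASK;
			if (next < end)
				printf("addr %#lx: prefetch ptes covering %#lx\n",
				       addr, next);
		}
	}
	return 0;
}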

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [patch 3/14] mm: release opt
  2005-11-06  8:20   ` [patch 2/14] mm: pte prefetch Nick Piggin
@ 2005-11-06  8:21     ` Nick Piggin
  2005-11-06  8:22       ` [patch 4/14] mm: rmap opt Nick Piggin
  2005-11-06  8:35     ` [patch 2/14] mm: pte prefetch Arjan van de Ven
  1 sibling, 1 reply; 33+ messages in thread
From: Nick Piggin @ 2005-11-06  8:21 UTC (permalink / raw)
  To: Linux Kernel Mailing List

[-- Attachment #1: Type: text/plain, Size: 34 bytes --]

3/14

-- 
SUSE Labs, Novell Inc.


[-- Attachment #2: mm-release-opt.patch --]
[-- Type: text/plain, Size: 1495 bytes --]

Optimise some pagevec functions by not reenabling irqs while
switching lru locks.

Index: linux-2.6/mm/swap.c
===================================================================
--- linux-2.6.orig/mm/swap.c
+++ linux-2.6/mm/swap.c
@@ -220,10 +220,13 @@ void release_pages(struct page **pages, 
 
 		pagezone = page_zone(page);
 		if (pagezone != zone) {
-			if (zone)
-				spin_unlock_irq(&zone->lru_lock);
+			spin_lock_prefetch(&pagezone->lru_lock);
+			if (!zone)
+				local_irq_disable();
+			else
+				spin_unlock(&zone->lru_lock);
 			zone = pagezone;
-			spin_lock_irq(&zone->lru_lock);
+			spin_lock(&zone->lru_lock);
 		}
 		if (TestClearPageLRU(page))
 			del_page_from_lru(zone, page);
@@ -297,10 +300,12 @@ void __pagevec_lru_add(struct pagevec *p
 		struct zone *pagezone = page_zone(page);
 
 		if (pagezone != zone) {
+			if (!zone)
+				local_irq_disable();
 			if (zone)
-				spin_unlock_irq(&zone->lru_lock);
+				spin_unlock(&zone->lru_lock);
 			zone = pagezone;
-			spin_lock_irq(&zone->lru_lock);
+			spin_lock(&zone->lru_lock);
 		}
 		if (TestSetPageLRU(page))
 			BUG();
@@ -324,10 +329,12 @@ void __pagevec_lru_add_active(struct pag
 		struct zone *pagezone = page_zone(page);
 
 		if (pagezone != zone) {
+			if (!zone)
+				local_irq_disable();
 			if (zone)
-				spin_unlock_irq(&zone->lru_lock);
+				spin_unlock(&zone->lru_lock);
 			zone = pagezone;
-			spin_lock_irq(&zone->lru_lock);
+			spin_lock(&zone->lru_lock);
 		}
 		if (TestSetPageLRU(page))
 			BUG();
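
The gain is that interrupts are now toggled once per batch instead of
once per zone switch while walking the pagevec. A rough userspace count
of the difference (the page-to-zone assignment is invented for the
example):

#include <stdio.h>

int main(void)
{
	int zone_of[] = { 0, 0, 1, 1, 0, 2, 2, 2 };	/* zone of each page */
	int n = sizeof(zone_of) / sizeof(zone_of[0]);
	int i, zone, old_irq_ops = 0, new_irq_ops = 2;	/* new: disable + enable */

	/* old scheme: spin_lock_irq()/spin_unlock_irq() around each zone */
	zone = -1;
	for (i = 0; i < n; i++) {
		if (zone_of[i] != zone) {
			if (zone != -1)
				old_irq_ops++;	/* spin_unlock_irq(): irqs on */
			zone = zone_of[i];
			old_irq_ops++;		/* spin_lock_irq(): irqs off */
		}
	}
	old_irq_ops++;				/* final spin_unlock_irq() */

	printf("irq disable/enable operations for %d pages: old=%d, new=%d\n",
	       n, old_irq_ops, new_irq_ops);
	return 0;
}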

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [patch 4/14] mm: rmap opt
  2005-11-06  8:21     ` [patch 3/14] mm: release opt Nick Piggin
@ 2005-11-06  8:22       ` Nick Piggin
  2005-11-06  8:23         ` [patch 5/14] mm: set_page_refs opt Nick Piggin
  0 siblings, 1 reply; 33+ messages in thread
From: Nick Piggin @ 2005-11-06  8:22 UTC (permalink / raw)
  To: Linux Kernel Mailing List

[-- Attachment #1: Type: text/plain, Size: 34 bytes --]

4/14

-- 
SUSE Labs, Novell Inc.


[-- Attachment #2: mm-rmap-opt.patch --]
[-- Type: text/plain, Size: 6853 bytes --]

Slightly optimise rmap functions by minimising atomic operations when
we know there will be no concurrent modifications.

Index: linux-2.6/include/linux/rmap.h
===================================================================
--- linux-2.6.orig/include/linux/rmap.h
+++ linux-2.6/include/linux/rmap.h
@@ -71,6 +71,7 @@ void __anon_vma_link(struct vm_area_stru
  * rmap interfaces called when adding or removing pte of page
  */
 void page_add_anon_rmap(struct page *, struct vm_area_struct *, unsigned long);
+void page_add_new_anon_rmap(struct page *, struct vm_area_struct *, unsigned long);
 void page_add_file_rmap(struct page *);
 void page_remove_rmap(struct page *);
 
Index: linux-2.6/mm/memory.c
===================================================================
--- linux-2.6.orig/mm/memory.c
+++ linux-2.6/mm/memory.c
@@ -1337,14 +1337,15 @@ static int do_wp_page(struct mm_struct *
 			inc_mm_counter(mm, anon_rss);
 			dec_mm_counter(mm, file_rss);
 		}
+
 		flush_cache_page(vma, address, pfn);
 		entry = mk_pte(new_page, vma->vm_page_prot);
 		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
 		ptep_establish(vma, address, page_table, entry);
 		update_mmu_cache(vma, address, entry);
 		lazy_mmu_prot_update(entry);
+		page_add_new_anon_rmap(new_page, vma, address);
 		lru_cache_add_active(new_page);
-		page_add_anon_rmap(new_page, vma, address);
 
 		/* Free the old page.. */
 		new_page = old_page;
@@ -1796,9 +1797,8 @@ static int do_anonymous_page(struct mm_s
 		if (!pte_none(*page_table))
 			goto release;
 		inc_mm_counter(mm, anon_rss);
+		page_add_new_anon_rmap(page, vma, address);
 		lru_cache_add_active(page);
-		SetPageReferenced(page);
-		page_add_anon_rmap(page, vma, address);
 	} else {
 		/* Map the ZERO_PAGE - vm_page_prot is readonly */
 		page = ZERO_PAGE(address);
@@ -1924,11 +1924,10 @@ retry:
 		entry = mk_pte(new_page, vma->vm_page_prot);
 		if (write_access)
 			entry = maybe_mkwrite(pte_mkdirty(entry), vma);
-		set_pte_at(mm, address, page_table, entry);
 		if (anon) {
 			inc_mm_counter(mm, anon_rss);
+			page_add_new_anon_rmap(new_page, vma, address);
 			lru_cache_add_active(new_page);
-			page_add_anon_rmap(new_page, vma, address);
 		} else if (!(vma->vm_flags & VM_RESERVED)) {
 			inc_mm_counter(mm, file_rss);
 			page_add_file_rmap(new_page);
@@ -1939,6 +1938,7 @@ retry:
 		goto unlock;
 	}
 
+	set_pte_at(mm, address, page_table, entry);
 	/* no need to invalidate: a not-present page shouldn't be cached */
 	update_mmu_cache(vma, address, entry);
 	lazy_mmu_prot_update(entry);
Index: linux-2.6/mm/rmap.c
===================================================================
--- linux-2.6.orig/mm/rmap.c
+++ linux-2.6/mm/rmap.c
@@ -440,6 +440,26 @@ int page_referenced(struct page *page, i
 }
 
 /**
+ * page_set_anon_rmap - setup new anonymous rmap
+ * @page:	the page to add the mapping to
+ * @vma:	the vm area in which the mapping is added
+ * @address:	the user virtual address mapped
+ */
+static void __page_set_anon_rmap(struct page *page,
+	struct vm_area_struct *vma, unsigned long address)
+{
+	struct anon_vma *anon_vma = vma->anon_vma;
+
+	BUG_ON(!anon_vma);
+	anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
+	page->mapping = (struct address_space *) anon_vma;
+
+	page->index = linear_page_index(vma, address);
+
+	inc_page_state(nr_mapped);
+}
+
+/**
  * page_add_anon_rmap - add pte mapping to an anonymous page
  * @page:	the page to add the mapping to
  * @vma:	the vm area in which the mapping is added
@@ -450,21 +470,28 @@ int page_referenced(struct page *page, i
 void page_add_anon_rmap(struct page *page,
 	struct vm_area_struct *vma, unsigned long address)
 {
-	if (atomic_inc_and_test(&page->_mapcount)) {
-		struct anon_vma *anon_vma = vma->anon_vma;
-
-		BUG_ON(!anon_vma);
-		anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
-		page->mapping = (struct address_space *) anon_vma;
-
-		page->index = linear_page_index(vma, address);
-
-		inc_page_state(nr_mapped);
-	}
+	if (atomic_inc_and_test(&page->_mapcount))
+		__page_set_anon_rmap(page, vma, address);
 	/* else checking page index and mapping is racy */
 }
 
 /**
+ * page_add_new_anon_rmap - add pte mapping to a new anonymous page
+ * @page:	the page to add the mapping to
+ * @vma:	the vm area in which the mapping is added
+ * @address:	the user virtual address mapped
+ *
+ * same as page_add_anon_rmap but must only be called on *new* pages.
+ */
+void page_add_new_anon_rmap(struct page *page,
+	struct vm_area_struct *vma, unsigned long address)
+{
+	atomic_set(&page->_mapcount, 0); /* elevate count by 1 (starts at -1) */
+	__page_set_anon_rmap(page, vma, address);
+}
+
+
+/**
  * page_add_file_rmap - add pte mapping to a file page
  * @page: the page to add the mapping to
  *
@@ -487,21 +514,28 @@ void page_add_file_rmap(struct page *pag
  */
 void page_remove_rmap(struct page *page)
 {
-	if (atomic_add_negative(-1, &page->_mapcount)) {
+	int fast = (page_mapcount(page) == 1) &
+			PageAnon(page) & (!PageSwapCache(page));
+
+	/* fast page may become SwapCache here, but nothing new will map it. */
+	if (fast)
+		reset_page_mapcount(page);
+	else if (atomic_add_negative(-1, &page->_mapcount))
 		BUG_ON(page_mapcount(page) < 0);
-		/*
-		 * It would be tidy to reset the PageAnon mapping here,
-		 * but that might overwrite a racing page_add_anon_rmap
-		 * which increments mapcount after us but sets mapping
-		 * before us: so leave the reset to free_hot_cold_page,
-		 * and remember that it's only reliable while mapped.
-		 * Leaving it set also helps swapoff to reinstate ptes
-		 * faster for those pages still in swapcache.
-		 */
 		if (page_test_and_clear_dirty(page))
 			set_page_dirty(page);
-		dec_page_state(nr_mapped);
-	}
+	else
+		return; /* non zero mapcount */
+	/*
+	 * It would be tidy to reset the PageAnon mapping here,
+	 * but that might overwrite a racing page_add_anon_rmap
+	 * which increments mapcount after us but sets mapping
+	 * before us: so leave the reset to free_hot_cold_page,
+	 * and remember that it's only reliable while mapped.
+	 * Leaving it set also helps swapoff to reinstate ptes
+	 * faster for those pages still in swapcache.
+	 */
+	dec_page_state(nr_mapped);
 }
 
 /*
Index: linux-2.6/include/linux/page-flags.h
===================================================================
--- linux-2.6.orig/include/linux/page-flags.h
+++ linux-2.6/include/linux/page-flags.h
@@ -182,6 +182,7 @@ extern void __mod_page_state(unsigned lo
 
 #define PageReferenced(page)	test_bit(PG_referenced, &(page)->flags)
 #define SetPageReferenced(page)	set_bit(PG_referenced, &(page)->flags)
+#define __SetPageReferenced(page)	__set_bit(PG_referenced, &(page)->flags)
 #define ClearPageReferenced(page)	clear_bit(PG_referenced, &(page)->flags)
 #define TestClearPageReferenced(page) test_and_clear_bit(PG_referenced, &(page)->flags)
 

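Both helpers lean on the _mapcount encoding: it starts at -1, so the
stored value is always "number of mappings minus one". The existing
page_add_anon_rmap() needs atomic_inc_and_test() to spot the -1 -> 0
transition under concurrency, whereas page_add_new_anon_rmap() may
simply store 0 because nobody else can see a brand-new page yet. A
small standalone illustration of the encoding (plain ints instead of
atomics, which is only safe because the example is single-threaded):

#include <stdio.h>

struct page_model {
	int mapcount;		/* stored as (mappings - 1), starts at -1 */
};

static int page_mapcount_model(struct page_model *p)
{
	return p->mapcount + 1;
}

/* existing page: must detect whether we are the first mapper */
static void add_anon_rmap_model(struct page_model *p)
{
	if (++p->mapcount == 0)		/* models atomic_inc_and_test() */
		printf("first mapping: set up anon rmap, bump nr_mapped\n");
}

/* brand-new page: no concurrent users, so just store "one mapping" */
static void add_new_anon_rmap_model(struct page_model *p)
{
	p->mapcount = 0;		/* models atomic_set(&_mapcount, 0) */
	printf("new page: set up anon rmap, bump nr_mapped\n");
}

int main(void)
{
	struct page_model p = { .mapcount = -1 };

	add_new_anon_rmap_model(&p);	/* fault path for a brand-new page */
	add_anon_rmap_model(&p);	/* a second pte maps the same page */
	printf("page_mapcount = %d\n", page_mapcount_model(&p));
	return 0;
}
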
^ permalink raw reply	[flat|nested] 33+ messages in thread

* [patch 5/14] mm: set_page_refs opt
  2005-11-06  8:22       ` [patch 4/14] mm: rmap opt Nick Piggin
@ 2005-11-06  8:23         ` Nick Piggin
  2005-11-06  8:24           ` [patch 6/14] mm: microopt conditions Nick Piggin
  2005-11-07  1:40           ` [patch 5/14] mm: set_page_refs opt Christoph Hellwig
  0 siblings, 2 replies; 33+ messages in thread
From: Nick Piggin @ 2005-11-06  8:23 UTC (permalink / raw)
  To: Linux Kernel Mailing List

[-- Attachment #1: Type: text/plain, Size: 34 bytes --]

5/14

-- 
SUSE Labs, Novell Inc.


[-- Attachment #2: mm-set_page_refs-opt.patch --]
[-- Type: text/plain, Size: 2767 bytes --]

Inline set_page_refs and remove mm/internal.h, which only declared it.

Index: linux-2.6/mm/bootmem.c
===================================================================
--- linux-2.6.orig/mm/bootmem.c
+++ linux-2.6/mm/bootmem.c
@@ -19,7 +19,6 @@
 #include <linux/module.h>
 #include <asm/dma.h>
 #include <asm/io.h>
-#include "internal.h"
 
 /*
  * Access to this subsystem has to be serialized externally. (this is
Index: linux-2.6/mm/page_alloc.c
===================================================================
--- linux-2.6.orig/mm/page_alloc.c
+++ linux-2.6/mm/page_alloc.c
@@ -38,7 +38,6 @@
 #include <linux/vmalloc.h>
 
 #include <asm/tlbflush.h>
-#include "internal.h"
 
 /*
  * MCD - HACK: Find somewhere to initialize this EARLY, or make this
@@ -448,23 +447,6 @@ expand(struct zone *zone, struct page *p
 	return page;
 }
 
-void set_page_refs(struct page *page, int order)
-{
-#ifdef CONFIG_MMU
-	set_page_count(page, 1);
-#else
-	int i;
-
-	/*
-	 * We need to reference all the pages for this order, otherwise if
-	 * anyone accesses one of the pages with (get/put) it will be freed.
-	 * - eg: access_process_vm()
-	 */
-	for (i = 0; i < (1 << order); i++)
-		set_page_count(page + i, 1);
-#endif /* CONFIG_MMU */
-}
-
 /*
  * This page is about to be returned from the page allocator
  */
Index: linux-2.6/mm/internal.h
===================================================================
--- linux-2.6.orig/mm/internal.h
+++ /dev/null
@@ -1,13 +0,0 @@
-/* internal.h: mm/ internal definitions
- *
- * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-/* page_alloc.c */
-extern void set_page_refs(struct page *page, int order);
Index: linux-2.6/include/linux/mm.h
===================================================================
--- linux-2.6.orig/include/linux/mm.h
+++ linux-2.6/include/linux/mm.h
@@ -315,6 +315,23 @@ struct page {
 #define set_page_count(p,v) 	atomic_set(&(p)->_count, v - 1)
 #define __put_page(p)		atomic_dec(&(p)->_count)
 
+static inline void set_page_refs(struct page *page, int order)
+{
+#ifdef CONFIG_MMU
+	set_page_count(page, 1);
+#else
+	int i;
+
+	/*
+	 * We need to reference all the pages for this order, otherwise if
+	 * anyone accesses one of the pages with (get/put) it will be freed.
+	 * - eg: access_process_vm()
+	 */
+	for (i = 0; i < (1 << order); i++)
+		set_page_count(page + i, 1);
+#endif /* CONFIG_MMU */
+}
+
 extern void FASTCALL(__page_cache_release(struct page *));
 
 #ifdef CONFIG_HUGETLB_PAGE

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [patch 6/14] mm: microopt conditions
  2005-11-06  8:23         ` [patch 5/14] mm: set_page_refs opt Nick Piggin
@ 2005-11-06  8:24           ` Nick Piggin
  2005-11-06  8:24             ` [patch 7/14] mm: remove bad_range Nick Piggin
  2005-11-07  1:40           ` [patch 5/14] mm: set_page_refs opt Christoph Hellwig
  1 sibling, 1 reply; 33+ messages in thread
From: Nick Piggin @ 2005-11-06  8:24 UTC (permalink / raw)
  Cc: Linux Kernel Mailing List

[-- Attachment #1: Type: text/plain, Size: 34 bytes --]

6/14

-- 
SUSE Labs, Novell Inc.


[-- Attachment #2: mm-microopt-conditions.patch --]
[-- Type: text/plain, Size: 1522 bytes --]

Micro-optimise some conditionals where we don't need lazy evaluation.

Index: linux-2.6/mm/page_alloc.c
===================================================================
--- linux-2.6.orig/mm/page_alloc.c
+++ linux-2.6/mm/page_alloc.c
@@ -339,9 +339,9 @@ static inline void __free_pages_bulk (st
 
 static inline void free_pages_check(const char *function, struct page *page)
 {
-	if (	page_mapcount(page) ||
-		page->mapping != NULL ||
-		page_count(page) != 0 ||
+	if (unlikely(page_mapcount(page) |
+		(page->mapping != NULL)  |
+		(page_count(page) != 0)  |
 		(page->flags & (
 			1 << PG_lru	|
 			1 << PG_private |
@@ -351,7 +351,7 @@ static inline void free_pages_check(cons
 			1 << PG_slab	|
 			1 << PG_swapcache |
 			1 << PG_writeback |
-			1 << PG_reserved )))
+			1 << PG_reserved ))))
 		bad_page(function, page);
 	if (PageDirty(page))
 		__ClearPageDirty(page);
@@ -452,9 +452,9 @@ expand(struct zone *zone, struct page *p
  */
 static void prep_new_page(struct page *page, int order)
 {
-	if (	page_mapcount(page) ||
-		page->mapping != NULL ||
-		page_count(page) != 0 ||
+	if (unlikely(page_mapcount(page) |
+		(page->mapping != NULL)  |
+		(page_count(page) != 0)  |
 		(page->flags & (
 			1 << PG_lru	|
 			1 << PG_private	|
@@ -465,7 +465,7 @@ static void prep_new_page(struct page *p
 			1 << PG_slab    |
 			1 << PG_swapcache |
 			1 << PG_writeback |
-			1 << PG_reserved )))
+			1 << PG_reserved ))))
 		bad_page(__FUNCTION__, page);
 
 	page->flags &= ~(1 << PG_uptodate | 1 << PG_error |
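
The point of the change: with ||, each term can become its own
conditional branch because C guarantees lazy evaluation, while | lets
the compiler evaluate all the cheap, side-effect-free terms and branch
once. A tiny standalone comparison (the field values and flag mask are
arbitrary stand-ins for the page fields being checked):

#include <stdio.h>

int main(void)
{
	int mapcount = 0;
	void *mapping = NULL;
	int count = 0;
	unsigned long flags = 0, bad_mask = 0x60;

	/* short-circuit form: up to one branch per term */
	int bad_lazy = (mapcount || mapping != NULL || count != 0 ||
			(flags & bad_mask));

	/*
	 * Eager form: evaluate every term, OR the results, test once.
	 * The kernel patch ORs the raw values, which gives the same
	 * zero/non-zero answer for these operands.
	 */
	int bad_eager = ((mapcount != 0) | (mapping != NULL) | (count != 0) |
			 ((flags & bad_mask) != 0));

	printf("lazy=%d eager=%d (same truth value)\n", bad_lazy, bad_eager);
	return 0;
}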

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [patch 7/14] mm: remove bad_range
  2005-11-06  8:24           ` [patch 6/14] mm: microopt conditions Nick Piggin
@ 2005-11-06  8:24             ` Nick Piggin
  2005-11-06  8:25               ` [patch 8/14] mm: remove pcp_low Nick Piggin
  2005-11-06 17:37               ` [patch 7/14] mm: remove bad_range Bob Picco
  0 siblings, 2 replies; 33+ messages in thread
From: Nick Piggin @ 2005-11-06  8:24 UTC (permalink / raw)
  To: Linux Kernel Mailing List

[-- Attachment #1: Type: text/plain, Size: 34 bytes --]

7/14

-- 
SUSE Labs, Novell Inc.


[-- Attachment #2: mm-remove-bad_range.patch --]
[-- Type: text/plain, Size: 1314 bytes --]

bad_range is supposed to be a temporary check. It would be a pity to throw
it out. Make it depend on CONFIG_DEBUG_VM instead.

Index: linux-2.6/mm/page_alloc.c
===================================================================
--- linux-2.6.orig/mm/page_alloc.c
+++ linux-2.6/mm/page_alloc.c
@@ -78,6 +78,7 @@ int min_free_kbytes = 1024;
 unsigned long __initdata nr_kernel_pages;
 unsigned long __initdata nr_all_pages;
 
+#ifdef CONFIG_DEBUG_VM
 static int page_outside_zone_boundaries(struct zone *zone, struct page *page)
 {
 	int ret = 0;
@@ -119,6 +120,13 @@ static int bad_range(struct zone *zone, 
 	return 0;
 }
 
+#else
+static inline int bad_range(struct zone *zone, struct page *page)
+{
+	return 0;
+}
+#endif
+
 static void bad_page(const char *function, struct page *page)
 {
 	printk(KERN_EMERG "Bad page state at %s (in process '%s', page %p)\n",
Index: linux-2.6/lib/Kconfig.debug
===================================================================
--- linux-2.6.orig/lib/Kconfig.debug
+++ linux-2.6/lib/Kconfig.debug
@@ -172,7 +172,8 @@ config DEBUG_VM
 	bool "Debug VM"
 	depends on DEBUG_KERNEL
 	help
-	  Enable this to debug the virtual-memory system.
+	  Enable this to turn on extended checks in the virtual-memory system
+          that may impact performance.
 
 	  If unsure, say N.
 

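This is the usual pattern for compiling debug checks out: a real
implementation under the config option and an empty static inline
otherwise, so the call sites stay unconditional and disappear in
non-debug builds. A userspace sketch of the same structure (build with
and without -DDEBUG_VM; the range numbers are made up):

#include <stdio.h>

#ifdef DEBUG_VM
/* debug build: do the (possibly expensive) check */
static int bad_range(long pfn, long zone_start, long zone_end)
{
	return pfn < zone_start || pfn >= zone_end;
}
#else
/* production build: the compiler removes the call entirely */
static inline int bad_range(long pfn, long zone_start, long zone_end)
{
	(void)pfn; (void)zone_start; (void)zone_end;
	return 0;
}
#endif

int main(void)
{
	printf("bad_range(150, 0, 100) = %d\n", bad_range(150L, 0L, 100L));
	return 0;
}
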
^ permalink raw reply	[flat|nested] 33+ messages in thread

* [patch 8/14] mm: remove pcp_low
  2005-11-06  8:24             ` [patch 7/14] mm: remove bad_range Nick Piggin
@ 2005-11-06  8:25               ` Nick Piggin
  2005-11-06  8:25                 ` [patch 9/14] mm: page_state opt Nick Piggin
  2005-11-06 17:37               ` [patch 7/14] mm: remove bad_range Bob Picco
  1 sibling, 1 reply; 33+ messages in thread
From: Nick Piggin @ 2005-11-06  8:25 UTC (permalink / raw)
  To: Linux Kernel Mailing List

[-- Attachment #1: Type: text/plain, Size: 34 bytes --]

8/14

-- 
SUSE Labs, Novell Inc.


[-- Attachment #2: mm-remove-pcp-low.patch --]
[-- Type: text/plain, Size: 2256 bytes --]

pcp->low is useless.

Index: linux-2.6/include/linux/mmzone.h
===================================================================
--- linux-2.6.orig/include/linux/mmzone.h
+++ linux-2.6/include/linux/mmzone.h
@@ -46,7 +46,6 @@ struct zone_padding {
 
 struct per_cpu_pages {
 	int count;		/* number of pages in the list */
-	int low;		/* low watermark, refill needed */
 	int high;		/* high watermark, emptying needed */
 	int batch;		/* chunk size for buddy add/remove */
 	struct list_head list;	/* the list of pages */
Index: linux-2.6/mm/page_alloc.c
===================================================================
--- linux-2.6.orig/mm/page_alloc.c
+++ linux-2.6/mm/page_alloc.c
@@ -712,7 +712,7 @@ buffered_rmqueue(struct zone *zone, int 
 
 		pcp = &zone_pcp(zone, get_cpu())->pcp[cold];
 		local_irq_save(flags);
-		if (pcp->count <= pcp->low)
+		if (!pcp->count)
 			pcp->count += rmqueue_bulk(zone, 0,
 						pcp->batch, &pcp->list);
 		if (likely(pcp->count)) {
@@ -1324,10 +1324,9 @@ void show_free_areas(void)
 			pageset = zone_pcp(zone, cpu);
 
 			for (temperature = 0; temperature < 2; temperature++)
-				printk("cpu %d %s: low %d, high %d, batch %d used:%d\n",
+				printk("cpu %d %s: high %d, batch %d used:%d\n",
 					cpu,
 					temperature ? "cold" : "hot",
-					pageset->pcp[temperature].low,
 					pageset->pcp[temperature].high,
 					pageset->pcp[temperature].batch,
 					pageset->pcp[temperature].count);
@@ -1765,14 +1764,12 @@ inline void setup_pageset(struct per_cpu
 
 	pcp = &p->pcp[0];		/* hot */
 	pcp->count = 0;
-	pcp->low = 0;
-	pcp->high = 6 * batch;
+	pcp->high = 4 * batch;
 	pcp->batch = max(1UL, 1 * batch);
 	INIT_LIST_HEAD(&pcp->list);
 
 	pcp = &p->pcp[1];		/* cold*/
 	pcp->count = 0;
-	pcp->low = 0;
 	pcp->high = 2 * batch;
 	pcp->batch = max(1UL, batch/2);
 	INIT_LIST_HEAD(&pcp->list);
@@ -2169,12 +2166,10 @@ static int zoneinfo_show(struct seq_file
 				seq_printf(m,
 					   "\n    cpu: %i pcp: %i"
 					   "\n              count: %i"
-					   "\n              low:   %i"
 					   "\n              high:  %i"
 					   "\n              batch: %i",
 					   i, j,
 					   pageset->pcp[j].count,
-					   pageset->pcp[j].low,
 					   pageset->pcp[j].high,
 					   pageset->pcp[j].batch);
 			}

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [patch 9/14] mm: page_state opt
  2005-11-06  8:25               ` [patch 8/14] mm: remove pcp_low Nick Piggin
@ 2005-11-06  8:25                 ` Nick Piggin
  2005-11-06  8:26                   ` [patch 10/14] mm: single pcp list Nick Piggin
  2005-11-13  2:38                   ` [patch 9/14] mm: page_state opt Andi Kleen
  0 siblings, 2 replies; 33+ messages in thread
From: Nick Piggin @ 2005-11-06  8:25 UTC (permalink / raw)
  To: Linux Kernel Mailing List

[-- Attachment #1: Type: text/plain, Size: 34 bytes --]

9/14

-- 
SUSE Labs, Novell Inc.


[-- Attachment #2: mm-page_state-opt.patch --]
[-- Type: text/plain, Size: 7668 bytes --]

Optimise page_state manipulations by introducing a direct accessor
to page_state fields that does not disable interrupts; callers must
then provide their own exclusion, either by disabling interrupts
themselves or by only updating from non-interrupt context.

Index: linux-2.6/include/linux/page-flags.h
===================================================================
--- linux-2.6.orig/include/linux/page-flags.h
+++ linux-2.6/include/linux/page-flags.h
@@ -138,6 +138,7 @@ extern void get_page_state_node(struct p
 extern void get_full_page_state(struct page_state *ret);
 extern unsigned long __read_page_state(unsigned long offset);
 extern void __mod_page_state(unsigned long offset, unsigned long delta);
+extern unsigned long *__page_state(unsigned long offset);
 
 #define read_page_state(member) \
 	__read_page_state(offsetof(struct page_state, member))
@@ -150,16 +151,26 @@ extern void __mod_page_state(unsigned lo
 #define add_page_state(member,delta) mod_page_state(member, (delta))
 #define sub_page_state(member,delta) mod_page_state(member, 0UL - (delta))
 
-#define mod_page_state_zone(zone, member, delta)				\
-	do {									\
-		unsigned offset;						\
-		if (is_highmem(zone))						\
-			offset = offsetof(struct page_state, member##_high);	\
-		else if (is_normal(zone))					\
-			offset = offsetof(struct page_state, member##_normal);	\
-		else								\
-			offset = offsetof(struct page_state, member##_dma);	\
-		__mod_page_state(offset, (delta));				\
+#define page_state(member) (*__page_state(offsetof(struct page_state, member)))
+
+#define state_zone_offset(zone, member)					\
+({									\
+	unsigned offset;						\
+	if (is_highmem(zone))						\
+		offset = offsetof(struct page_state, member##_high);	\
+	else if (is_normal(zone))					\
+		offset = offsetof(struct page_state, member##_normal);	\
+	else								\
+		offset = offsetof(struct page_state, member##_dma);	\
+	offset;								\
+})
+
+#define page_state_zone(zone, member)					\
+	(*__page_state(state_zone_offset(zone, member)))
+
+#define mod_page_state_zone(zone, member, delta)			\
+	do {								\
+		__mod_page_state(state_zone_offset(zone, member), (delta)); \
 	} while (0)
 
 /*
Index: linux-2.6/mm/page_alloc.c
===================================================================
--- linux-2.6.orig/mm/page_alloc.c
+++ linux-2.6/mm/page_alloc.c
@@ -400,8 +400,6 @@ void __free_pages_ok(struct page *page, 
 
 	arch_free_page(page, order);
 
-	mod_page_state(pgfree, 1 << order);
-
 #ifndef CONFIG_MMU
 	if (order > 0)
 		for (i = 1 ; i < (1 << order) ; ++i)
@@ -413,6 +411,7 @@ void __free_pages_ok(struct page *page, 
 	list_add(&page->lru, &list);
 	kernel_map_pages(page, 1<<order, 0);
 	local_irq_save(flags);
+	page_state(pgfree) += 1 << order;
 	free_pages_bulk(page_zone(page), 1, &list, order);
 	local_irq_restore(flags);
 }
@@ -662,12 +661,12 @@ static void fastcall free_hot_cold_page(
 	arch_free_page(page, 0);
 
 	kernel_map_pages(page, 1, 0);
-	inc_page_state(pgfree);
 	if (PageAnon(page))
 		page->mapping = NULL;
 	free_pages_check(page);
 	pcp = &zone_pcp(zone, get_cpu())->pcp[cold];
 	local_irq_save(flags);
+	page_state(pgfree)++;
 	list_add(&page->lru, &pcp->list);
 	pcp->count++;
 	if (pcp->count >= pcp->high)
@@ -704,42 +703,50 @@ static struct page *
 buffered_rmqueue(struct zone *zone, int order, gfp_t gfp_flags)
 {
 	unsigned long flags;
-	struct page *page = NULL;
+	struct page *page;
 	int cold = !!(gfp_flags & __GFP_COLD);
+	int cpu = get_cpu();
 
 	if (order == 0) {
 		struct per_cpu_pages *pcp;
 
-		pcp = &zone_pcp(zone, get_cpu())->pcp[cold];
+		pcp = &zone_pcp(zone, cpu)->pcp[cold];
 		local_irq_save(flags);
-		if (!pcp->count)
+		if (!pcp->count) {
 			pcp->count += rmqueue_bulk(zone, 0,
 						pcp->batch, &pcp->list);
-		if (likely(pcp->count)) {
-			page = list_entry(pcp->list.next, struct page, lru);
-			list_del(&page->lru);
-			pcp->count--;
+			if (unlikely(!pcp->count))
+				goto failed;
 		}
-		local_irq_restore(flags);
-		put_cpu();
+		page = list_entry(pcp->list.next, struct page, lru);
+		list_del(&page->lru);
+		pcp->count--;
 	} else {
 		spin_lock_irqsave(&zone->lock, flags);
 		page = __rmqueue(zone, order);
-		spin_unlock_irqrestore(&zone->lock, flags);
+		spin_unlock(&zone->lock);
+		if (!page)
+			goto failed;
 	}
 
-	if (page != NULL) {
-		BUG_ON(bad_range(zone, page));
-		mod_page_state_zone(zone, pgalloc, 1 << order);
-		prep_new_page(page, order);
+	page_state_zone(zone, pgalloc) += 1 << order;
+	local_irq_restore(flags);
+	put_cpu();
 
-		if (gfp_flags & __GFP_ZERO)
-			prep_zero_page(page, order, gfp_flags);
+	BUG_ON(bad_range(zone, page));
+	prep_new_page(page, order);
 
-		if (order && (gfp_flags & __GFP_COMP))
-			prep_compound_page(page, order);
-	}
+	if (gfp_flags & __GFP_ZERO)
+		prep_zero_page(page, order, gfp_flags);
+
+	if (order && (gfp_flags & __GFP_COMP))
+		prep_compound_page(page, order);
 	return page;
+
+failed:
+	local_irq_restore(flags);
+	put_cpu();
+	return NULL;
 }
 
 /*
@@ -1215,6 +1222,15 @@ unsigned long __read_page_state(unsigned
 	return ret;
 }
 
+unsigned long *__page_state(unsigned long offset)
+{
+	void* ptr;
+	ptr = &__get_cpu_var(page_states);
+	return (unsigned long*)(ptr + offset);
+}
+
+EXPORT_SYMBOL(__page_state);
+
 void __mod_page_state(unsigned long offset, unsigned long delta)
 {
 	unsigned long flags;
Index: linux-2.6/mm/vmscan.c
===================================================================
--- linux-2.6.orig/mm/vmscan.c
+++ linux-2.6/mm/vmscan.c
@@ -641,17 +641,18 @@ static void shrink_cache(struct zone *zo
 			goto done;
 
 		max_scan -= nr_scan;
-		if (current_is_kswapd())
-			mod_page_state_zone(zone, pgscan_kswapd, nr_scan);
-		else
-			mod_page_state_zone(zone, pgscan_direct, nr_scan);
 		nr_freed = shrink_list(&page_list, sc);
-		if (current_is_kswapd())
-			mod_page_state(kswapd_steal, nr_freed);
-		mod_page_state_zone(zone, pgsteal, nr_freed);
 		sc->nr_to_reclaim -= nr_freed;
 
-		spin_lock_irq(&zone->lru_lock);
+		local_irq_disable();
+		if (current_is_kswapd()) {
+			page_state_zone(zone, pgscan_kswapd) += nr_scan;
+			page_state(kswapd_steal) += nr_freed;
+		} else
+			page_state_zone(zone, pgscan_direct) += nr_scan;
+		page_state_zone(zone, pgsteal) += nr_freed;
+
+		spin_lock(&zone->lru_lock);
 		/*
 		 * Put back any unfreeable pages.
 		 */
@@ -813,11 +814,13 @@ refill_inactive_zone(struct zone *zone, 
 		}
 	}
 	zone->nr_active += pgmoved;
-	spin_unlock_irq(&zone->lru_lock);
-	pagevec_release(&pvec);
+	spin_unlock(&zone->lru_lock);
+
+	page_state_zone(zone, pgrefill) += pgscanned;
+	page_state(pgdeactivate) += pgdeactivate;
+	local_irq_enable();
 
-	mod_page_state_zone(zone, pgrefill, pgscanned);
-	mod_page_state(pgdeactivate, pgdeactivate);
+	pagevec_release(&pvec);
 }
 
 /*
Index: linux-2.6/mm/rmap.c
===================================================================
--- linux-2.6.orig/mm/rmap.c
+++ linux-2.6/mm/rmap.c
@@ -456,7 +456,11 @@ static void __page_set_anon_rmap(struct 
 
 	page->index = linear_page_index(vma, address);
 
-	inc_page_state(nr_mapped);
+	/*
+	 * nr_mapped state can be updated without turning off
+	 * interrupts because it is not modified via interrupt.
+	 */
+	page_state(nr_mapped)++;
 }
 
 /**
@@ -503,7 +507,7 @@ void page_add_file_rmap(struct page *pag
 	BUG_ON(!pfn_valid(page_to_pfn(page)));
 
 	if (atomic_inc_and_test(&page->_mapcount))
-		inc_page_state(nr_mapped);
+		page_state(nr_mapped)++;
 }
 
 /**
@@ -535,7 +539,7 @@ void page_remove_rmap(struct page *page)
 	 * Leaving it set also helps swapoff to reinstate ptes
 	 * faster for those pages still in swapcache.
 	 */
-	dec_page_state(nr_mapped);
+	page_state(nr_mapped)--;
 }
 
 /*
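
The accessor reuses the offsetof() trick already behind
read_page_state()/mod_page_state(): name a field at the call site,
turn it into a byte offset, and apply it to the per-cpu structure. A
standalone sketch of the mechanism, with an ordinary global struct
standing in for the per-cpu page_states (so none of the preemption or
interrupt questions arise in the sketch):

#include <stdio.h>
#include <stddef.h>

struct page_state_model {
	unsigned long nr_mapped;
	unsigned long pgfree;
	unsigned long pgalloc_normal;
};

static struct page_state_model this_cpu_state;	/* stands in for the per-cpu copy */

/* return a pointer to the field at `offset', like __page_state() */
static unsigned long *page_state_ptr(unsigned long offset)
{
	char *base = (char *)&this_cpu_state;
	return (unsigned long *)(base + offset);
}

#define page_state_model(member) \
	(*page_state_ptr(offsetof(struct page_state_model, member)))

int main(void)
{
	page_state_model(pgfree) += 1 << 3;	/* eg. freeing an order-3 page */
	page_state_model(nr_mapped)++;
	printf("pgfree=%lu nr_mapped=%lu\n",
	       page_state_model(pgfree), page_state_model(nr_mapped));
	return 0;
}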

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [patch 10/14] mm: single pcp list
  2005-11-06  8:25                 ` [patch 9/14] mm: page_state opt Nick Piggin
@ 2005-11-06  8:26                   ` Nick Piggin
  2005-11-06  8:26                     ` [patch 11/14] mm: increase pcp size Nick Piggin
  2005-11-13  2:38                   ` [patch 9/14] mm: page_state opt Andi Kleen
  1 sibling, 1 reply; 33+ messages in thread
From: Nick Piggin @ 2005-11-06  8:26 UTC (permalink / raw)
  To: Linux Kernel Mailing List

[-- Attachment #1: Type: text/plain, Size: 35 bytes --]

10/14

-- 
SUSE Labs, Novell Inc.


[-- Attachment #2: mm-single-pcp-list.patch --]
[-- Type: text/plain, Size: 8572 bytes --]

Use a single pcp list.

Having a hot and a cold pcp list means that cold pages are overlooked
when a hot page is needed but none is available. So a workload that is
doing heavy page reclaim will not take much advantage of the pcps for
minimising zone lock contention for the pages it is freeing up.

The same wastage applies the other way (eg. when the hot list fills up
and the cold list is empty). The patch also takes care of that.

Disallow cold page allocation from taking hot pages though.

Index: linux-2.6/include/linux/mmzone.h
===================================================================
--- linux-2.6.orig/include/linux/mmzone.h
+++ linux-2.6/include/linux/mmzone.h
@@ -44,15 +44,13 @@ struct zone_padding {
 #define ZONE_PADDING(name)
 #endif
 
-struct per_cpu_pages {
+struct per_cpu_pageset {
+	struct list_head list;	/* the list of pages */
 	int count;		/* number of pages in the list */
+	int cold_count;		/* number of cold pages in the list */
 	int high;		/* high watermark, emptying needed */
 	int batch;		/* chunk size for buddy add/remove */
-	struct list_head list;	/* the list of pages */
-};
 
-struct per_cpu_pageset {
-	struct per_cpu_pages pcp[2];	/* 0: hot.  1: cold */
 #ifdef CONFIG_NUMA
 	unsigned long numa_hit;		/* allocated in intended node */
 	unsigned long numa_miss;	/* allocated in non intended node */
Index: linux-2.6/mm/page_alloc.c
===================================================================
--- linux-2.6.orig/mm/page_alloc.c
+++ linux-2.6/mm/page_alloc.c
@@ -533,10 +533,8 @@ static int rmqueue_bulk(struct zone *zon
 void drain_remote_pages(void)
 {
 	struct zone *zone;
-	int i;
 	unsigned long flags;
 
-	local_irq_save(flags);
 	for_each_zone(zone) {
 		struct per_cpu_pageset *pset;
 
@@ -544,17 +542,16 @@ void drain_remote_pages(void)
 		if (zone->zone_pgdat->node_id == numa_node_id())
 			continue;
 
-		pset = zone->pageset[smp_processor_id()];
-		for (i = 0; i < ARRAY_SIZE(pset->pcp); i++) {
-			struct per_cpu_pages *pcp;
-
-			pcp = &pset->pcp[i];
-			if (pcp->count)
-				pcp->count -= free_pages_bulk(zone, pcp->count,
-						&pcp->list, 0);
+		local_irq_save(flags);
+		if (zone->zone_pgdat->node_id != numa_node_id()) {
+			pset = zone->pageset[smp_processor_id()];
+			if (pset->count)
+				pset->count -= free_pages_bulk(zone,
+						pset->count, &pset->list, 0);
+			pset->cold_count = min(pset->cold_count, pset->count);
 		}
+		local_irq_restore(flags);
 	}
-	local_irq_restore(flags);
 }
 #endif
 
@@ -563,21 +560,16 @@ static void __drain_pages(unsigned int c
 {
 	unsigned long flags;
 	struct zone *zone;
-	int i;
 
 	for_each_zone(zone) {
 		struct per_cpu_pageset *pset;
 
 		pset = zone_pcp(zone, cpu);
-		for (i = 0; i < ARRAY_SIZE(pset->pcp); i++) {
-			struct per_cpu_pages *pcp;
-
-			pcp = &pset->pcp[i];
-			local_irq_save(flags);
-			pcp->count -= free_pages_bulk(zone, pcp->count,
-						&pcp->list, 0);
-			local_irq_restore(flags);
-		}
+		local_irq_save(flags);
+		pset->count -= free_pages_bulk(zone, pset->count,
+							&pset->list, 0);
+		pset->cold_count = min(pset->cold_count, pset->count);
+		local_irq_restore(flags);
 	}
 }
 #endif /* CONFIG_PM || CONFIG_HOTPLUG_CPU */
@@ -655,7 +647,8 @@ static void FASTCALL(free_hot_cold_page(
 static void fastcall free_hot_cold_page(struct page *page, int cold)
 {
 	struct zone *zone = page_zone(page);
-	struct per_cpu_pages *pcp;
+	struct per_cpu_pageset *pset;
+	struct list_head *entry;
 	unsigned long flags;
 
 	arch_free_page(page, 0);
@@ -664,13 +657,21 @@ static void fastcall free_hot_cold_page(
 	if (PageAnon(page))
 		page->mapping = NULL;
 	free_pages_check(page);
-	pcp = &zone_pcp(zone, get_cpu())->pcp[cold];
+	pset = zone_pcp(zone, get_cpu());
 	local_irq_save(flags);
 	page_state(pgfree)++;
-	list_add(&page->lru, &pcp->list);
-	pcp->count++;
-	if (pcp->count >= pcp->high)
-		pcp->count -= free_pages_bulk(zone, pcp->batch, &pcp->list, 0);
+	pset->count++;
+	entry = &pset->list;
+	if (cold) {
+		pset->cold_count++;
+		entry = entry->prev; /* tail */
+	}
+	list_add(&page->lru, entry);
+	if (pset->count > pset->high) {
+		pset->count -= free_pages_bulk(zone, pset->batch,
+							&pset->list, 0);
+		pset->cold_count = min(pset->cold_count, pset->count);
+	}
 	local_irq_restore(flags);
 	put_cpu();
 }
@@ -708,19 +709,31 @@ buffered_rmqueue(struct zone *zone, int 
 	int cpu = get_cpu();
 
 	if (order == 0) {
-		struct per_cpu_pages *pcp;
+		struct per_cpu_pageset *pset;
+		struct list_head *entry;
 
-		pcp = &zone_pcp(zone, cpu)->pcp[cold];
+		pset = zone_pcp(zone, cpu);
 		local_irq_save(flags);
-		if (!pcp->count) {
-			pcp->count += rmqueue_bulk(zone, 0,
-						pcp->batch, &pcp->list);
-			if (unlikely(!pcp->count))
+		if (!pset->count || (cold && !pset->cold_count &&
+				pset->count <= pset->high - (pset->high>>2))) {
+			int count;
+			count = rmqueue_bulk(zone, 0,pset->batch, &pset->list);
+			if (unlikely(!count))
 				goto failed;
+			pset->count += count;
+			pset->cold_count += count;
 		}
-		page = list_entry(pcp->list.next, struct page, lru);
+
+		pset->count--;
+		entry = pset->list.next;
+		if (cold) {
+			if (pset->cold_count)
+				pset->cold_count--;
+			entry = pset->list.prev;
+		}
+		pset->cold_count = min(pset->cold_count, pset->count);
+		page = list_entry(entry, struct page, lru);
 		list_del(&page->lru);
-		pcp->count--;
 	} else {
 		spin_lock_irqsave(&zone->lock, flags);
 		page = __rmqueue(zone, order);
@@ -1318,7 +1331,7 @@ void si_meminfo_node(struct sysinfo *val
 void show_free_areas(void)
 {
 	struct page_state ps;
-	int cpu, temperature;
+	int cpu;
 	unsigned long active;
 	unsigned long inactive;
 	unsigned long free;
@@ -1335,17 +1348,11 @@ void show_free_areas(void)
 			printk("\n");
 
 		for_each_cpu(cpu) {
-			struct per_cpu_pageset *pageset;
+			struct per_cpu_pageset *pset = zone_pcp(zone, cpu);
 
-			pageset = zone_pcp(zone, cpu);
-
-			for (temperature = 0; temperature < 2; temperature++)
-				printk("cpu %d %s: high %d, batch %d used:%d\n",
-					cpu,
-					temperature ? "cold" : "hot",
-					pageset->pcp[temperature].high,
-					pageset->pcp[temperature].batch,
-					pageset->pcp[temperature].count);
+			printk("cpu %d: high %d, batch %d, pages %d, cold %d\n",
+				cpu, pset->high, pset->batch,
+				pset->count, pset->cold_count);
 		}
 	}
 
@@ -1774,21 +1781,12 @@ static int __devinit zone_batchsize(stru
 
 inline void setup_pageset(struct per_cpu_pageset *p, unsigned long batch)
 {
-	struct per_cpu_pages *pcp;
-
 	memset(p, 0, sizeof(*p));
-
-	pcp = &p->pcp[0];		/* hot */
-	pcp->count = 0;
-	pcp->high = 4 * batch;
-	pcp->batch = max(1UL, 1 * batch);
-	INIT_LIST_HEAD(&pcp->list);
-
-	pcp = &p->pcp[1];		/* cold*/
-	pcp->count = 0;
-	pcp->high = 2 * batch;
-	pcp->batch = max(1UL, batch/2);
-	INIT_LIST_HEAD(&pcp->list);
+	p->count = 0;
+	p->cold_count = 0;
+	p->high = 6 * batch;
+	p->batch = max(1UL, 1 * batch);
+	INIT_LIST_HEAD(&p->list);
 }
 
 #ifdef CONFIG_NUMA
@@ -2168,27 +2166,15 @@ static int zoneinfo_show(struct seq_file
 			   ")"
 			   "\n  pagesets");
 		for (i = 0; i < ARRAY_SIZE(zone->pageset); i++) {
-			struct per_cpu_pageset *pageset;
-			int j;
+			struct per_cpu_pageset *pset;
 
-			pageset = zone_pcp(zone, i);
-			for (j = 0; j < ARRAY_SIZE(pageset->pcp); j++) {
-				if (pageset->pcp[j].count)
-					break;
-			}
-			if (j == ARRAY_SIZE(pageset->pcp))
-				continue;
-			for (j = 0; j < ARRAY_SIZE(pageset->pcp); j++) {
-				seq_printf(m,
-					   "\n    cpu: %i pcp: %i"
-					   "\n              count: %i"
-					   "\n              high:  %i"
-					   "\n              batch: %i",
-					   i, j,
-					   pageset->pcp[j].count,
-					   pageset->pcp[j].high,
-					   pageset->pcp[j].batch);
-			}
+			pset = zone_pcp(zone, i);
+			seq_printf(m,
+				   "\n    cpu: %i"
+				   "\n              count: %i"
+				   "\n              high:  %i"
+				   "\n              batch: %i",
+				   i, pset->count, pset->high, pset->batch);
 #ifdef CONFIG_NUMA
 			seq_printf(m,
 				   "\n            numa_hit:       %lu"
@@ -2197,12 +2183,12 @@ static int zoneinfo_show(struct seq_file
 				   "\n            interleave_hit: %lu"
 				   "\n            local_node:     %lu"
 				   "\n            other_node:     %lu",
-				   pageset->numa_hit,
-				   pageset->numa_miss,
-				   pageset->numa_foreign,
-				   pageset->interleave_hit,
-				   pageset->local_node,
-				   pageset->other_node);
+				   pset->numa_hit,
+				   pset->numa_miss,
+				   pset->numa_foreign,
+				   pset->interleave_hit,
+				   pset->local_node,
+				   pset->other_node);
 #endif
 		}
 		seq_printf(m,
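
The single list keeps hot pages at the head and cold pages at the
tail, so one pool serves both kinds of request: hot allocations pop
the head, cold allocations and cold frees use the tail. The model
below shows only that head/tail discipline, with an array standing in
for the list; it ignores the batch refill and watermark details, and
the page numbers are arbitrary:

#include <stdio.h>

#define MAX_PAGES 16
static int list[MAX_PAGES];	/* index 0 = hot end, count-1 = cold end */
static int count;

static void free_page_model(int page, int cold)
{
	int i;

	if (cold) {				/* cold pages go to the tail */
		list[count++] = page;
	} else {				/* hot pages go to the head */
		for (i = count; i > 0; i--)
			list[i] = list[i - 1];
		list[0] = page;
		count++;
	}
}

static int alloc_page_model(int cold)
{
	int page, i;

	if (!count)
		return -1;
	if (cold) {				/* cold requests take the tail */
		page = list[--count];
	} else {				/* hot requests take the head */
		page = list[0];
		for (i = 1; i < count; i++)
			list[i - 1] = list[i];
		count--;
	}
	return page;
}

int main(void)
{
	free_page_model(10, 0);					/* hot free */
	free_page_model(11, 1);					/* cold free */
	free_page_model(12, 0);					/* hot free */
	printf("hot alloc  -> %d\n", alloc_page_model(0));	/* 12, hottest */
	printf("cold alloc -> %d\n", alloc_page_model(1));	/* 11, coldest */
	printf("hot alloc  -> %d\n", alloc_page_model(0));	/* 10 */
	return 0;
}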

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [patch 11/14] mm: increase pcp size
  2005-11-06  8:26                   ` [patch 10/14] mm: single pcp list Nick Piggin
@ 2005-11-06  8:26                     ` Nick Piggin
  2005-11-06  8:27                       ` [patch 12/14] mm: variable " Nick Piggin
  0 siblings, 1 reply; 33+ messages in thread
From: Nick Piggin @ 2005-11-06  8:26 UTC (permalink / raw)
  To: Linux Kernel Mailing List

[-- Attachment #1: Type: text/plain, Size: 35 bytes --]

11/14

-- 
SUSE Labs, Novell Inc.


[-- Attachment #2: mm-increase-pcp-size.patch --]
[-- Type: text/plain, Size: 472 bytes --]

Increasing pageset size gives improvements on kbuild on my Xeon.

Index: linux-2.6/mm/page_alloc.c
===================================================================
--- linux-2.6.orig/mm/page_alloc.c
+++ linux-2.6/mm/page_alloc.c
@@ -1784,7 +1784,7 @@ inline void setup_pageset(struct per_cpu
 	memset(p, 0, sizeof(*p));
 	p->count = 0;
 	p->cold_count = 0;
-	p->high = 6 * batch;
+	p->high = 16 * batch;
 	p->batch = max(1UL, 1 * batch);
 	INIT_LIST_HEAD(&p->list);
 }

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [patch 12/14] mm: variable pcp size
  2005-11-06  8:26                     ` [patch 11/14] mm: increase pcp size Nick Piggin
@ 2005-11-06  8:27                       ` Nick Piggin
  2005-11-06  8:27                         ` [patch 13/14] mm: cleanup zone_pcp Nick Piggin
  0 siblings, 1 reply; 33+ messages in thread
From: Nick Piggin @ 2005-11-06  8:27 UTC (permalink / raw)
  To: Linux Kernel Mailing List

[-- Attachment #1: Type: text/plain, Size: 35 bytes --]

12/14

-- 
SUSE Labs, Novell Inc.


[-- Attachment #2: mm-variable-pcp-size.patch --]
[-- Type: text/plain, Size: 1973 bytes --]

The previous increase in pcp list size will probably be too much for
huge NUMA machines, despite advances in keeping remote pagesets in check.
Make pcp sizes for remote zones much smaller (slightly smaller than before
the increase), and take advantage of this to increase local pcp list size
again.

Index: linux-2.6/mm/page_alloc.c
===================================================================
--- linux-2.6.orig/mm/page_alloc.c
+++ linux-2.6/mm/page_alloc.c
@@ -1779,13 +1779,14 @@ static int __devinit zone_batchsize(stru
 	return batch;
 }
 
-inline void setup_pageset(struct per_cpu_pageset *p, unsigned long batch)
+static inline void setup_pageset(struct per_cpu_pageset *p,
+				unsigned long size, unsigned long batch)
 {
 	memset(p, 0, sizeof(*p));
 	p->count = 0;
 	p->cold_count = 0;
-	p->high = 16 * batch;
-	p->batch = max(1UL, 1 * batch);
+	p->high = max(1UL, size);
+	p->batch = max(1UL, batch);
 	INIT_LIST_HEAD(&p->list);
 }
 
@@ -1819,13 +1820,19 @@ static int __devinit process_zones(int c
 	struct zone *zone, *dzone;
 
 	for_each_zone(zone) {
+		unsigned long size, batch;
 
 		zone->pageset[cpu] = kmalloc_node(sizeof(struct per_cpu_pageset),
 					 GFP_KERNEL, cpu_to_node(cpu));
 		if (!zone->pageset[cpu])
 			goto bad;
 
-		setup_pageset(zone->pageset[cpu], zone_batchsize(zone));
+		batch = zone_batchsize(zone);
+		if (cpu_to_node(cpu) == zone->zone_pgdat->node_id)
+			size = batch * 32;
+		else
+			size = batch * 4;
+		setup_pageset(zone->pageset[cpu], size, batch);
 	}
 
 	return 0;
@@ -1923,9 +1930,9 @@ static __devinit void zone_pcp_init(stru
 #ifdef CONFIG_NUMA
 		/* Early boot. Slab allocator not functional yet */
 		zone->pageset[cpu] = &boot_pageset[cpu];
-		setup_pageset(&boot_pageset[cpu],0);
+		setup_pageset(&boot_pageset[cpu], 0, 0);
 #else
-		setup_pageset(zone_pcp(zone,cpu), batch);
+		setup_pageset(zone_pcp(zone, cpu), batch * 32, batch);
 #endif
 	}
 	printk(KERN_DEBUG "  %s zone: %lu pages, LIFO batch:%lu\n",
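
A quick worked example of what the sizing means, assuming 4K pages and
a zone_batchsize() of 16 (both just example numbers):

#include <stdio.h>

int main(void)
{
	unsigned long batch = 16, page_kb = 4;		/* example values */
	unsigned long local_high  = batch * 32;		/* local zone pcp */
	unsigned long remote_high = batch * 4;		/* remote zone pcp */

	printf("local:  high = %lu pages (%lu KB per cpu per zone)\n",
	       local_high, local_high * page_kb);
	printf("remote: high = %lu pages (%lu KB per cpu per zone)\n",
	       remote_high, remote_high * page_kb);
	return 0;
}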

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [patch 13/14] mm: cleanup zone_pcp
  2005-11-06  8:27                       ` [patch 12/14] mm: variable " Nick Piggin
@ 2005-11-06  8:27                         ` Nick Piggin
  2005-11-06  8:28                           ` [patch 14/14] mm: page_alloc cleanups Nick Piggin
  0 siblings, 1 reply; 33+ messages in thread
From: Nick Piggin @ 2005-11-06  8:27 UTC (permalink / raw)
  To: Linux Kernel Mailing List

[-- Attachment #1: Type: text/plain, Size: 35 bytes --]

13/14

-- 
SUSE Labs, Novell Inc.


[-- Attachment #2: mm-cleanup-zone_pcp.patch --]
[-- Type: text/plain, Size: 2214 bytes --]

Use zone_pcp everywhere, even though the NUMA code "knows" the
internal details of the zone. This stops other code from copying
that knowledge, and it looks nicer.

Also, only print the pagesets of online cpus in zoneinfo.

Index: linux-2.6/mm/page_alloc.c
===================================================================
--- linux-2.6.orig/mm/page_alloc.c
+++ linux-2.6/mm/page_alloc.c
@@ -544,7 +544,7 @@ void drain_remote_pages(void)
 
 		local_irq_save(flags);
 		if (zone->zone_pgdat->node_id != numa_node_id()) {
-			pset = zone->pageset[smp_processor_id()];
+			pset = zone_pcp(zone, smp_processor_id());
 			if (pset->count)
 				pset->count -= free_pages_bulk(zone,
 						pset->count, &pset->list, 0);
@@ -1822,9 +1822,9 @@ static int __devinit process_zones(int c
 	for_each_zone(zone) {
 		unsigned long size, batch;
 
-		zone->pageset[cpu] = kmalloc_node(sizeof(struct per_cpu_pageset),
+		zone_pcp(zone, cpu) = kmalloc_node(sizeof(struct per_cpu_pageset),
 					 GFP_KERNEL, cpu_to_node(cpu));
-		if (!zone->pageset[cpu])
+		if (!zone_pcp(zone, cpu))
 			goto bad;
 
 		batch = zone_batchsize(zone);
@@ -1832,7 +1832,7 @@ static int __devinit process_zones(int c
 			size = batch * 32;
 		else
 			size = batch * 4;
-		setup_pageset(zone->pageset[cpu], size, batch);
+		setup_pageset(zone_pcp(zone, cpu), size, batch);
 	}
 
 	return 0;
@@ -1840,8 +1840,8 @@ bad:
 	for_each_zone(dzone) {
 		if (dzone == zone)
 			break;
-		kfree(dzone->pageset[cpu]);
-		dzone->pageset[cpu] = NULL;
+		kfree(zone_pcp(dzone, cpu));
+		zone_pcp(dzone, cpu) = NULL;
 	}
 	return -ENOMEM;
 }
@@ -1929,8 +1929,8 @@ static __devinit void zone_pcp_init(stru
 	for (cpu = 0; cpu < NR_CPUS; cpu++) {
 #ifdef CONFIG_NUMA
 		/* Early boot. Slab allocator not functional yet */
-		zone->pageset[cpu] = &boot_pageset[cpu];
 		setup_pageset(&boot_pageset[cpu], 0, 0);
+		zone_pcp(zone, cpu) = &boot_pageset[cpu];
 #else
 		setup_pageset(zone_pcp(zone, cpu), batch * 32, batch);
 #endif
@@ -2172,7 +2172,7 @@ static int zoneinfo_show(struct seq_file
 		seq_printf(m,
 			   ")"
 			   "\n  pagesets");
-		for (i = 0; i < ARRAY_SIZE(zone->pageset); i++) {
+		for_each_online_cpu(i) {
 			struct per_cpu_pageset *pset;
 
 			pset = zone_pcp(zone, i);

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [patch 14/14] mm: page_alloc cleanups
  2005-11-06  8:27                         ` [patch 13/14] mm: cleanup zone_pcp Nick Piggin
@ 2005-11-06  8:28                           ` Nick Piggin
  0 siblings, 0 replies; 33+ messages in thread
From: Nick Piggin @ 2005-11-06  8:28 UTC (permalink / raw)
  To: Linux Kernel Mailing List

[-- Attachment #1: Type: text/plain, Size: 35 bytes --]

14/14

-- 
SUSE Labs, Novell Inc.


[-- Attachment #2: mm-page_alloc-cleanups.patch --]
[-- Type: text/plain, Size: 1418 bytes --]

Index: linux-2.6/mm/page_alloc.c
===================================================================
--- linux-2.6.orig/mm/page_alloc.c
+++ linux-2.6/mm/page_alloc.c
@@ -431,8 +431,7 @@ void __free_pages_ok(struct page *page, 
  *
  * -- wli
  */
-static inline struct page *
-expand(struct zone *zone, struct page *page,
+static inline void expand(struct zone *zone, struct page *page,
  	int low, int high, struct free_area *area)
 {
 	unsigned long size = 1 << high;
@@ -446,7 +445,6 @@ expand(struct zone *zone, struct page *p
 		area->nr_free++;
 		set_page_order(&page[size], high);
 	}
-	return page;
 }
 
 /*
@@ -498,7 +496,8 @@ static struct page *__rmqueue(struct zon
 		rmv_page_order(page);
 		area->nr_free--;
 		zone->free_pages -= 1UL << order;
-		return expand(zone, page, order, current_order, area);
+		expand(zone, page, order, current_order, area);
+		return page;
 	}
 
 	return NULL;
@@ -513,19 +512,16 @@ static int rmqueue_bulk(struct zone *zon
 			unsigned long count, struct list_head *list)
 {
 	int i;
-	int allocated = 0;
-	struct page *page;
 	
 	spin_lock(&zone->lock);
 	for (i = 0; i < count; ++i) {
-		page = __rmqueue(zone, order);
-		if (page == NULL)
+		struct page *page = __rmqueue(zone, order);
+		if (unlikely(page == NULL))
 			break;
-		allocated++;
 		list_add_tail(&page->lru, list);
 	}
 	spin_unlock(&zone->lock);
-	return allocated;
+	return i;
 }
 
 #ifdef CONFIG_NUMA
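
Reconstructed from the hunks above for readability (the surrounding
kernel context is assumed), rmqueue_bulk ends up reading roughly as
follows:

	static int rmqueue_bulk(struct zone *zone, unsigned int order,
				unsigned long count, struct list_head *list)
	{
		int i;

		spin_lock(&zone->lock);
		for (i = 0; i < count; ++i) {
			struct page *page = __rmqueue(zone, order);
			if (unlikely(page == NULL))
				break;
			list_add_tail(&page->lru, list);
		}
		spin_unlock(&zone->lock);
		return i;	/* number of pages actually placed on the list */
	}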

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [patch 2/14] mm: pte prefetch
  2005-11-06  8:20   ` [patch 2/14] mm: pte prefetch Nick Piggin
  2005-11-06  8:21     ` [patch 3/14] mm: release opt Nick Piggin
@ 2005-11-06  8:35     ` Arjan van de Ven
  2005-11-06  8:51       ` Nick Piggin
  1 sibling, 1 reply; 33+ messages in thread
From: Arjan van de Ven @ 2005-11-06  8:35 UTC (permalink / raw)
  To: Nick Piggin; +Cc: Linux Kernel Mailing List

On Sun, 2005-11-06 at 19:20 +1100, Nick Piggin wrote:
> 2/14
> 
> plain text document attachment (mm-pte-prefetch.patch)
> Prefetch ptes a line ahead. Worth 25% on ia64 when doing big forks.
> 
> Index: linux-2.6/include/asm-generic/pgtable.h
> ===================================================================
> --- linux-2.6.orig/include/asm-generic/pgtable.h
> +++ linux-2.6/include/asm-generic/pgtable.h
> @@ -196,6 +196,33 @@ static inline void ptep_set_wrprotect(st
>  })
>  #endif
>  
> +#ifndef __HAVE_ARCH_PTE_PREFETCH
> +#define PTES_PER_LINE (L1_CACHE_BYTES / sizeof(pte_t))
> +#define PTE_LINE_MASK (~(PTES_PER_LINE - 1))
> +#define ADDR_PER_LINE (PTES_PER_LINE << PAGE_SHIFT)
> +#define ADDR_LINE_MASK (~(ADDR_PER_LINE - 1))
> +
> +#define pte_prefetch(pte, addr, end)					\
> +({									\
> +	unsigned long __nextline = ((addr) + ADDR_PER_LINE) & ADDR_LINE_MASK; \
> +	if (__nextline < (end))						\
> +		prefetch(pte + PTES_PER_LINE);				\
> +})
> +

Are you sure this is right? At least on PCs, a branch predictor
miss is very expensive and might well cost more than the gain
you get from the prefetch.
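
For context, the macro above is meant to be invoked once per pte while
walking a range; a hypothetical caller (the loop shape is illustrative,
not the actual code from the patch) looks roughly like:

	do {
		pte_prefetch(src_pte, addr, end);	/* touch the next pte cache line */
		/* ... copy or inspect *src_pte here ... */
		src_pte++;
		addr += PAGE_SIZE;
	} while (addr != end);

which also shows the per-iteration compare and branch that the prefetch
adds.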



^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [patch 2/14] mm: pte prefetch
  2005-11-06  8:35     ` [patch 2/14] mm: pte prefetch Arjan van de Ven
@ 2005-11-06  8:51       ` Nick Piggin
  0 siblings, 0 replies; 33+ messages in thread
From: Nick Piggin @ 2005-11-06  8:51 UTC (permalink / raw)
  To: Arjan van de Ven; +Cc: Linux Kernel Mailing List

Arjan van de Ven wrote:
> On Sun, 2005-11-06 at 19:20 +1100, Nick Piggin wrote:
> 
>>2/14
>>
>>plain text document attachment (mm-pte-prefetch.patch)
>>Prefetch ptes a line ahead. Worth 25% on ia64 when doing big forks.
>>
>>Index: linux-2.6/include/asm-generic/pgtable.h
>>===================================================================
>>--- linux-2.6.orig/include/asm-generic/pgtable.h
>>+++ linux-2.6/include/asm-generic/pgtable.h
>>@@ -196,6 +196,33 @@ static inline void ptep_set_wrprotect(st
>> })
>> #endif
>> 
>>+#ifndef __HAVE_ARCH_PTE_PREFETCH
>>+#define PTES_PER_LINE (L1_CACHE_BYTES / sizeof(pte_t))
>>+#define PTE_LINE_MASK (~(PTES_PER_LINE - 1))
>>+#define ADDR_PER_LINE (PTES_PER_LINE << PAGE_SHIFT)
>>+#define ADDR_LINE_MASK (~(ADDR_PER_LINE - 1))
>>+
>>+#define pte_prefetch(pte, addr, end)					\
>>+({									\
>>+	unsigned long __nextline = ((addr) + ADDR_PER_LINE) & ADDR_LINE_MASK; \
>>+	if (__nextline < (end))						\
>>+		prefetch(pte + PTES_PER_LINE);				\
>>+})
>>+
> 
> 
> are you sure this is right? at least on pc's having a branch predictor
> miss is very expensive and might well be more expensive than the gain
> you get from a prefetch
> 

Yeah, not 100% sure about this one, which is why it has been sitting
around for so long.

It gives about 25% on a contrived fork workload on an ia64 system, which
is probably about its best-case workload and architecture. I haven't found
any notable regressions, but it definitely isn't going to be any faster
when the page tables are already in cache.

As long as I haven't found a real-world workload that is improved by
the patch, I won't be trying to get it merged.

-- 
SUSE Labs, Novell Inc.


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [patch 7/14] mm: remove bad_range
  2005-11-06  8:24             ` [patch 7/14] mm: remove bad_range Nick Piggin
  2005-11-06  8:25               ` [patch 8/14] mm: remove pcp_low Nick Piggin
@ 2005-11-06 17:37               ` Bob Picco
  2005-11-07  0:58                 ` Nick Piggin
  1 sibling, 1 reply; 33+ messages in thread
From: Bob Picco @ 2005-11-06 17:37 UTC (permalink / raw)
  To: Nick Piggin; +Cc: Linux Kernel Mailing List

Nick Piggin wrote:	[Sun Nov 06 2005, 03:24:40AM EST]
> 7/14
> 
> -- 
> SUSE Labs, Novell Inc.
> 

> bad_range is supposed to be a temporary check. It would be a pity to throw
> it out. Make it depend on CONFIG_DEBUG_VM instead.
> 
> Index: linux-2.6/mm/page_alloc.c
> ===================================================================
> --- linux-2.6.orig/mm/page_alloc.c
> +++ linux-2.6/mm/page_alloc.c
> @@ -78,6 +78,7 @@ int min_free_kbytes = 1024;
>  unsigned long __initdata nr_kernel_pages;
>  unsigned long __initdata nr_all_pages;
>  
> +#ifdef CONFIG_DEBUG_VM
>  static int page_outside_zone_boundaries(struct zone *zone, struct page *page)
>  {
>  	int ret = 0;
> @@ -119,6 +120,13 @@ static int bad_range(struct zone *zone, 
>  	return 0;
>  }
>  
> +#else
> +static inline int bad_range(struct zone *zone, struct page *page)
> +{
> +	return 0;
> +}
> +#endif
> +
>  static void bad_page(const char *function, struct page *page)
>  {
>  	printk(KERN_EMERG "Bad page state at %s (in process '%s', page %p)\n",
> Index: linux-2.6/lib/Kconfig.debug
> ===================================================================
> --- linux-2.6.orig/lib/Kconfig.debug
> +++ linux-2.6/lib/Kconfig.debug
> @@ -172,7 +172,8 @@ config DEBUG_VM
>  	bool "Debug VM"
>  	depends on DEBUG_KERNEL
>  	help
> -	  Enable this to debug the virtual-memory system.
> +	  Enable this to turn on extended checks in the virtual-memory system
> +          that may impact performance.
>  
>  	  If unsure, say N.
>  
Nick,

I don't think you can do it this way. On ia64, VIRTUAL_MEM_MAP depends on
CONFIG_HOLES_IN_ZONE and on the pfn_valid check within bad_range. Holes in
memory (MMIO, etc.) won't have a page structure.

bob


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [patch 1/14] mm: opt rmqueue
  2005-11-06  8:20 ` [patch 1/14] mm: opt rmqueue Nick Piggin
  2005-11-06  8:20   ` [patch 2/14] mm: Nick Piggin
  2005-11-06  8:20   ` [patch 2/14] mm: pte prefetch Nick Piggin
@ 2005-11-06 17:37   ` Andi Kleen
  2005-11-07  1:06     ` Nick Piggin
  2 siblings, 1 reply; 33+ messages in thread
From: Andi Kleen @ 2005-11-06 17:37 UTC (permalink / raw)
  To: Nick Piggin; +Cc: linux-kernel

Nick Piggin <nickpiggin@yahoo.com.au> writes:

> 1/14
> 
> -- 
> SUSE Labs, Novell Inc.
> 
> Slightly optimise some page allocation and freeing functions by
> taking advantage of knowing whether or not interrupts are disabled.

Another thing that could optimize that would be to use local_t
for the per-zone statistics and the VM statistics (I have an
old patch for the latter; it needs polishing up for the current
kernel).
With an architecture optimized for it (like i386/x86-64) they
generate much better code.

-Andi
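
As an illustration of the suggestion (a sketch only, not Andi's actual
patch; the structure and field names are made up), a counter kept in a
local_t can be bumped without an explicit interrupt-disable pair:

	#include <asm/local.h>

	struct pcp_stats {
		local_t nr_allocated;		/* hypothetical per-cpu counter */
	};

	static inline void stat_alloc(struct pcp_stats *stats)
	{
		/* a single irq-safe instruction on i386/x86-64, no cli/sti */
		local_inc(&stats->nr_allocated);
	}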

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [patch 7/14] mm: remove bad_range
  2005-11-06 17:37               ` [patch 7/14] mm: remove bad_range Bob Picco
@ 2005-11-07  0:58                 ` Nick Piggin
  2005-11-07  3:00                   ` Bob Picco
  0 siblings, 1 reply; 33+ messages in thread
From: Nick Piggin @ 2005-11-07  0:58 UTC (permalink / raw)
  To: Bob Picco; +Cc: Linux Kernel Mailing List

Bob Picco wrote:
> Nick Piggin wrote:	[Sun Nov 06 2005, 03:24:40AM EST]
> 
>>7/14
>>
>>-- 
>>SUSE Labs, Novell Inc.
>>
> 
> 
>>bad_range is supposed to be a temporary check. It would be a pity to throw
>>it out. Make it depend on CONFIG_DEBUG_VM instead.
>>
>>Index: linux-2.6/mm/page_alloc.c
>>===================================================================
>>--- linux-2.6.orig/mm/page_alloc.c
>>+++ linux-2.6/mm/page_alloc.c
>>@@ -78,6 +78,7 @@ int min_free_kbytes = 1024;
>> unsigned long __initdata nr_kernel_pages;
>> unsigned long __initdata nr_all_pages;
>> 
>>+#ifdef CONFIG_DEBUG_VM
>> static int page_outside_zone_boundaries(struct zone *zone, struct page *page)
>> {
>> 	int ret = 0;
>>@@ -119,6 +120,13 @@ static int bad_range(struct zone *zone, 
>> 	return 0;
>> }
>> 
>>+#else
>>+static inline int bad_range(struct zone *zone, struct page *page)
>>+{
>>+	return 0;
>>+}
>>+#endif
>>+
>> static void bad_page(const char *function, struct page *page)
>> {
>> 	printk(KERN_EMERG "Bad page state at %s (in process '%s', page %p)\n",
>>Index: linux-2.6/lib/Kconfig.debug
>>===================================================================
>>--- linux-2.6.orig/lib/Kconfig.debug
>>+++ linux-2.6/lib/Kconfig.debug
>>@@ -172,7 +172,8 @@ config DEBUG_VM
>> 	bool "Debug VM"
>> 	depends on DEBUG_KERNEL
>> 	help
>>-	  Enable this to debug the virtual-memory system.
>>+	  Enable this to turn on extended checks in the virtual-memory system
>>+          that may impact performance.
>> 
>> 	  If unsure, say N.
>> 
> 
> Nick,
> 
> I don't think you can do it this way. On ia64 VIRTUAL_MEM_MAP depends on 
> CONFIG_HOLES_IN_ZONE and the check within bad_range for pfn_valid. Holes in
> memory (MMIO and etc.) won't have a page structure.
> 

Hmm, right - in __free_pages_bulk.

Could we make a different call here, or is the full array of bad_range
checks required?

Thanks,
Nick

-- 
SUSE Labs, Novell Inc.


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [patch 1/14] mm: opt rmqueue
  2005-11-06 17:37   ` [patch 1/14] mm: opt rmqueue Andi Kleen
@ 2005-11-07  1:06     ` Nick Piggin
  2005-11-07  3:23       ` Andi Kleen
  0 siblings, 1 reply; 33+ messages in thread
From: Nick Piggin @ 2005-11-07  1:06 UTC (permalink / raw)
  To: Andi Kleen; +Cc: linux-kernel

Andi Kleen wrote:
> Nick Piggin <nickpiggin@yahoo.com.au> writes:
> 
> 
>>1/14
>>
>>-- 
>>SUSE Labs, Novell Inc.
>>
>>Slightly optimise some page allocation and freeing functions by
>>taking advantage of knowing whether or not interrupts are disabled.
> 
> 
> Another thing that could optimize that would be to use local_t
> for the per zone statistics and the VM statistics (i have an
> old patch for the later, needs polishing up for the current
> kernel) 
> With an architecture optimized for it (like i386/x86-64) they
> generate much better code.
> 

Yes, all this turning on and off of interrupts does have a
significant cost here.

With the full patchset applied, most of the hot-path statistics
get put under areas that already require interrupts to be off;
however, there are still a few I didn't get around to doing.
zone_statistics on CONFIG_NUMA, for example.

I wonder if local_t is still good on architectures like ppc64
where it still requires an ll/sc sequence?

Nick

-- 
SUSE Labs, Novell Inc.


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [rfc][patch 0/14] mm: performance improvements
  2005-11-06  8:11 [rfc][patch 0/14] mm: performance improvements Nick Piggin
  2005-11-06  8:20 ` [patch 1/14] mm: opt rmqueue Nick Piggin
@ 2005-11-07  1:39 ` Christoph Hellwig
  2005-11-07  1:51   ` Nick Piggin
  1 sibling, 1 reply; 33+ messages in thread
From: Christoph Hellwig @ 2005-11-07  1:39 UTC (permalink / raw)
  To: Nick Piggin; +Cc: Linux Kernel Mailing List

Could you _please_ send your patches inline?  Skipping to an attachment
every time to read the description and the patch is very awkward.  We can
allow that as an exception for people who send a single patch occasionally,
but for a huge patch series it's highly annoying.

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [patch 5/14] mm: set_page_refs opt
  2005-11-06  8:23         ` [patch 5/14] mm: set_page_refs opt Nick Piggin
  2005-11-06  8:24           ` [patch 6/14] mm: microopt conditions Nick Piggin
@ 2005-11-07  1:40           ` Christoph Hellwig
  2005-11-07  1:45             ` Nick Piggin
  1 sibling, 1 reply; 33+ messages in thread
From: Christoph Hellwig @ 2005-11-07  1:40 UTC (permalink / raw)
  To: Nick Piggin; +Cc: Linux Kernel Mailing List

On Sun, Nov 06, 2005 at 07:23:30PM +1100, Nick Piggin wrote:
> 5/14
> 
> -- 
> SUSE Labs, Novell Inc.
> 

> Inline set_page_refs. Remove mm/internal.h

So why don't you keep the inline function in mm/internal.h?  This isn't
really stuff we want driver writers to ever use.


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [patch 5/14] mm: set_page_refs opt
  2005-11-07  1:40           ` [patch 5/14] mm: set_page_refs opt Christoph Hellwig
@ 2005-11-07  1:45             ` Nick Piggin
  0 siblings, 0 replies; 33+ messages in thread
From: Nick Piggin @ 2005-11-07  1:45 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Linux Kernel Mailing List

Christoph Hellwig wrote:
> On Sun, Nov 06, 2005 at 07:23:30PM +1100, Nick Piggin wrote:
> 
>>5/14
>>
>>-- 
>>SUSE Labs, Novell Inc.
>>
> 
> 
>>Inline set_page_refs. Remove mm/internal.h
> 
> 
> So why don't you keep the inline function in mm/internal.h?  this isn't
> really stuff we want driver writers to use every.
> 
> 

There are plenty of things in the linux/ headers which driver
writers shouldn't use.

That said, I think your idea is a good one, and one has to start
somewhere. I'll make that change, thanks.

-- 
SUSE Labs, Novell Inc.


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [rfc][patch 0/14] mm: performance improvements
  2005-11-07  1:39 ` [rfc][patch 0/14] mm: performance improvements Christoph Hellwig
@ 2005-11-07  1:51   ` Nick Piggin
  2005-11-07  3:57     ` Paul Jackson
  0 siblings, 1 reply; 33+ messages in thread
From: Nick Piggin @ 2005-11-07  1:51 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Linux Kernel Mailing List

Christoph Hellwig wrote:
> could you _please_ send you patches inline?  skipping to an attachment
> everytime to read the description and patch is very awkward.  We can
> allow that as an exception for people who send a single patch occasionally,
> but for huge patch series it's highly annoying.
> 

Yeah, my mailer traditionally breaks them and not many people have
complained. I was hoping people were just allowing an exception for
me because I'm cool, but I guess not :(

Maybe time to switch mailers... I'll see what I can do.

-- 
SUSE Labs, Novell Inc.


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [patch 7/14] mm: remove bad_range
  2005-11-07  0:58                 ` Nick Piggin
@ 2005-11-07  3:00                   ` Bob Picco
  2005-11-07  3:05                     ` Nick Piggin
  0 siblings, 1 reply; 33+ messages in thread
From: Bob Picco @ 2005-11-07  3:00 UTC (permalink / raw)
  To: Nick Piggin; +Cc: Bob Picco, Linux Kernel Mailing List

Nick Piggin wrote:	[Sun Nov 06 2005, 07:58:26PM EST]
> Bob Picco wrote:
> >Nick Piggin wrote:	[Sun Nov 06 2005, 03:24:40AM EST]
> >
> >>7/14
> >>
> >>-- 
> >>SUSE Labs, Novell Inc.
> >>
> >
> >
> >>bad_range is supposed to be a temporary check. It would be a pity to throw
> >>it out. Make it depend on CONFIG_DEBUG_VM instead.
> >>
> >>Index: linux-2.6/mm/page_alloc.c
> >>===================================================================
> >>--- linux-2.6.orig/mm/page_alloc.c
> >>+++ linux-2.6/mm/page_alloc.c
> >>@@ -78,6 +78,7 @@ int min_free_kbytes = 1024;
> >>unsigned long __initdata nr_kernel_pages;
> >>unsigned long __initdata nr_all_pages;
> >>
> >>+#ifdef CONFIG_DEBUG_VM
> >>static int page_outside_zone_boundaries(struct zone *zone, struct page 
> >>*page)
> >>{
> >>	int ret = 0;
> >>@@ -119,6 +120,13 @@ static int bad_range(struct zone *zone, 
> >>	return 0;
> >>}
> >>
> >>+#else
> >>+static inline int bad_range(struct zone *zone, struct page *page)
> >>+{
> >>+	return 0;
> >>+}
> >>+#endif
> >>+
> >>static void bad_page(const char *function, struct page *page)
> >>{
> >>	printk(KERN_EMERG "Bad page state at %s (in process '%s', page 
> >>	%p)\n",
> >>Index: linux-2.6/lib/Kconfig.debug
> >>===================================================================
> >>--- linux-2.6.orig/lib/Kconfig.debug
> >>+++ linux-2.6/lib/Kconfig.debug
> >>@@ -172,7 +172,8 @@ config DEBUG_VM
> >>	bool "Debug VM"
> >>	depends on DEBUG_KERNEL
> >>	help
> >>-	  Enable this to debug the virtual-memory system.
> >>+	  Enable this to turn on extended checks in the virtual-memory system
> >>+          that may impact performance.
> >>
> >>	  If unsure, say N.
> >>
> >
> >Nick,
> >
> >I don't think you can do it this way. On ia64 VIRTUAL_MEM_MAP depends on 
> >CONFIG_HOLES_IN_ZONE and the check within bad_range for pfn_valid. Holes in
> >memory (MMIO and etc.) won't have a page structure.
> >
> 
> Hmm, right - in __free_pages_bulk.
> 
> Could we make a different call here, or is the full array of bad_range
> checks required?
Not the full array, just the pfn_valid call. CONFIG_HOLES_IN_ZONE is
already used in page_alloc.c, so perhaps use it just in __free_pages_bulk as
a replacement for the bad_range call that isn't within a BUG_ON check. It's
somewhat of a wart, but it is already there. Otherwise we might want an
arch_holes_in_zone inline, which is only required by ia64 and is a no-op
for other arches.

The only place I didn't look at closely is the BUG_ON in expand. I'll do
that tomorrow.
> 
> Thanks,
> Nick
> 
> -- 
you're welcome,

bob

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [patch 7/14] mm: remove bad_range
  2005-11-07  3:00                   ` Bob Picco
@ 2005-11-07  3:05                     ` Nick Piggin
  0 siblings, 0 replies; 33+ messages in thread
From: Nick Piggin @ 2005-11-07  3:05 UTC (permalink / raw)
  To: Bob Picco; +Cc: Linux Kernel Mailing List

Bob Picco wrote:
> Nick Piggin wrote:	[Sun Nov 06 2005, 07:58:26PM EST]
> 

>>Hmm, right - in __free_pages_bulk.
>>
>>Could we make a different call here, or is the full array of bad_range
>>checks required?
> 
> Not the full array. Just the pfn_valid call. Seems CONFIG_HOLES_IN_ZONE is
> already in page_alloc.c, perhaps just in __free_pages_bulk as a replacement
> for the bad_range call which isn't within a  BUG_ON check. It's somewhat of a 
> wart but already there. Otherwise we might want arch_holes_in_zone inline 
> which is only required by ia64 and noop for other arches.
> 

Ideally yes, it would be hidden away in an arch specific header file.

In the meantime I will just replace it with an ifdefed pfn_valid call.
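
A sketch of what such an ifdef'ed check could look like (illustrative
only; the helper name is made up and this is not the actual follow-up
patch):

	#ifdef CONFIG_HOLES_IN_ZONE
	/* holes inside a zone have no struct page, so validate the pfn */
	static inline int page_in_hole(struct page *page)
	{
		return !pfn_valid(page_to_pfn(page));
	}
	#else
	static inline int page_in_hole(struct page *page)
	{
		return 0;	/* no holes: the check compiles away */
	}
	#endif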

> The only place I didn't look closely is the BUG_ON in expand. I'll do that
> tomorrow.
> 

-- 
SUSE Labs, Novell Inc.


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [patch 1/14] mm: opt rmqueue
  2005-11-07  1:06     ` Nick Piggin
@ 2005-11-07  3:23       ` Andi Kleen
  2005-11-07  3:43         ` Nick Piggin
  0 siblings, 1 reply; 33+ messages in thread
From: Andi Kleen @ 2005-11-07  3:23 UTC (permalink / raw)
  To: Nick Piggin; +Cc: linux-kernel

On Monday 07 November 2005 02:06, Nick Piggin wrote:

> Yes, all this turning on and off of interrupts does have a
> significant cost here.

How did you find out? 

>
> With the full patchset applied, most of the hot path statistics
> get put under areas that already require interrupts to be off,
> however there are still a few I didn't get around to doing.
> zone_statistics on CONFIG_NUMA, for example.

These should just be local_t 

>
> I wonder if local_t is still good on architectures like ppc64
> where it still requires an ll/sc sequence?

The current default fallback local_t doesn't require that. It uses
different fields indexed by !!in_interrupt().

-Andi
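
A rough sketch of the fallback scheme described above (illustrative, not
the actual asm-generic implementation): two plain counters, one bumped
from process context and one from interrupt context, summed on read:

	typedef struct {
		unsigned long count[2];	/* [0] process context, [1] interrupt context */
	} fallback_local_t;

	#define fallback_local_inc(l)	((l)->count[!!in_interrupt()]++)
	#define fallback_local_read(l)	((l)->count[0] + (l)->count[1])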

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [patch 1/14] mm: opt rmqueue
  2005-11-07  3:23       ` Andi Kleen
@ 2005-11-07  3:43         ` Nick Piggin
  0 siblings, 0 replies; 33+ messages in thread
From: Nick Piggin @ 2005-11-07  3:43 UTC (permalink / raw)
  To: Andi Kleen; +Cc: linux-kernel

Andi Kleen wrote:
> On Monday 07 November 2005 02:06, Nick Piggin wrote:
> 
> 
>>Yes, all this turning on and off of interrupts does have a
>>significant cost here.
> 
> 
> How did you find out? 
> 

By measuring the actual performance improvement on kbuild.
Profiles for things like mod_page_state also go down dramatically,
but you can't use that alone to be sure of an improvement.

> 
>>With the full patchset applied, most of the hot path statistics
>>get put under areas that already require interrupts to be off,
>>however there are still a few I didn't get around to doing.
>>zone_statistics on CONFIG_NUMA, for example.
> 
> 
> These should just be local_t 
> 

Yep.

> 
>>I wonder if local_t is still good on architectures like ppc64
>>where it still requires an ll/sc sequence?
> 
> 
> The current default fallback local_t doesn't require that. It uses
> different fields indexed by !!in_interrupt()
> 

Right, I hadn't seen that. ppc(32), then.

I think maybe for struct page_state there is not so much point
in using local_t because the hot page allocator paths can easily
be covered under the interrupt critical sections.

The other fields aren't very hot, and using local_t would bloat
this up by many cachelines on 64-bit architectures like ppc64,
and would probably make them noticeably more expensive on 32-bit
architectures like ppc.

Actually, the NUMA fields in the pcp lists can probably also
just be put under the interrupt-off section that the page
allocator uses. At least it should be much easier to do when
Seth's __alloc_pages cleanup goes in. I'll keep it in mind.

-- 
SUSE Labs, Novell Inc.


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [rfc][patch 0/14] mm: performance improvements
  2005-11-07  1:51   ` Nick Piggin
@ 2005-11-07  3:57     ` Paul Jackson
  2005-11-07  4:51       ` Nick Piggin
  0 siblings, 1 reply; 33+ messages in thread
From: Paul Jackson @ 2005-11-07  3:57 UTC (permalink / raw)
  To: Nick Piggin; +Cc: hch, Linux-Kernel

> Maybe time to switch mailers... I'll see what I can do.

I recommend using a dedicated tool (a patchbomb script) to send patches,
not one's email client.  It lets you prepare everything ahead of time in
your favorite editor, and gives optimum results.

See the script I use, at:

  http://www.speakeasy.org/~pj99/sgi/sendpatchset

-- 
                  I won't rest till it's the best ...
                  Programmer, Linux Scalability
                  Paul Jackson <pj@sgi.com> 1.925.600.0401

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [rfc][patch 0/14] mm: performance improvements
  2005-11-07  3:57     ` Paul Jackson
@ 2005-11-07  4:51       ` Nick Piggin
  0 siblings, 0 replies; 33+ messages in thread
From: Nick Piggin @ 2005-11-07  4:51 UTC (permalink / raw)
  To: Paul Jackson; +Cc: hch, Linux-Kernel

Paul Jackson wrote:
>>Maybe time to switch mailers... I'll see what I can do.
> 
> 
> I recommend using a dedicated tool (patchbomb script) to send patches,
> not ones email client.  It lets you prepare everything ahead of time in
> your favorite editor, and obtains optimum results.
> 
> See the script I use, at:
> 
>   http://www.speakeasy.org/~pj99/sgi/sendpatchset
> 

Probably the best idea. I hadn't worried about those until now,
although I have several fairly large patchsets floating around.

Thanks,
Nick

-- 
SUSE Labs, Novell Inc.


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [patch 9/14] mm: page_state opt
  2005-11-06  8:25                 ` [patch 9/14] mm: page_state opt Nick Piggin
  2005-11-06  8:26                   ` [patch 10/14] mm: single pcp list Nick Piggin
@ 2005-11-13  2:38                   ` Andi Kleen
  1 sibling, 0 replies; 33+ messages in thread
From: Andi Kleen @ 2005-11-13  2:38 UTC (permalink / raw)
  To: Nick Piggin; +Cc: linux-kernel

Nick Piggin <nickpiggin@yahoo.com.au> writes:

> 9/14
> 
> -- 
> SUSE Labs, Novell Inc.
> 
> Optimise page_state manipulations by introducing a direct accessor
> to page_state fields without disabling interrupts, in which case
> the callers must provide their own locking (either disable interrupts
> or not update from interrupt context).

I have a patchkit (which I need to update for the current kernel)
that replaces this with local_t. It gives much better code, is much
simpler, and doesn't require turning off interrupts anywhere.

-Andi
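
The pattern the quoted changelog describes looks roughly like the sketch
below at a call site that already runs with interrupts disabled (the
__mod_page_state name and the call site are illustrative assumptions,
not taken from the patch):

	unsigned long flags;

	local_irq_save(flags);
	/* interrupts are off, so the raw accessor can skip its own save/restore */
	__mod_page_state(pgfree, 1UL << order);
	free_pages_bulk(zone, 1, &list, order);
	local_irq_restore(flags);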

^ permalink raw reply	[flat|nested] 33+ messages in thread

end of thread, other threads:[~2005-11-13  2:39 UTC | newest]

Thread overview: 33+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2005-11-06  8:11 [rfc][patch 0/14] mm: performance improvements Nick Piggin
2005-11-06  8:20 ` [patch 1/14] mm: opt rmqueue Nick Piggin
2005-11-06  8:20   ` [patch 2/14] mm: Nick Piggin
2005-11-06  8:20   ` [patch 2/14] mm: pte prefetch Nick Piggin
2005-11-06  8:21     ` [patch 3/14] mm: release opt Nick Piggin
2005-11-06  8:22       ` [patch 4/14] mm: rmap opt Nick Piggin
2005-11-06  8:23         ` [patch 5/14] mm: set_page_refs opt Nick Piggin
2005-11-06  8:24           ` [patch 6/14] mm: microopt conditions Nick Piggin
2005-11-06  8:24             ` [patch 7/14] mm: remove bad_range Nick Piggin
2005-11-06  8:25               ` [patch 8/14] mm: remove pcp_low Nick Piggin
2005-11-06  8:25                 ` [patch 9/14] mm: page_state opt Nick Piggin
2005-11-06  8:26                   ` [patch 10/14] mm: single pcp list Nick Piggin
2005-11-06  8:26                     ` [patch 11/14] mm: increase pcp size Nick Piggin
2005-11-06  8:27                       ` [patch 12/14] mm: variable " Nick Piggin
2005-11-06  8:27                         ` [patch 13/14] mm: cleanup zone_pcp Nick Piggin
2005-11-06  8:28                           ` [patch 14/14] mm: page_alloc cleanups Nick Piggin
2005-11-13  2:38                   ` [patch 9/14] mm: page_state opt Andi Kleen
2005-11-06 17:37               ` [patch 7/14] mm: remove bad_range Bob Picco
2005-11-07  0:58                 ` Nick Piggin
2005-11-07  3:00                   ` Bob Picco
2005-11-07  3:05                     ` Nick Piggin
2005-11-07  1:40           ` [patch 5/14] mm: set_page_refs opt Christoph Hellwig
2005-11-07  1:45             ` Nick Piggin
2005-11-06  8:35     ` [patch 2/14] mm: pte prefetch Arjan van de Ven
2005-11-06  8:51       ` Nick Piggin
2005-11-06 17:37   ` [patch 1/14] mm: opt rmqueue Andi Kleen
2005-11-07  1:06     ` Nick Piggin
2005-11-07  3:23       ` Andi Kleen
2005-11-07  3:43         ` Nick Piggin
2005-11-07  1:39 ` [rfc][patch 0/14] mm: performance improvements Christoph Hellwig
2005-11-07  1:51   ` Nick Piggin
2005-11-07  3:57     ` Paul Jackson
2005-11-07  4:51       ` Nick Piggin
