linux-kernel.vger.kernel.org archive mirror
* [PATCH][RFC] simpler __alloc_pages{_limit}
       [not found] <200108242253.f7OMrbQ20401@mailf.telia.com>
@ 2001-08-25  0:48 ` Roger Larsson
  2001-08-25  8:48   ` Mike Galbraith
  2001-08-25 11:55 ` Stephan von Krawczynski
  1 sibling, 1 reply; 8+ messages in thread
From: Roger Larsson @ 2001-08-25  0:48 UTC (permalink / raw)
  To: linux-kernel; +Cc: Stephan von Krawczynski

[-- Attachment #1: Type: text/plain, Size: 780 bytes --]

Hi again,

[two typos corrected from the version at linux-mm]

I read through __alloc_pages again and found out that allocs with order > 0
are not treated nicely.

To begin with if order > 0 then direct_reclaim will be false even if it is
allowed to wait...

This version allows "direct_reclaim" with order > 0 !

How?

Like we finally end up doing anyway...
reclaiming pages and freeing.

While adding this I thought: why not always do it like this,
even with order == 0?
It allows pages to merge to higher orders
before returning a page that was not mergeable...

Doing this - the code started to collapse...
__alloc_pages_limit could suddenly handle all special cases!
(with small functional differences)
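
For reference, the per-zone core of the new __alloc_pages_limit boils down to
roughly this (simplified from the patch below; the watermark selection and the
debug printk:s are left out):

		do {
			/* Move a clean inactive page to the free list,
			 * so it gets a chance to merge with its buddies. */
			if (direct_reclaim) {
				struct page *reclaim = reclaim_page(z);
				if (reclaim)
					__free_page(reclaim);
			}

			/* Then always allocate through rmqueue, whatever the order. */
			if (z->free_pages >= free_min) {
				struct page *page = rmqueue(z, order);
				if (page)
					return page;
			}
		} while (direct_reclaim && z->inactive_clean_pages);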

Comments?

/RogerL

-- 
Roger Larsson
Skellefteå
Sweden

[-- Attachment #2: patch-2.4.8-pre3-alloc_pages_limit-R2 --]
[-- Type: text/x-diff, Size: 8682 bytes --]

*******************************************
Patch prepared by: roger.larsson@norran.net
Name of file: patches/patch-2.4.8-pre3-alloc_pages_limit-R2

--- linux/mm/page_alloc.c.orig	Thu Aug 23 22:02:04 2001
+++ linux/mm/page_alloc.c	Sat Aug 25 01:47:02 2001
@@ -212,9 +212,12 @@
 	return NULL;
 }
 
-#define PAGES_MIN	0
-#define PAGES_LOW	1
-#define PAGES_HIGH	2
+#define PAGES_MEMALLOC  0
+#define PAGES_CRITICAL  1
+#define PAGES_MIN	2
+#define PAGES_LOW	3
+#define PAGES_HIGH	4
+#define PAGES_LOW_FREE  5
 
 /*
  * This function does the dirty work for __alloc_pages
@@ -228,7 +231,7 @@
 
 	for (;;) {
 		zone_t *z = *(zone++);
-		unsigned long water_mark;
+		unsigned long water_mark, free_min;
 
 		if (!z)
 			break;
@@ -239,10 +242,25 @@
 		 * We allocate if the number of free + inactive_clean
 		 * pages is above the watermark.
 		 */
+
+		free_min = z->pages_min;
+
 		switch (limit) {
+			case PAGES_MEMALLOC:
+				free_min = water_mark = 1;
+				break;
+			case PAGES_CRITICAL:
+				/* XXX: is pages_min/4 a good amount to reserve for this? */
+				free_min = water_mark = z->pages_min / 4;
+				break;
 			default:
 			case PAGES_MIN:
-				water_mark = z->pages_min;
+				water_mark = z->pages_min; /*  + (1 << order) - 1; */
+				break;
+			case PAGES_LOW_FREE:
+				free_min = water_mark = z->pages_low;
+				if (direct_reclaim)
+					printk(KERN_WARNING "__alloc_free_limit(PAGES_FREE && direct_reclaim = 1)\n");
 				break;
 			case PAGES_LOW:
 				water_mark = z->pages_low;
@@ -251,23 +269,44 @@
 				water_mark = z->pages_high;
 		}
 
-		if (z->free_pages + z->inactive_clean_pages >= water_mark) {
-			struct page *page = NULL;
-			/* If possible, reclaim a page directly. */
-			if (direct_reclaim)
-				page = reclaim_page(z);
-			/* If that fails, fall back to rmqueue. */
-			if (!page)
-				page = rmqueue(z, order);
-			if (page)
-				return page;
-		}
+
+
+		if (z->free_pages + z->inactive_clean_pages < water_mark) 
+			continue;
+
+		do {
+			/*
+			 * Reclaim a page from the inactive_clean list.
+			 * low water mark. Free all reclaimed pages to
+			 * give them a chance to merge to higher orders.
+			 */
+			if (direct_reclaim) {
+				struct page *reclaim = reclaim_page(z);
+				if (reclaim) {
+					__free_page(reclaim);
+				} else if (z->inactive_clean_pages > 0) {
+					printk(KERN_ERR "reclaim_pages failed but there are inactive_clean_pages\n");
+					break;
+				}
+			}
+				
+			/* Always alloc via rmqueue */
+			if (z->free_pages >= free_min)
+			{
+				struct page *page = rmqueue(z, order);
+				if (page)
+					return page;
+			}
+
+			/* if it is possible to make progress by retrying - do it */
+		} while (direct_reclaim && z->inactive_clean_pages);
 	}
 
 	/* Found nothing. */
 	return NULL;
 }
 
+
 #ifndef CONFIG_DISCONTIGMEM
 struct page *_alloc_pages(unsigned int gfp_mask, unsigned long order)
 {
@@ -281,7 +320,6 @@
  */
 struct page * __alloc_pages(unsigned int gfp_mask, unsigned long order, zonelist_t *zonelist)
 {
-	zone_t **zone;
 	int direct_reclaim = 0;
 	struct page * page;
 
@@ -300,34 +338,24 @@
 
 	/*
 	 * Can we take pages directly from the inactive_clean
-	 * list?
+	 * list? __alloc_pages_limit now handles any 'order'.
 	 */
-	if (order == 0 && (gfp_mask & __GFP_WAIT))
+	if (gfp_mask & __GFP_WAIT)
 		direct_reclaim = 1;
 
-try_again:
 	/*
 	 * First, see if we have any zones with lots of free memory.
 	 *
 	 * We allocate free memory first because it doesn't contain
 	 * any data ... DUH!
 	 */
-	zone = zonelist->zones;
-	for (;;) {
-		zone_t *z = *(zone++);
-		if (!z)
-			break;
-		if (!z->size)
-			BUG();
+	page = __alloc_pages_limit(zonelist, order, PAGES_LOW_FREE, 0);
+	if (page)
+		return page;
 
-		if (z->free_pages >= z->pages_low) {
-			page = rmqueue(z, order);
-			if (page)
-				return page;
-		} else if (z->free_pages < z->pages_min &&
-					waitqueue_active(&kreclaimd_wait)) {
-				wake_up_interruptible(&kreclaimd_wait);
-		}
+	/* "all" requested zones has less than LOW free memory, start kreclaimd */
+	if (waitqueue_active(&kreclaimd_wait)) {
+		wake_up_interruptible(&kreclaimd_wait);
 	}
 
 	/*
@@ -356,7 +384,7 @@
 
 	/*
 	 * OK, none of the zones on our zonelist has lots
-	 * of pages free.
+	 * of pages free or a higher order alloc did not succeed
 	 *
 	 * We wake up kswapd, in the hope that kswapd will
 	 * resolve this situation before memory gets tight.
@@ -371,6 +399,8 @@
 	 * - if we don't have __GFP_IO set, kswapd may be
 	 *   able to free some memory we can't free ourselves
 	 */
+
+
 	wakeup_kswapd();
 	if (gfp_mask & __GFP_WAIT) {
 		__set_current_state(TASK_RUNNING);
@@ -385,6 +415,7 @@
 	 * Kswapd should, in most situations, bring the situation
 	 * back to normal in no time.
 	 */
+try_again:
 	page = __alloc_pages_limit(zonelist, order, PAGES_MIN, direct_reclaim);
 	if (page)
 		return page;
@@ -398,40 +429,21 @@
 	 * - we're /really/ tight on memory
 	 * 	--> try to free pages ourselves with page_launder
 	 */
-	if (!(current->flags & PF_MEMALLOC)) {
+	if (!(current->flags & PF_MEMALLOC) &&
+	    (gfp_mask & __GFP_WAIT)) {
 		/*
-		 * Are we dealing with a higher order allocation?
-		 *
-		 * Move pages from the inactive_clean to the free list
-		 * in the hope of creating a large, physically contiguous
-		 * piece of free memory.
+		 * Move pages from the inactive_dirty to the inactive_clean
 		 */
-		if (order > 0 && (gfp_mask & __GFP_WAIT)) {
-			zone = zonelist->zones;
-			/* First, clean some dirty pages. */
-			current->flags |= PF_MEMALLOC;
-			page_launder(gfp_mask, 1);
-			current->flags &= ~PF_MEMALLOC;
-			for (;;) {
-				zone_t *z = *(zone++);
-				if (!z)
-					break;
-				if (!z->size)
-					continue;
-				while (z->inactive_clean_pages) {
-					struct page * page;
-					/* Move one page to the free list. */
-					page = reclaim_page(z);
-					if (!page)
-						break;
-					__free_page(page);
-					/* Try if the allocation succeeds. */
-					page = rmqueue(z, order);
-					if (page)
-						return page;
-				}
-			}
-		}
+
+		/* First, clean some dirty pages. */
+		current->flags |= PF_MEMALLOC;
+		page_launder(gfp_mask, 1);
+		current->flags &= ~PF_MEMALLOC;
+
+		page = __alloc_pages_limit(zonelist, order, PAGES_MIN, direct_reclaim); 
+		if (page)
+			return page;
+
 		/*
 		 * When we arrive here, we are really tight on memory.
 		 * Since kswapd didn't succeed in freeing pages for us,
@@ -447,17 +459,15 @@
 		 * any progress freeing pages, in that case it's better
 		 * to give up than to deadlock the kernel looping here.
 		 */
-		if (gfp_mask & __GFP_WAIT) {
-			if (!order || total_free_shortage()) {
-				int progress = try_to_free_pages(gfp_mask);
-				if (progress || (gfp_mask & __GFP_FS))
-					goto try_again;
-				/*
-				 * Fail in case no progress was made and the
-				 * allocation may not be able to block on IO.
-				 */
-				return NULL;
-			}
+		if (!order || total_free_shortage()) {
+			int progress = try_to_free_pages(gfp_mask);
+			if (progress || (gfp_mask & __GFP_FS))
+				goto try_again;
+			/*
+			 * Fail in case no progress was made and the
+			 * allocation may not be able to block on IO.
+			 */
+			return NULL;
 		}
 	}
 
@@ -471,35 +481,11 @@
 	 * in the system, otherwise it would be just too easy to
 	 * deadlock the system...
 	 */
-	zone = zonelist->zones;
-	for (;;) {
-		zone_t *z = *(zone++);
-		struct page * page = NULL;
-		if (!z)
-			break;
-		if (!z->size)
-			BUG();
-
-		/*
-		 * SUBTLE: direct_reclaim is only possible if the task
-		 * becomes PF_MEMALLOC while looping above. This will
-		 * happen when the OOM killer selects this task for
-		 * instant execution...
-		 */
-		if (direct_reclaim) {
-			page = reclaim_page(z);
-			if (page)
-				return page;
-		}
-
-		/* XXX: is pages_min/4 a good amount to reserve for this? */
-		if (z->free_pages < z->pages_min / 4 &&
-				!(current->flags & PF_MEMALLOC))
-			continue;
-		page = rmqueue(z, order);
-		if (page)
-			return page;
-	}
+	page = __alloc_pages_limit(zonelist, order,
+				   current->flags & PF_MEMALLOC ? PAGES_MEMALLOC : PAGES_CRITICAL,
+				   direct_reclaim); 
+	if (page)
+		return page;
 
 	/* No luck.. */
 	printk(KERN_ERR "__alloc_pages: %lu-order allocation failed.\n", order);


* Re: [PATCH][RFC] simpler __alloc_pages{_limit}
  2001-08-25  0:48 ` [PATCH][RFC] simpler __alloc_pages{_limit} Roger Larsson
@ 2001-08-25  8:48   ` Mike Galbraith
  2001-08-25 18:09     ` Daniel Phillips
  0 siblings, 1 reply; 8+ messages in thread
From: Mike Galbraith @ 2001-08-25  8:48 UTC (permalink / raw)
  To: Roger Larsson; +Cc: linux-kernel, Stephan von Krawczynski

On Sat, 25 Aug 2001, Roger Larsson wrote:

> Hi again,

Howdy,

> [two typos corrected from the version at linux-mm]
>
> I read through __alloc_pages again and found out that allocs with order > 0
> are not treated nicely.

But they shouldn't be treated _too_ nicely ;-)  IMHO, you should never
exhaust the entire clean list trying to service a high order allocation.
These allocations can wait.

> To begin with if order > 0 then direct_reclaim will be false even if it is
> allowed to wait...
>
> This version allows "direct_reclaim" with order > 0 !
>
> How?
>
> Like we finally end up doing anyway...
> reclaiming pages and freeing.

Yes, and the way we currently do it endangers other allocations in
that the effect the high order allocation will have upon the zone is
not accounted for.  If we're at that point, servicing a high order
allocation, and you actually succeed in assembling that possibly BIG
chunk, you may deplete the zone and leave yourself in a situation where
you can't service an incoming burst of atomic allocations.  That makes
drivers that try fallback allocations happy, but can hurt like heck.
If the thing continues to fail, you're also in pain, because the entire
time you're reclaiming, you're eating cached data for possibly no gain.

> While adding this I thought: why not always do it like this,
> even with order == 0?
> It allows pages to merge to higher orders
> before returning a page that was not mergeable...
>
> Doing this - the code started to collapse...
> __alloc_pages_limit could suddenly handle all special cases!
> (with small functional differences)
>
> Comments?

 +        /* Always alloc via rmqueue */

That adds overhead to order 0 allocations.

MHO:

I think the easiest way to handle high order allocations is to do _low_
volume background reclamation if high order allocations might fail.  ie
put a little effort into keeping such allocations available, but don't
do massive effort.  Cache tends to lose its value over time, so dumping
small quantities over time shouldn't hurt.  This would also have the
benefit of scanning the inactive lists even when there's little activity
so that list information (page accessed _yesterday_?) won't get stale.
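
Roughly what I have in mind, wired into something like kswapd's idle path.
This is only a sketch; the names and numbers are made up, reclaim_page() and
__free_page() are the existing 2.4 ones, and I'm going from memory on the
zone_t/free_area field names:

#define BG_RECLAIM_MAX_ORDER	2	/* don't bother beyond order 2 */
#define BG_RECLAIM_BATCH	4	/* keep the volume low per wakeup */

static void background_order_reclaim(zone_t *z)
{
	int order, batch = BG_RECLAIM_BATCH;

	for (order = 1; order <= BG_RECLAIM_MAX_ORDER; order++) {
		/* only spend effort on orders we can't service at all */
		if (!list_empty(&z->free_area[order].free_list))
			continue;

		/* trickle a few clean pages to the free list so they
		 * get a chance to buddy up into larger blocks */
		while (batch-- > 0) {
			struct page *page = reclaim_page(z);
			if (!page)
				break;
			__free_page(page);
		}
		break;	/* at most one small batch per wakeup */
	}
}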

I think it's ~fine to reclaim for up to say order 2, but beyond that, it
doesn't have any up side that I can see.. only down.

btw, I wonder why we don't do memory_pressure [+-]= 1 << order.
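
I mean something like the below (illustrative only, I haven't checked where
the current ++/-- actually live):

	/* where a block is handed out, e.g. in rmqueue(): */
	memory_pressure += 1 << order;

	/* and where a block comes back, e.g. in __free_pages_ok(): */
	if (memory_pressure >= (1 << order))
		memory_pressure -= 1 << order;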

	-Mike



* Re: [PATCH][RFC] simpler __alloc_pages{_limit}
       [not found] <200108242253.f7OMrbQ20401@mailf.telia.com>
  2001-08-25  0:48 ` [PATCH][RFC] simpler __alloc_pages{_limit} Roger Larsson
@ 2001-08-25 11:55 ` Stephan von Krawczynski
  2001-08-25 16:25   ` Roger Larsson
                     ` (2 more replies)
  1 sibling, 3 replies; 8+ messages in thread
From: Stephan von Krawczynski @ 2001-08-25 11:55 UTC (permalink / raw)
  To: Roger Larsson; +Cc: linux-kernel

On Sat, 25 Aug 2001 02:48:28 +0200
Roger Larsson <roger.larsson@norran.net> wrote:

> Hi again,
> 
> [two typos corrected from the version at linux-mm]
> [...]
> Doing this - the code started to collapse...
> __alloc_pages_limit could suddenly handle all special cases!
> (with small functional differences)
> 
> Comments?

Hi Roger,

I tested your patch against straight 2.4.9 (it applied mostly; the rest I did manually) and experienced the following:
1) The system gets slow, even when plenty of free memory is available. There must be some overhead inside.
2) It does not really work around the basic problem of too many cached pages under heavy filesystem action; I still get the already known "kernel: __alloc_pages: 2-order allocation failed." simply by copying files a lot.
3) Even in high load situations the CPU load seems to get worse; I got it up to 7 with normal file copying on an SMP 1GHz 1GB RAM machine.

Hm, I guess that doesn't really work as you expected.

Regards,
Stephan




* Re: [PATCH][RFC] simpler __alloc_pages{_limit}
  2001-08-25 11:55 ` Stephan von Krawczynski
@ 2001-08-25 16:25   ` Roger Larsson
  2001-08-25 19:24   ` Roger Larsson
  2001-08-28 11:22   ` Stephan von Krawczynski
  2 siblings, 0 replies; 8+ messages in thread
From: Roger Larsson @ 2001-08-25 16:25 UTC (permalink / raw)
  To: Stephan von Krawczynski; +Cc: linux-kernel

On Saturday, 25 August 2001 13:55, Stephan von Krawczynski wrote:
> On Sat, 25 Aug 2001 02:48:28 +0200
>
> Roger Larsson <roger.larsson@norran.net> wrote:
> > Hi again,
> >
> > [two typos corrected from the version at linux-mm]
> > [...]
> > Doing this - the code started to collapse...
> > __alloc_pages_limit could suddenly handle all special cases!
> > (with small functional differences)
> >
> > Comments?
>
> Hi Roger,
>
> I tested your patch against straight 2.4.9 (it applied mostly; the
> rest I did manually) and experienced the following:
> 1) The system gets slow, even when plenty of free memory is
> available. There must be some overhead inside.

It is not unlikely, because it cares too much about the higher order
allocations. It needs a higher order page and really tries...

> 2) It does not really work around the basic problem of too
> many cached pages under heavy filesystem action; I still get the already
> known "kernel: __alloc_pages: 2-order allocation failed." simply by copying
> files a lot.

Is this with reiserfs and/or NFS? And without highmem support?
Why are 2-order allocations needed???
Can anyone answer?
Higher order allocs during normal operation are not that nice...

> 3) Even in high load situations the CPU load seems to get
> worse; I got it up to 7 with normal file copying on an SMP 1GHz 1GB RAM
> machine.

Might also be related to the higher order allocations. Freeing too many
inactive pages to satisfy the request...
SMP might be a factor since the patch will hit the locks harder...

>
> Hm, I guess that doesn't really work as you expected.

Well, I'll make a version that gives up on higher order allocations more
quickly...

But the real problem might be - why are the higher order allocations
needed anyway?

/RogerL

-- 
Roger Larsson
Skellefteå
Sweden


* Re: [PATCH][RFC] simpler __alloc_pages{_limit}
  2001-08-25  8:48   ` Mike Galbraith
@ 2001-08-25 18:09     ` Daniel Phillips
  2001-08-25 19:53       ` Mike Galbraith
  0 siblings, 1 reply; 8+ messages in thread
From: Daniel Phillips @ 2001-08-25 18:09 UTC (permalink / raw)
  To: Mike Galbraith, Roger Larsson; +Cc: linux-kernel, Stephan von Krawczynski

On August 25, 2001 10:48 am, Mike Galbraith wrote:
> I think the easiest way to handle high order allocations is to do _low_
> volume background reclamation if high order allocations might fail.  ie
> put a little effort into keeping such allocations available, but don't
> do massive effort.  Cache tends to lose it's value over time, so dumping
> small quantities over time shouldn't hurt.

Ah, time.  Please see below for an alternative way of looking at time.

> This would also have the
> benefit of scanning the inactive lists even when there's little activity
> so that list information (page accessed _yesterday_?) won't get stale.

It would probably improve the average performance somewhat.  I'm looking at a 
more direct approach: if we have a shortage at order(m), then for each 
(free) member of order(m-1) look in mem_map for its buddy, and if it doesn't 
look too active, try to free it, thus moving the allocation unit up one 
order.  Could you examine this idea please and check for obvious holes?
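
In 2.4-ish terms it would look something like the sketch below (untested, and
looks_idle()/try_to_steal() are just placeholders for whatever "doesn't look
too active" and "try to free it" turn into):

/* Try to turn one free order-(m-1) block into an order-m block
 * by liberating its buddy. */
static int promote_one_block(zone_t *z, unsigned long m)
{
	struct list_head *curr;

	list_for_each(curr, &z->free_area[m - 1].free_list) {
		struct page *page = list_entry(curr, struct page, list);
		unsigned long idx = page - z->zone_mem_map;
		/* the buddy of a 2^(m-1) block sits at idx ^ (1 << (m-1)) */
		struct page *buddy = z->zone_mem_map + (idx ^ (1UL << (m - 1)));

		if (looks_idle(buddy) && try_to_steal(buddy, m - 1))
			return 1;	/* the pair can now merge to order m */
	}
	return 0;
}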

> I think it's ~fine to reclaim for up to say order 2, but beyond that, it
> doesn't have any up side that I can see.. only down.

Yep.

> btw, I wonder why we don't do memory_pressure [+-]= 1 << order.

We should, at least the "memory_pressure += 1 << order" flavor.  Patch?

IMO the concept of memory pressure is bogus.  Instead we should calculate the 
number of pages to scan directly from the number of pages consumed by 
alloc_pages and the probability that deactivated pages will be rescued.  The 
latter statistic we can measure.  We can control it, too, by controlling the 
length of the inactive queue.  The shorter the inactive queue, the lower the 
probability a deactivated page will be rescued.
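
In other words something like this (pure illustration, fixed point so it
stays integer; none of these names exist in the tree):

/* If a fraction 'rescued' (in 1/1024ths) of deactivated pages gets pulled
 * back into use, replacing what alloc_pages handed out takes roughly
 * consumed / (1 - rescued) deactivations. */
static unsigned long pages_to_scan(unsigned long consumed,
				   unsigned long rescued /* 0..1023 */)
{
	unsigned long kept = 1024 - rescued;

	return (consumed * 1024 + kept - 1) / kept;	/* round up */
}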

The idea of clocking recalculate_vm_stats by real time is also bogus.  MM 
activity does not have a linear relationship to real time under most loads 
(exceptions: those loads clocked by a realtime constraint, such as network 
bandwidth on a saturated network).  We should be clocking the mm using a time 
quantum of "one page alloced".  A scan of the literature shows considerable 
support for this view, and it's also intuitive, don't you think?

What this means in practice is, sure we can have kswapd wake once a second or 
once per 100 ms, but the amount of scanning it does needs to be based on the 
number of pages actually alloced in that interval.  (Intuitively, this 
corresponds to the number of free pages needed to replace those alloced.)  
From the mm's perspective, kswapd's constant realtime interval is a variable 
delta-t in terms of mm time, meaning that we need to scale all proportional 
mm activity by the measured delta.  Hmm, clear as mud?  This is a standard 
idea from control theory.
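
Concretely, the kind of thing I mean (a sketch; pages_alloced and
do_scan_work() are hypothetical):

static unsigned long pages_alloced;	/* the "mm clock": bumped by 1 << order
					 * wherever pages are handed out */

/* kswapd still wakes on a realtime interval, but the amount of work it
 * does is proportional to how much mm time has passed since last time. */
static void kswapd_balance_work(void)
{
	static unsigned long last;
	unsigned long now = pages_alloced;
	unsigned long delta = now - last;	/* pages alloced this interval */

	last = now;
	if (delta)
		do_scan_work(delta);
}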

BTW, what the heck is this supposed to do (page_alloc.c):

144         if (memory_pressure > NR_CPUS)
145                 memory_pressure--;

--
Daniel


* Re: [PATCH][RFC] simpler __alloc_pages{_limit}
  2001-08-25 11:55 ` Stephan von Krawczynski
  2001-08-25 16:25   ` Roger Larsson
@ 2001-08-25 19:24   ` Roger Larsson
  2001-08-28 11:22   ` Stephan von Krawczynski
  2 siblings, 0 replies; 8+ messages in thread
From: Roger Larsson @ 2001-08-25 19:24 UTC (permalink / raw)
  To: Stephan von Krawczynski; +Cc: linux-kernel

On Saturday, 25 August 2001 13:55, Stephan von Krawczynski wrote:
> On Sat, 25 Aug 2001 02:48:28 +0200
>
> 2) It does not really work around the basic problem of too
> many cached pages under heavy filesystem action; I still get the already
> known "kernel: __alloc_pages: 2-order allocation failed." simply by copying
> files a lot.

Hi again,

I would like to see where these higher order allocs come from.
Please insert something like this:

                printk("pid=%d; __alloc_pages(gfp=0x%x, order=%ld, ...)\n",
                       current->pid, gfp_mask, order);
                show_trace(NULL);

(You already have something resembling the first line - I added this at the
beginning of __alloc_pages, but you may add it together with the error message.)
If everything is OK (correct System.map) you should be able to get
a symbolic call trace in /var/log/messages
(some entries are false positives, but they can be filtered out manually).

/RogerL

-- 
Roger Larsson
Skellefteå
Sweden


* Re: [PATCH][RFC] simpler __alloc_pages{_limit}
  2001-08-25 18:09     ` Daniel Phillips
@ 2001-08-25 19:53       ` Mike Galbraith
  0 siblings, 0 replies; 8+ messages in thread
From: Mike Galbraith @ 2001-08-25 19:53 UTC (permalink / raw)
  To: Daniel Phillips; +Cc: Roger Larsson, linux-kernel, Stephan von Krawczynski

On Sat, 25 Aug 2001, Daniel Phillips wrote:

> On August 25, 2001 10:48 am, Mike Galbraith wrote:
> > I think the easiest way to handle high order allocations is to do _low_
> > volume background reclamation if high order allocations might fail.  ie
> > put a little effort into keeping such allocations available, but don't
> > do massive effort.  Cache tends to lose its value over time, so dumping
> > small quantities over time shouldn't hurt.
>
> Ah, time.  Please see below for an alternative way of looking at time.
>
> > This would also have the
> > benefit of scanning the inactive lists even when there's little activity
> > so that list information (page accessed _yesterday_?) won't get stale.
>
> It would probably improve the average performance somewhat.  I'm looking at a

It makes the system smoother, but doesn't change throughput much.  The
biggest effect is felt upon the transition from idle (launder/reclaim-wise)
to busy.  There, it can prevent unpleasant surprises.

> more direct approach: if we have a shortage at order(m), then for each
> (free) member of order(m-1) look in mem_map for its buddy, and if it doesn't
> look too active, try to free it, thus moving the allocation unit up one
> order.  Could you examine this idea please and check for obvious holes?

I don't know enough about the guts of the buddy.  Someone with stars and
moons on his pointy hat suggested checking at reclaim time to see if a
page would buddy up.. sounds the same to me.

> > I think it's ~fine to reclaim for up to say order 2, but beyond that, it
> > doesn't have any up side that I can see.. only down.
>
> Yep.
>
> > btw, I wonder why we don't do memory_pressure [+-]= 1 << order.
>
> We should, at least the "memory_pressure += 1 << order" flavor.  Patch?
>
> IMO the concept of memory pressure is bogus.  Instead we should calculate the
> number of pages to scan directly from the number of pages consumed by
> alloc_pages and the probability that deactivated pages will be rescued.  The
> latter statistic we can measure.  We can control it, too, by controlling the
> length of the inactive queue.  The shorter the inactive queue, the lower the
> probability a deactivated page will be rescued.
>
> The idea of clocking recalculate_vm_stats by real time is also bogus.  MM
> activity does not have a linear relationship to real time under most loads
> (exceptions: those loads clocked by a realtime constraint, such as network
> bandwidth on a saturated network).  We should be clocking the mm using a time
> quantum of "one page alloced".  A scan of the literature shows considerable
> support for this view, and it's also intuitive, don't you think?
>
> What this means in practice is, sure we can have kswapd wake once a second or
> once per 100 ms, but the amount of scanning it does needs to be based on the
> number of pages actually alloced in that interval.  (Intuitively, this
> corresponds to the number of free pages needed to replace those alloced.)
> From the mm's perspective, kswapd's constant realtime interval is a variable
> delta-t in terms of mm time, meaning that we need to scale all proportional
> mm activity by the measured delta.  Hmm, clear as mud?  This is a standard
> idea from control theory.

Oh, it's much clearer than mud.. intuitive.  I did some experiments with
aging/laundering directly tied to quantity of 'unanswered' allocations
and it worked pretty well.

> BTW, what the heck is this supposed to do (page_alloc.c):
>
> 144         if (memory_pressure > NR_CPUS)
> 145                 memory_pressure--;

Keep it from going negative.

	-Mike



* Re: [PATCH][RFC] simpler __alloc_pages{_limit}
  2001-08-25 11:55 ` Stephan von Krawczynski
  2001-08-25 16:25   ` Roger Larsson
  2001-08-25 19:24   ` Roger Larsson
@ 2001-08-28 11:22   ` Stephan von Krawczynski
  2 siblings, 0 replies; 8+ messages in thread
From: Stephan von Krawczynski @ 2001-08-28 11:22 UTC (permalink / raw)
  To: Roger Larsson; +Cc: linux-kernel

On Sat, 25 Aug 2001 21:24:40 +0200
Roger Larsson <roger.larsson@skelleftea.mail.telia.com> wrote:

> I would like to see where these higher order allocs come from.
> Please insert something like this:
> 
>                 printk("pid=%d; __alloc_pages(gfp=0x%x, order=%ld, ...)\n",
>                        current->pid, gfp_mask, order);
>                 show_trace(NULL);

Ok, Roger (and all the others involved).
I am back with new tests for memory problems.
Current test setup is:

software: 2.4.10-pre1 with Marcelo Tosatti's latest mem patch.
hardware: P3 1GHz 1 GB RAM, Adaptec SCSI, IBM 36GB HD 100MBit Tulip (roughly :-)

I copy files via NFS to the machine onto a reiserfs partition and read a CD (with xcdroast) to the same partition (reading an image). A note on that: NFS is exported with the option no_subtree_check, because otherwise it fails under low memory conditions and truncates files. This effect _only_ shows up when exporting a reiserfs filesystem; on ext2 it works fine. Thanks to Neil Brown for helping me find this out.

meminfo before test:

        total:    used:    free:  shared: buffers:  cached:
Mem:  921726976 92602368 829124608        0  6684672 37040128
Swap: 271392768        0 271392768
MemTotal:       900124 kB
MemFree:        809692 kB
MemShared:           0 kB
Buffers:          6528 kB
Cached:          36172 kB
SwapCached:          0 kB
Active:           3672 kB
Inact_dirty:     39028 kB
Inact_clean:         0 kB
Inact_target:      704 kB
HighTotal:           0 kB
HighFree:            0 kB
LowTotal:       900124 kB
LowFree:        809692 kB
SwapTotal:      265032 kB
SwapFree:       265032 kB

First experience during the test: CPU load is pretty high (varying around 6 - 8), and sometimes the output of "cat /proc/meminfo" takes half a minute to show up.
Anyway, everything runs OK (besides the fact that it looks slow) until reading the CD is done. Then this shows up:

Aug 28 13:08:20 admin kernel: __alloc_pages: 3-order allocation failed. 
Aug 28 13:08:20 admin kernel: pid=1139; __alloc_pages(gfp=0x20, order=3, ...)
Aug 28 13:08:20 admin kernel: Call Trace: [_alloc_pages+22/24] [__get_free_pages+10/24] [<fdcec826>] [<fdcec8f5>] [<fdceb7d7>]
Aug 28 13:08:20 admin kernel:    [<fdcec0f5>] [<fdcea589>] [_alloc_pages+22/24] [__get_free_pages+10/24] [<fdcec826>] [<fdcec8f5>]
Aug 28 13:08:20 admin kernel:    [<fdceb6bd>] [filemap_nopage+171/1008] [do_no_page+90/244] [handle_mm_fault+97/192] [<fdce94aa>] [do_page_fault+0/1164]
Aug 28 13:08:20 admin kernel:    [dentry_open+189/316] [filp_open+82/92] [do_fcntl+370/712] [sys_ioctl+443/532] [system_call+51/56]
Aug 28 13:08:20 admin kernel: __alloc_pages: 3-order allocation failed.
Aug 28 13:08:20 admin kernel: pid=1139; __alloc_pages(gfp=0x20, order=3, ...)
Aug 28 13:08:20 admin kernel: Call Trace: [_alloc_pages+22/24] [__get_free_pages+10/24] [<fdcec826>] [<fdcec8f5>] [<fdceb7d7>]
Aug 28 13:08:20 admin kernel:    [<fdcec0f5>] [<fdcea589>] [_alloc_pages+22/24] [__get_free_pages+10/24] [<fdcec826>] [<fdcec8f5>]
Aug 28 13:08:20 admin kernel:    [<fdceb6bd>] [filemap_nopage+171/1008] [do_no_page+90/244] [handle_mm_fault+97/192] [<fdce94aa>] [do_page_fault+0/1164]
Aug 28 13:08:20 admin kernel:    [dentry_open+189/316] [filp_open+82/92] [do_fcntl+370/712] [sys_ioctl+443/532] [system_call+51/56]
Aug 28 13:08:20 admin kernel: __alloc_pages: 3-order allocation failed. 
Aug 28 13:08:20 admin kernel: pid=1139; __alloc_pages(gfp=0x20, order=3, ...)
Aug 28 13:08:20 admin kernel: Call Trace: [_alloc_pages+22/24] [__get_free_pages+10/24] [<fdcec826>] [<fdcec8f5>] [<fdceb7d7>]
Aug 28 13:08:20 admin kernel:    [<fdcec0f5>] [<fdcea589>] [_alloc_pages+22/24] [__get_free_pages+10/24] [<fdcec826>] [<fdcec8f5>]
Aug 28 13:08:20 admin kernel:    [<fdceb6bd>] [filemap_nopage+171/1008] [do_no_page+90/244] [handle_mm_fault+97/192] [<fdce94aa>] [do_page_fault+0/1164]
Aug 28 13:08:20 admin kernel:    [dentry_open+189/316] [filp_open+82/92] [do_fcntl+370/712] [sys_ioctl+443/532] [system_call+51/56]
Aug 28 13:08:20 admin kernel: __alloc_pages: 3-order allocation failed. 
Aug 28 13:08:20 admin kernel: pid=1139; __alloc_pages(gfp=0x20, order=3, ...)
Aug 28 13:08:20 admin kernel: Call Trace: [_alloc_pages+22/24] [__get_free_pages+10/24] [<fdcec826>] [<fdcec8f5>] [<fdceb7d7>] 
Aug 28 13:08:20 admin kernel:    [<fdcec0f5>] [<fdcea589>] [_alloc_pages+22/24] [__get_free_pages+10/24] [<fdcec826>] [<fdcec8f5>]
Aug 28 13:08:20 admin kernel:    [<fdceb6bd>] [filemap_nopage+171/1008] [do_no_page+90/244] [handle_mm_fault+97/192] [<fdce94aa>] [do_page_fault+0/1164]
Aug 28 13:08:20 admin kernel:    [dentry_open+189/316] [filp_open+82/92] [do_fcntl+370/712] [sys_ioctl+443/532] [system_call+51/56]
Aug 28 13:08:20 admin kernel: __alloc_pages: 3-order allocation failed. 
Aug 28 13:08:20 admin kernel: pid=1139; __alloc_pages(gfp=0x20, order=3, ...)
Aug 28 13:08:20 admin kernel: Call Trace: [_alloc_pages+22/24] [__get_free_pages+10/24] [<fdcec826>] [<fdcec8f5>] [<fdceb7d7>]
Aug 28 13:08:20 admin kernel:    [<fdcec0f5>] [<fdcea589>] [_alloc_pages+22/24] [__get_free_pages+10/24] [<fdcec826>] [<fdcec8f5>] 
Aug 28 13:08:20 admin kernel:    [<fdceb6bd>] [filemap_nopage+171/1008] [do_no_page+90/244] [handle_mm_fault+97/192] [<fdce94aa>] [do_page_fault+0/1164]
Aug 28 13:08:20 admin kernel:    [dentry_open+189/316] [filp_open+82/92] [do_fcntl+370/712] [sys_ioctl+443/532] [system_call+51/56]

meminfo shows:

        total:    used:    free:  shared: buffers:  cached:
Mem:  921726976 918597632  3129344        0 26300416 783720448
Swap: 271392768        0 271392768
MemTotal:       900124 kB
MemFree:          3056 kB
MemShared:           0 kB
Buffers:         25684 kB
Cached:         765352 kB
SwapCached:          0 kB
Active:          66372 kB
Inact_dirty:    723020 kB
Inact_clean:      1644 kB
Inact_target:     5708 kB
HighTotal:           0 kB
HighFree:            0 kB
LowTotal:       900124 kB
LowFree:          3056 kB
SwapTotal:      265032 kB
SwapFree:       265032 kB

I was a bit surprised by the kernel errors. I only wanted to start comparing the CD to the image, but these problems showed up before that.

If I can do any additional debugging, tell me. There are some unresolved addresses above; maybe I made some mistake with System.map (the name System.map-2.4.10-pre1 should be OK, shouldn't it?)

Regards,
Stephan


