All of lore.kernel.org
 help / color / mirror / Atom feed
* Remove page flags for software suspend
@ 2007-02-16 10:13 Christoph Lameter
  2007-02-16 10:56 ` Rafael J. Wysocki
  0 siblings, 1 reply; 85+ messages in thread
From: Christoph Lameter @ 2007-02-16 10:13 UTC (permalink / raw)
  To: Rafael J. Wysocki; +Cc: linux-mm

I think we can just move the flags completely into the kernel/power 
directory? This centralizes all your handling of pageflags into snapshot.c 
so that you need no external definitions anymore.


Index: linux-2.6.20-mm1/include/linux/mmzone.h
===================================================================
--- linux-2.6.20-mm1.orig/include/linux/mmzone.h	2007-02-16 01:11:46.000000000 -0800
+++ linux-2.6.20-mm1/include/linux/mmzone.h	2007-02-16 01:12:23.000000000 -0800
@@ -295,6 +295,7 @@ struct zone {
 	unsigned long		spanned_pages;	/* total size, including holes */
 	unsigned long		present_pages;	/* amount of memory (excluding holes) */
 
+	unsigned long		*suspend_flags;
 	/*
 	 * rarely used fields:
 	 */
Index: linux-2.6.20-mm1/include/linux/page-flags.h
===================================================================
--- linux-2.6.20-mm1.orig/include/linux/page-flags.h	2007-02-16 01:05:26.000000000 -0800
+++ linux-2.6.20-mm1/include/linux/page-flags.h	2007-02-16 01:16:45.000000000 -0800
@@ -82,13 +82,11 @@
 #define PG_private		11	/* If pagecache, has fs-private data */
 
 #define PG_writeback		12	/* Page is under writeback */
-#define PG_nosave		13	/* Used for system suspend/resume */
 #define PG_compound		14	/* Part of a compound page */
 #define PG_swapcache		15	/* Swap page: swp_entry_t in private */
 
 #define PG_mappedtodisk		16	/* Has blocks allocated on-disk */
 #define PG_reclaim		17	/* To be reclaimed asap */
-#define PG_nosave_free		18	/* Used for system suspend/resume */
 #define PG_buddy		19	/* Page is free, on buddy lists */
 
 #define PG_mlocked		20	/* Page is mlocked */
@@ -192,16 +190,6 @@ static inline void SetPageUptodate(struc
 #define TestClearPageWriteback(page) test_and_clear_bit(PG_writeback,	\
 							&(page)->flags)
 
-#define PageNosave(page)	test_bit(PG_nosave, &(page)->flags)
-#define SetPageNosave(page)	set_bit(PG_nosave, &(page)->flags)
-#define TestSetPageNosave(page)	test_and_set_bit(PG_nosave, &(page)->flags)
-#define ClearPageNosave(page)		clear_bit(PG_nosave, &(page)->flags)
-#define TestClearPageNosave(page)	test_and_clear_bit(PG_nosave, &(page)->flags)
-
-#define PageNosaveFree(page)	test_bit(PG_nosave_free, &(page)->flags)
-#define SetPageNosaveFree(page)	set_bit(PG_nosave_free, &(page)->flags)
-#define ClearPageNosaveFree(page)		clear_bit(PG_nosave_free, &(page)->flags)
-
 #define PageBuddy(page)		test_bit(PG_buddy, &(page)->flags)
 #define __SetPageBuddy(page)	__set_bit(PG_buddy, &(page)->flags)
 #define __ClearPageBuddy(page)	__clear_bit(PG_buddy, &(page)->flags)
Index: linux-2.6.20-mm1/include/linux/suspend.h
===================================================================
--- linux-2.6.20-mm1.orig/include/linux/suspend.h	2007-02-16 01:15:30.000000000 -0800
+++ linux-2.6.20-mm1/include/linux/suspend.h	2007-02-16 01:57:51.000000000 -0800
@@ -21,7 +22,6 @@ struct pbe {
 
 /* mm/page_alloc.c */
 extern void drain_local_pages(void);
-extern void mark_free_pages(struct zone *zone);
 
 #ifdef CONFIG_PM
 /* kernel/power/swsusp.c */
@@ -42,6 +42,18 @@ static inline int software_suspend(void)
 }
 #endif /* CONFIG_PM */
 
+#ifdef CONFIG_SOFTWARE_SUSPEND
+int suspend_flags_init(struct zone *zone, unsigned long zone_size_pages);
+void mark_free_pages(struct zone *zone);
+#else
+static inline int suspend_flags_init(struct zone *zone, unsigned long zone_size_pages)
+{
+	return 0;
+}
+
+static inline void mark_free_pages(struct zone *zone) {}
+#endif
+
 void save_processor_state(void);
 void restore_processor_state(void);
 struct saved_context;
Index: linux-2.6.20-mm1/mm/page_alloc.c
===================================================================
--- linux-2.6.20-mm1.orig/mm/page_alloc.c	2007-02-16 01:22:09.000000000 -0800
+++ linux-2.6.20-mm1/mm/page_alloc.c	2007-02-16 01:40:39.000000000 -0800
@@ -767,40 +767,6 @@ static void __drain_pages(unsigned int c
 }
 
 #ifdef CONFIG_PM
-
-void mark_free_pages(struct zone *zone)
-{
-	unsigned long pfn, max_zone_pfn;
-	unsigned long flags;
-	int order;
-	struct list_head *curr;
-
-	if (!zone->spanned_pages)
-		return;
-
-	spin_lock_irqsave(&zone->lock, flags);
-
-	max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
-	for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
-		if (pfn_valid(pfn)) {
-			struct page *page = pfn_to_page(pfn);
-
-			if (!PageNosave(page))
-				ClearPageNosaveFree(page);
-		}
-
-	for (order = MAX_ORDER - 1; order >= 0; --order)
-		list_for_each(curr, &zone->free_area[order].free_list) {
-			unsigned long i;
-
-			pfn = page_to_pfn(list_entry(curr, struct page, lru));
-			for (i = 0; i < (1UL << order); i++)
-				SetPageNosaveFree(pfn_to_page(pfn + i));
-		}
-
-	spin_unlock_irqrestore(&zone->lock, flags);
-}
-
 /*
  * Spill all of this CPU's per-cpu pages back into the buddy allocator.
  */
@@ -2354,6 +2320,9 @@ __meminit int init_currently_empty_zone(
 	ret = zone_wait_table_init(zone, size);
 	if (ret)
 		return ret;
+	ret = suspend_flags_init(zone, size);
+	if (ret)
+		return ret;
 	pgdat->nr_zones = zone_idx(zone) + 1;
 
 	zone->zone_start_pfn = zone_start_pfn;
Index: linux-2.6.20-mm1/kernel/power/snapshot.c
===================================================================
--- linux-2.6.20-mm1.orig/kernel/power/snapshot.c	2007-02-16 01:46:02.000000000 -0800
+++ linux-2.6.20-mm1/kernel/power/snapshot.c	2007-02-16 01:59:24.000000000 -0800
@@ -34,6 +34,126 @@
 
 #include "power.h"
 
+static inline int PageNosave(struct page *page)
+{
+	struct zone *zone = page_zone(page);
+	unsigned long offset = page_to_pfn(page) - zone->zone_start_pfn;
+
+	return test_bit(offset * 2, zone->suspend_flags);
+}
+
+static inline void SetPageNosave(struct page *page)
+{
+	struct zone *zone = page_zone(page);
+	unsigned long offset = page_to_pfn(page) - zone->zone_start_pfn;
+
+	set_bit(offset * 2, zone->suspend_flags);
+}
+
+static inline int TestSetPageNosave(struct page *page)
+{
+	struct zone *zone = page_zone(page);
+	unsigned long offset = page_to_pfn(page) - zone->zone_start_pfn;
+
+	return test_and_set_bit(offset * 2, zone->suspend_flags);
+}
+
+static inline void ClearPageNosave(struct page *page)
+{
+	struct zone *zone = page_zone(page);
+	unsigned long offset = page_to_pfn(page) - zone->zone_start_pfn;
+
+	clear_bit(offset * 2, zone->suspend_flags);
+}
+
+static inline int TestClearPageNosave(struct page *page)
+{
+	struct zone *zone = page_zone(page);
+	unsigned long offset = page_to_pfn(page) - zone->zone_start_pfn;
+
+	return test_and_clear_bit(offset * 2, zone->suspend_flags);
+}
+
+
+static inline int PageNosaveFree(struct page *page)
+{
+	struct zone *zone = page_zone(page);
+	unsigned long offset = page_to_pfn(page) - zone->zone_start_pfn;
+
+	return test_bit(offset * 2 + 1, zone->suspend_flags);
+}
+
+static inline void SetPageNosaveFree(struct page *page)
+{
+	struct zone *zone = page_zone(page);
+	unsigned long offset = page_to_pfn(page) - zone->zone_start_pfn;
+
+	set_bit(offset * 2 + 1, zone->suspend_flags);
+}
+
+static inline void ClearPageNosaveFree(struct page *page)
+{
+	struct zone *zone = page_zone(page);
+	unsigned long offset = page_to_pfn(page) - zone->zone_start_pfn;
+
+	clear_bit(offset * 2 + 1, zone->suspend_flags);
+}
+
+int suspend_flags_init(struct zone *zone, unsigned long zone_size_pages)
+{
+	struct pglist_data *pgdat = zone->zone_pgdat;
+	size_t alloc_size;
+
+	/*
+	 * We need two bits per page in the zone. One for PageNosave and the other
+	 * for PageNosaveFree.
+	 */
+	alloc_size = BITS_TO_LONGS(zone_size_pages * 2);
+ 	if (system_state == SYSTEM_BOOTING) {
+		zone->suspend_flags = (unsigned long *)
+			alloc_bootmem_node(pgdat, alloc_size);
+	} else
+		zone->suspend_flags = (unsigned long *)vmalloc(alloc_size);
+	if (!zone->suspend_flags)
+		return -ENOMEM;
+
+	bitmap_zero(zone->suspend_flags, 2 * zone_size_pages);
+	return 0;
+}
+
+void mark_free_pages(struct zone *zone)
+{
+	unsigned long pfn, max_zone_pfn;
+	unsigned long flags;
+	int order;
+	struct list_head *curr;
+
+	if (!zone->spanned_pages)
+		return;
+
+	spin_lock_irqsave(&zone->lock, flags);
+
+	max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
+	for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
+		if (pfn_valid(pfn)) {
+			struct page *page = pfn_to_page(pfn);
+
+			if (!PageNosave(page))
+				ClearPageNosaveFree(page);
+		}
+
+	for (order = MAX_ORDER - 1; order >= 0; --order)
+		list_for_each(curr, &zone->free_area[order].free_list) {
+			unsigned long i;
+
+			pfn = page_to_pfn(list_entry(curr, struct page, lru));
+			for (i = 0; i < (1UL << order); i++)
+				SetPageNosaveFree(pfn_to_page(pfn + i));
+		}
+
+	spin_unlock_irqrestore(&zone->lock, flags);
+}
+
 /* List of PBEs needed for restoring the pages that were allocated before
  * the suspend and included in the suspend image, but have also been
  * allocated by the "resume" kernel, so their contents cannot be written

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: Remove page flags for software suspend
  2007-02-16 10:13 Remove page flags for software suspend Christoph Lameter
@ 2007-02-16 10:56 ` Rafael J. Wysocki
  2007-02-28 10:14   ` Pavel Machek
  2007-02-28 10:14   ` Pavel Machek
  0 siblings, 2 replies; 85+ messages in thread
From: Rafael J. Wysocki @ 2007-02-16 10:56 UTC (permalink / raw)
  To: Christoph Lameter; +Cc: linux-mm, Pavel Machek

On Friday, 16 February 2007 11:13, Christoph Lameter wrote:
> I think we can just move the flags completely into the kernel/power 
> directory? This centralizes all your handling of pageflags into snapshot.c 
> so that you need no external definitions anymore.

Yes, I think we can do it this way, but can we generally assume that the
offset for eg. test_bit() won't be taken modulo 32 (or 64)?

And ...

> Index: linux-2.6.20-mm1/include/linux/mmzone.h
> ===================================================================
> --- linux-2.6.20-mm1.orig/include/linux/mmzone.h	2007-02-16 01:11:46.000000000 -0800
> +++ linux-2.6.20-mm1/include/linux/mmzone.h	2007-02-16 01:12:23.000000000 -0800
> @@ -295,6 +295,7 @@ struct zone {
>  	unsigned long		spanned_pages;	/* total size, including holes */
>  	unsigned long		present_pages;	/* amount of memory (excluding holes) */
>  
> +	unsigned long		*suspend_flags;
>  	/*
>  	 * rarely used fields:
>  	 */
> Index: linux-2.6.20-mm1/include/linux/page-flags.h
> ===================================================================
> --- linux-2.6.20-mm1.orig/include/linux/page-flags.h	2007-02-16 01:05:26.000000000 -0800
> +++ linux-2.6.20-mm1/include/linux/page-flags.h	2007-02-16 01:16:45.000000000 -0800
> @@ -82,13 +82,11 @@
>  #define PG_private		11	/* If pagecache, has fs-private data */
>  
>  #define PG_writeback		12	/* Page is under writeback */
> -#define PG_nosave		13	/* Used for system suspend/resume */
>  #define PG_compound		14	/* Part of a compound page */
>  #define PG_swapcache		15	/* Swap page: swp_entry_t in private */
>  
>  #define PG_mappedtodisk		16	/* Has blocks allocated on-disk */
>  #define PG_reclaim		17	/* To be reclaimed asap */
> -#define PG_nosave_free		18	/* Used for system suspend/resume */
>  #define PG_buddy		19	/* Page is free, on buddy lists */
>  
>  #define PG_mlocked		20	/* Page is mlocked */
> @@ -192,16 +190,6 @@ static inline void SetPageUptodate(struc
>  #define TestClearPageWriteback(page) test_and_clear_bit(PG_writeback,	\
>  							&(page)->flags)
>  
> -#define PageNosave(page)	test_bit(PG_nosave, &(page)->flags)
> -#define SetPageNosave(page)	set_bit(PG_nosave, &(page)->flags)
> -#define TestSetPageNosave(page)	test_and_set_bit(PG_nosave, &(page)->flags)
> -#define ClearPageNosave(page)		clear_bit(PG_nosave, &(page)->flags)
> -#define TestClearPageNosave(page)	test_and_clear_bit(PG_nosave, &(page)->flags)
> -
> -#define PageNosaveFree(page)	test_bit(PG_nosave_free, &(page)->flags)
> -#define SetPageNosaveFree(page)	set_bit(PG_nosave_free, &(page)->flags)
> -#define ClearPageNosaveFree(page)		clear_bit(PG_nosave_free, &(page)->flags)
> -
>  #define PageBuddy(page)		test_bit(PG_buddy, &(page)->flags)
>  #define __SetPageBuddy(page)	__set_bit(PG_buddy, &(page)->flags)
>  #define __ClearPageBuddy(page)	__clear_bit(PG_buddy, &(page)->flags)
> Index: linux-2.6.20-mm1/include/linux/suspend.h
> ===================================================================
> --- linux-2.6.20-mm1.orig/include/linux/suspend.h	2007-02-16 01:15:30.000000000 -0800
> +++ linux-2.6.20-mm1/include/linux/suspend.h	2007-02-16 01:57:51.000000000 -0800
> @@ -21,7 +22,6 @@ struct pbe {
>  
>  /* mm/page_alloc.c */
>  extern void drain_local_pages(void);
> -extern void mark_free_pages(struct zone *zone);
>  
>  #ifdef CONFIG_PM
>  /* kernel/power/swsusp.c */
> @@ -42,6 +42,18 @@ static inline int software_suspend(void)
>  }
>  #endif /* CONFIG_PM */
>  
> +#ifdef CONFIG_SOFTWARE_SUSPEND
> +int suspend_flags_init(struct zone *zone, unsigned long zone_size_pages);
> +void mark_free_pages(struct zone *zone);
> +#else
> +static inline int suspend_flags_init(struct zone *zone, unsigned long zone_size_pages)
> +{
> +	return 0;
> +}
> +
> +static inline void mark_free_pages(struct zone *zone) {}
> +#endif
> +
>  void save_processor_state(void);
>  void restore_processor_state(void);
>  struct saved_context;
> Index: linux-2.6.20-mm1/mm/page_alloc.c
> ===================================================================
> --- linux-2.6.20-mm1.orig/mm/page_alloc.c	2007-02-16 01:22:09.000000000 -0800
> +++ linux-2.6.20-mm1/mm/page_alloc.c	2007-02-16 01:40:39.000000000 -0800
> @@ -767,40 +767,6 @@ static void __drain_pages(unsigned int c
>  }
>  
>  #ifdef CONFIG_PM
> -
> -void mark_free_pages(struct zone *zone)
> -{
> -	unsigned long pfn, max_zone_pfn;
> -	unsigned long flags;
> -	int order;
> -	struct list_head *curr;
> -
> -	if (!zone->spanned_pages)
> -		return;
> -
> -	spin_lock_irqsave(&zone->lock, flags);
> -
> -	max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
> -	for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
> -		if (pfn_valid(pfn)) {
> -			struct page *page = pfn_to_page(pfn);
> -
> -			if (!PageNosave(page))
> -				ClearPageNosaveFree(page);
> -		}
> -
> -	for (order = MAX_ORDER - 1; order >= 0; --order)
> -		list_for_each(curr, &zone->free_area[order].free_list) {
> -			unsigned long i;
> -
> -			pfn = page_to_pfn(list_entry(curr, struct page, lru));
> -			for (i = 0; i < (1UL << order); i++)
> -				SetPageNosaveFree(pfn_to_page(pfn + i));
> -		}
> -
> -	spin_unlock_irqrestore(&zone->lock, flags);
> -}
> -
>  /*
>   * Spill all of this CPU's per-cpu pages back into the buddy allocator.
>   */
> @@ -2354,6 +2320,9 @@ __meminit int init_currently_empty_zone(
>  	ret = zone_wait_table_init(zone, size);
>  	if (ret)
>  		return ret;
> +	ret = suspend_flags_init(zone, size);
> +	if (ret)
> +		return ret;
>  	pgdat->nr_zones = zone_idx(zone) + 1;
>  
>  	zone->zone_start_pfn = zone_start_pfn;
> Index: linux-2.6.20-mm1/kernel/power/snapshot.c
> ===================================================================
> --- linux-2.6.20-mm1.orig/kernel/power/snapshot.c	2007-02-16 01:46:02.000000000 -0800
> +++ linux-2.6.20-mm1/kernel/power/snapshot.c	2007-02-16 01:59:24.000000000 -0800
> @@ -34,6 +34,126 @@
>  
>  #include "power.h"
>  
> +static inline int PageNosave(struct page *page)
> +{
> +	struct zone *zone = page_zone(page);
> +	unsigned long offset = page_to_pfn(page) - zone->zone_start_pfn;
> +
> +	return test_bit(offset * 2, zone->suspend_flags);
> +}
> +
> +static inline void SetPageNosave(struct page *page)
> +{
> +	struct zone *zone = page_zone(page);
> +	unsigned long offset = page_to_pfn(page) - zone->zone_start_pfn;
> +
> +	set_bit(offset * 2, zone->suspend_flags);
> +}
> +
> +static inline int TestSetPageNosave(struct page *page)
> +{
> +	struct zone *zone = page_zone(page);
> +	unsigned long offset = page_to_pfn(page) - zone->zone_start_pfn;
> +
> +	return test_and_set_bit(offset * 2, zone->suspend_flags);

... I'd prefer

	unsigned long offset = (page_to_pfn(page) - zone->zone_start_pfn) << 1;

	return test_and_set_bit(offset, zone->suspend_flags);

> +}
> +
> +static inline void ClearPageNosave(struct page *page)
> +{
> +	struct zone *zone = page_zone(page);
> +	unsigned long offset = page_to_pfn(page) - zone->zone_start_pfn;
> +
> +	clear_bit(offset * 2, zone->suspend_flags);
> +}
> +
> +static inline int TestClearPageNosave(struct page *page)
> +{
> +	struct zone *zone = page_zone(page);
> +	unsigned long offset = page_to_pfn(page) - zone->zone_start_pfn;
> +
> +	return test_and_clear_bit(offset * 2, zone->suspend_flags);
> +}
> +
> +
> +static inline int PageNosaveFree(struct page *page)
> +{
> +	struct zone *zone = page_zone(page);
> +	unsigned long offset = page_to_pfn(page) - zone->zone_start_pfn;
> +
> +	return test_bit(offset * 2 + 1, zone->suspend_flags);
> +}
> +
> +static inline void SetPageNosaveFree(struct page *page)
> +{
> +	struct zone *zone = page_zone(page);
> +	unsigned long offset = page_to_pfn(page) - zone->zone_start_pfn;
> +
> +	set_bit(offset * 2 + 1, zone->suspend_flags);
> +}
> +
> +static inline void ClearPageNosaveFree(struct page *page)
> +{
> +	struct zone *zone = page_zone(page);
> +	unsigned long offset = page_to_pfn(page) - zone->zone_start_pfn;
> +
> +	clear_bit(offset * 2 + 1, zone->suspend_flags);
> +}
> +
> +int suspend_flags_init(struct zone *zone, unsigned long zone_size_pages)
> +{
> +	struct pglist_data *pgdat = zone->zone_pgdat;
> +	size_t alloc_size;
> +
> +	/*
> +	 * We need two bits per page in the zone. One for PageNosave and the other
> +	 * for PageNosaveFree.
> +	 */
> +	alloc_size = BITS_TO_LONGS(zone_size_pages * 2);
> + 	if (system_state == SYSTEM_BOOTING) {
> +		zone->suspend_flags = (unsigned long *)
> +			alloc_bootmem_node(pgdat, alloc_size);
> +	} else
> +		zone->suspend_flags = (unsigned long *)vmalloc(alloc_size);
> +	if (!zone->suspend_flags)
> +		return -ENOMEM;
> +
> +	bitmap_zero(zone->suspend_flags, 2 * zone_size_pages);
> +	return 0;
> +}
> +
> +void mark_free_pages(struct zone *zone)
> +{
> +	unsigned long pfn, max_zone_pfn;
> +	unsigned long flags;
> +	int order;
> +	struct list_head *curr;
> +
> +	if (!zone->spanned_pages)
> +		return;
> +
> +	spin_lock_irqsave(&zone->lock, flags);
> +
> +	max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
> +	for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
> +		if (pfn_valid(pfn)) {
> +			struct page *page = pfn_to_page(pfn);
> +
> +			if (!PageNosave(page))
> +				ClearPageNosaveFree(page);
> +		}
> +
> +	for (order = MAX_ORDER - 1; order >= 0; --order)
> +		list_for_each(curr, &zone->free_area[order].free_list) {
> +			unsigned long i;
> +
> +			pfn = page_to_pfn(list_entry(curr, struct page, lru));
> +			for (i = 0; i < (1UL << order); i++)
> +				SetPageNosaveFree(pfn_to_page(pfn + i));
> +		}
> +
> +	spin_unlock_irqrestore(&zone->lock, flags);
> +}
> +
>  /* List of PBEs needed for restoring the pages that were allocated before
>   * the suspend and included in the suspend image, but have also been
>   * allocated by the "resume" kernel, so their contents cannot be written

Greetings,
Rafael

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: Remove page flags for software suspend
  2007-02-16 10:56 ` Rafael J. Wysocki
@ 2007-02-28 10:14   ` Pavel Machek
  2007-02-28 15:25     ` Christoph Lameter
  2007-02-28 10:14   ` Pavel Machek
  1 sibling, 1 reply; 85+ messages in thread
From: Pavel Machek @ 2007-02-28 10:14 UTC (permalink / raw)
  To: Rafael J. Wysocki; +Cc: Christoph Lameter, linux-mm

Hi!

> > I think we can just move the flags completely into the kernel/power 
> > directory? This centralizes all your handling of pageflags into snapshot.c 
> > so that you need no external definitions anymore.
> 
> Yes, I think we can do it this way, but can we generally assume that the
> offset for eg. test_bit() won't be taken modulo 32 (or 64)?

I... actually do not like that patch. It adds code... at little or no
benefit.

(And Nigel, please cc me when changing  kernel/power/*).
								Pavel

-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: Remove page flags for software suspend
  2007-02-16 10:56 ` Rafael J. Wysocki
  2007-02-28 10:14   ` Pavel Machek
@ 2007-02-28 10:14   ` Pavel Machek
  1 sibling, 0 replies; 85+ messages in thread
From: Pavel Machek @ 2007-02-28 10:14 UTC (permalink / raw)
  To: Rafael J. Wysocki; +Cc: Christoph Lameter, linux-mm

Hi!

> > I think we can just move the flags completely into the kernel/power 
> > directory? This centralizes all your handling of pageflags into snapshot.c 
> > so that you need no external definitions anymore.
> 
> Yes, I think we can do it this way, but can we generally assume that the
> offset for eg. test_bit() won't be taken modulo 32 (or 64)?
> 
> And ...

Ouch... I somehow assumed it was Nigel doing this patch, and noticed
too late. Sorry.

I still wish to be cc-ed on swsusp changes.
								Pavel

-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: Remove page flags for software suspend
  2007-02-28 10:14   ` Pavel Machek
@ 2007-02-28 15:25     ` Christoph Lameter
  2007-02-28 17:13       ` Rafael J. Wysocki
  2007-02-28 21:08       ` Pavel Machek
  0 siblings, 2 replies; 85+ messages in thread
From: Christoph Lameter @ 2007-02-28 15:25 UTC (permalink / raw)
  To: Pavel Machek; +Cc: Rafael J. Wysocki, linux-mm

On Wed, 28 Feb 2007, Pavel Machek wrote:

> I... actually do not like that patch. It adds code... at little or no
> benefit.

We are looking into saving page flags since we are running out. The two 
page flags used by software suspend are rarely needed and should be taken 
out of the flags. If you can do it a different way then please do.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: Remove page flags for software suspend
  2007-02-28 15:25     ` Christoph Lameter
@ 2007-02-28 17:13       ` Rafael J. Wysocki
  2007-02-28 17:17         ` Christoph Lameter
  2007-03-01 15:18         ` Nick Piggin
  2007-02-28 21:08       ` Pavel Machek
  1 sibling, 2 replies; 85+ messages in thread
From: Rafael J. Wysocki @ 2007-02-28 17:13 UTC (permalink / raw)
  To: Christoph Lameter; +Cc: Pavel Machek, linux-mm

On Wednesday, 28 February 2007 16:25, Christoph Lameter wrote:
> On Wed, 28 Feb 2007, Pavel Machek wrote:
> 
> > I... actually do not like that patch. It adds code... at little or no
> > benefit.
> 
> We are looking into saving page flags since we are running out. The two 
> page flags used by software suspend are rarely needed and should be taken 
> out of the flags. If you can do it a different way then please do.

As I have already said for a couple of times, I think we can and I'm going to
do it, but right now I'm a bit busy with other things that I consider as more
urgent.

Greetings,
Rafael

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: Remove page flags for software suspend
  2007-02-28 17:13       ` Rafael J. Wysocki
@ 2007-02-28 17:17         ` Christoph Lameter
  2007-02-28 17:33           ` Rafael J. Wysocki
  2007-03-01  2:35           ` Nick Piggin
  2007-03-01 15:18         ` Nick Piggin
  1 sibling, 2 replies; 85+ messages in thread
From: Christoph Lameter @ 2007-02-28 17:17 UTC (permalink / raw)
  To: Rafael J. Wysocki; +Cc: Pavel Machek, linux-mm

On Wed, 28 Feb 2007, Rafael J. Wysocki wrote:

> As I have already said for a couple of times, I think we can and I'm going to
> do it, but right now I'm a bit busy with other things that I consider as more
> urgent.

Ummm.. There are other parties who would like to use these flags!

I think my patch localizes the suspend material properly. In fact there 
is *no* reason for the page flags to be visible outside of snapshot.c.
What is the problem with the patch?

How long will it take you to remove the flags on your own?
 

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: Remove page flags for software suspend
  2007-02-28 17:17         ` Christoph Lameter
@ 2007-02-28 17:33           ` Rafael J. Wysocki
  2007-02-28 17:35             ` Christoph Lameter
  2007-03-01  2:35           ` Nick Piggin
  1 sibling, 1 reply; 85+ messages in thread
From: Rafael J. Wysocki @ 2007-02-28 17:33 UTC (permalink / raw)
  To: Christoph Lameter; +Cc: Pavel Machek, linux-mm

On Wednesday, 28 February 2007 18:17, Christoph Lameter wrote:
> On Wed, 28 Feb 2007, Rafael J. Wysocki wrote:
> 
> > As I have already said for a couple of times, I think we can and I'm going to
> > do it, but right now I'm a bit busy with other things that I consider as more
> > urgent.
> 
> Ummm.. There are other parties who would like to use these flags!

Yes, I know that.  On the other hand, we have terminally broken CPU hotplug
code in the kernel that I'd like to get fixed _first_.

> I think my patch localizes the suspend material properly. In fact there 
> is *no* reason for the page flags to be visible outside of snapshot.c.

Yes, there is.  PageNosave should be visible to the architectures so that they
can mark nosave pages for the suspend.

> What is the problem with the patch?

PageNosaveFree is only needed at the suspend time, so we need not allocate
it in advance.
 
> How long will it take you to remove the flags on your own?

I can't promise anything, because that also depends on people who work
on the CPU hotplug, workqueues etc. and these things are hard.  I'll do that
as soon as I can.

Greetings,
Rafael

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: Remove page flags for software suspend
  2007-02-28 17:33           ` Rafael J. Wysocki
@ 2007-02-28 17:35             ` Christoph Lameter
  2007-02-28 17:51               ` Rafael J. Wysocki
  0 siblings, 1 reply; 85+ messages in thread
From: Christoph Lameter @ 2007-02-28 17:35 UTC (permalink / raw)
  To: Rafael J. Wysocki; +Cc: Pavel Machek, linux-mm

On Wed, 28 Feb 2007, Rafael J. Wysocki wrote:

> Yes, I know that.  On the other hand, we have terminally broken CPU hotplug
> code in the kernel that I'd like to get fixed _first_.

The cpu hotplug code has been terminal for years.

> PageNosaveFree is only needed at the suspend time, so we need not allocate
> it in advance.

Well it should be simple to change the patch to allocate the bitmaps 
later.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: Remove page flags for software suspend
  2007-02-28 17:35             ` Christoph Lameter
@ 2007-02-28 17:51               ` Rafael J. Wysocki
  2007-02-28 17:56                 ` Christoph Lameter
  0 siblings, 1 reply; 85+ messages in thread
From: Rafael J. Wysocki @ 2007-02-28 17:51 UTC (permalink / raw)
  To: Christoph Lameter; +Cc: Pavel Machek, linux-mm

On Wednesday, 28 February 2007 18:35, Christoph Lameter wrote:
> On Wed, 28 Feb 2007, Rafael J. Wysocki wrote:
> 
> > Yes, I know that.  On the other hand, we have terminally broken CPU hotplug
> > code in the kernel that I'd like to get fixed _first_.
> 
> The cpu hotplug code has been terminal for years.

But recently it's been becoming a real pain.

> > PageNosaveFree is only needed at the suspend time, so we need not allocate
> > it in advance.
> 
> Well it should be simple to change the patch to allocate the bitmaps 
> later.

Well, yes, I think so.  Still, there may be another way of doing it and I need
some time to have a look.

BTW, have you tested the patch?

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: Remove page flags for software suspend
  2007-02-28 17:51               ` Rafael J. Wysocki
@ 2007-02-28 17:56                 ` Christoph Lameter
  2007-02-28 21:11                   ` Pavel Machek
  0 siblings, 1 reply; 85+ messages in thread
From: Christoph Lameter @ 2007-02-28 17:56 UTC (permalink / raw)
  To: Rafael J. Wysocki; +Cc: Pavel Machek, linux-mm

On Wed, 28 Feb 2007, Rafael J. Wysocki wrote:

> Well, yes, I think so.  Still, there may be another way of doing it and I need
> some time to have a look.
> 
> BTW, have you tested the patch?

Nope. Sorry, have no use for software suspend.


--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: Remove page flags for software suspend
  2007-02-28 15:25     ` Christoph Lameter
  2007-02-28 17:13       ` Rafael J. Wysocki
@ 2007-02-28 21:08       ` Pavel Machek
  2007-02-28 21:16         ` Christoph Lameter
  2007-03-01  2:31         ` Nick Piggin
  1 sibling, 2 replies; 85+ messages in thread
From: Pavel Machek @ 2007-02-28 21:08 UTC (permalink / raw)
  To: Christoph Lameter; +Cc: Rafael J. Wysocki, linux-mm

Hi!

> > I... actually do not like that patch. It adds code... at little or no
> > benefit.
> 
> We are looking into saving page flags since we are running out. The two 
> page flags used by software suspend are rarely needed and should be taken 
> out of the flags. If you can do it a different way then please do.

Hmm, can't we just add another word to struct page?

Plus we really need PageNosave from boot on...

							Pavel

-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: Remove page flags for software suspend
  2007-02-28 17:56                 ` Christoph Lameter
@ 2007-02-28 21:11                   ` Pavel Machek
  0 siblings, 0 replies; 85+ messages in thread
From: Pavel Machek @ 2007-02-28 21:11 UTC (permalink / raw)
  To: Christoph Lameter; +Cc: Rafael J. Wysocki, linux-mm

Hi!

> > Well, yes, I think so.  Still, there may be another way of doing it and I need
> > some time to have a look.
> > 
> > BTW, have you tested the patch?
> 
> Nope. Sorry, have no use for software suspend.

It is a matter of adding resume=/dev/hda9 to kernel cmdline and then echo disk
> /sys/power/state... I guess that is not too much to ask?

Anyway, if you want to submit patch to swsusp, please test it, and cc
me on that mail.
							Pavel
-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: Remove page flags for software suspend
  2007-02-28 21:08       ` Pavel Machek
@ 2007-02-28 21:16         ` Christoph Lameter
  2007-02-28 21:22           ` Pavel Machek
  2007-03-01  2:31         ` Nick Piggin
  1 sibling, 1 reply; 85+ messages in thread
From: Christoph Lameter @ 2007-02-28 21:16 UTC (permalink / raw)
  To: Pavel Machek; +Cc: Rafael J. Wysocki, linux-mm

On Wed, 28 Feb 2007, Pavel Machek wrote:

> Hmm, can't we just add another word to struct page?
> 
> Plus we really need PageNosave from boot on...

Well it would be great to get the story straight. First I was told that 
the bitmaps can be allocated later. Now we dont. The current patch should 
do what you want.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: Remove page flags for software suspend
  2007-02-28 21:16         ` Christoph Lameter
@ 2007-02-28 21:22           ` Pavel Machek
  2007-02-28 22:23             ` Rafael J. Wysocki
  0 siblings, 1 reply; 85+ messages in thread
From: Pavel Machek @ 2007-02-28 21:22 UTC (permalink / raw)
  To: Christoph Lameter; +Cc: Rafael J. Wysocki, linux-mm

On Wed 2007-02-28 13:16:51, Christoph Lameter wrote:
> On Wed, 28 Feb 2007, Pavel Machek wrote:
> 
> > Hmm, can't we just add another word to struct page?

?

> > Plus we really need PageNosave from boot on...
> 
> Well it would be great to get the story straight. First I was told that 
> the bitmaps can be allocated later. Now we dont. The current patch should 
> do what you want.

PageNosave is set by boot code in some cases, as Rafael told you.

It may be possible to redo boot processing at suspend time, but it
would be ugly can of worms, I'm not sure if BIOS data are still
available, and neccessary functions are probably __init.

							Pavel
-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: Remove page flags for software suspend
  2007-02-28 21:22           ` Pavel Machek
@ 2007-02-28 22:23             ` Rafael J. Wysocki
  0 siblings, 0 replies; 85+ messages in thread
From: Rafael J. Wysocki @ 2007-02-28 22:23 UTC (permalink / raw)
  To: Pavel Machek; +Cc: Christoph Lameter, linux-mm

On Wednesday, 28 February 2007 22:22, Pavel Machek wrote:
> On Wed 2007-02-28 13:16:51, Christoph Lameter wrote:
> > On Wed, 28 Feb 2007, Pavel Machek wrote:
> > 
> > > Hmm, can't we just add another word to struct page?
> 
> ?

That would be wasteful, I think, and PageNosaveFree isn't really needed
outside the swsusp code.

> > > Plus we really need PageNosave from boot on...

Yes.

> > 
> > Well it would be great to get the story straight. First I was told that 
> > the bitmaps can be allocated later. Now we dont. The current patch should 
> > do what you want.

Nope.  There are two flags and one of them (PageNosaveFree) can and really
should be allocated later, while the other (PageNosave) is needed from the
start.
 
> PageNosave is set by boot code in some cases, as Rafael told you.
> 
> It may be possible to redo boot processing at suspend time, but it
> would be ugly can of worms, I'm not sure if BIOS data are still
> available, and neccessary functions are probably __init.

Exactly.

Greetings,
Rafael

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: Remove page flags for software suspend
  2007-02-28 21:08       ` Pavel Machek
  2007-02-28 21:16         ` Christoph Lameter
@ 2007-03-01  2:31         ` Nick Piggin
  1 sibling, 0 replies; 85+ messages in thread
From: Nick Piggin @ 2007-03-01  2:31 UTC (permalink / raw)
  To: Pavel Machek; +Cc: Christoph Lameter, Rafael J. Wysocki, linux-mm

Pavel Machek wrote:
> Hi!
> 
> 
>>>I... actually do not like that patch. It adds code... at little or no
>>>benefit.
>>
>>We are looking into saving page flags since we are running out. The two 
>>page flags used by software suspend are rarely needed and should be taken 
>>out of the flags. If you can do it a different way then please do.
> 
> 
> Hmm, can't we just add another word to struct page?

That's what we want to avoid. As soon as we add another word, then
everbody goes crazy using it up and we can never remove it again.

-- 
SUSE Labs, Novell Inc.
Send instant messages to your online friends http://au.messenger.yahoo.com 

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: Remove page flags for software suspend
  2007-02-28 17:17         ` Christoph Lameter
  2007-02-28 17:33           ` Rafael J. Wysocki
@ 2007-03-01  2:35           ` Nick Piggin
  1 sibling, 0 replies; 85+ messages in thread
From: Nick Piggin @ 2007-03-01  2:35 UTC (permalink / raw)
  To: Christoph Lameter; +Cc: Rafael J. Wysocki, Pavel Machek, linux-mm

Christoph Lameter wrote:
> On Wed, 28 Feb 2007, Rafael J. Wysocki wrote:
> 
> 
>>As I have already said for a couple of times, I think we can and I'm going to
>>do it, but right now I'm a bit busy with other things that I consider as more
>>urgent.
> 
> 
> Ummm.. There are other parties who would like to use these flags!

Lots of other parties. Let's make sure that no more backdoor page flags get
allocated without going through the linux-mm list to work out whether we
really need it or can live without it...

-- 
SUSE Labs, Novell Inc.
Send instant messages to your online friends http://au.messenger.yahoo.com 

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: Remove page flags for software suspend
  2007-02-28 17:13       ` Rafael J. Wysocki
  2007-02-28 17:17         ` Christoph Lameter
@ 2007-03-01 15:18         ` Nick Piggin
  2007-03-01 15:33           ` Rafael J. Wysocki
                             ` (2 more replies)
  1 sibling, 3 replies; 85+ messages in thread
From: Nick Piggin @ 2007-03-01 15:18 UTC (permalink / raw)
  To: Rafael J. Wysocki; +Cc: Christoph Lameter, Pavel Machek, linux-mm

[-- Attachment #1: Type: text/plain, Size: 1195 bytes --]

Rafael J. Wysocki wrote:
> On Wednesday, 28 February 2007 16:25, Christoph Lameter wrote:
> 
>>On Wed, 28 Feb 2007, Pavel Machek wrote:
>>
>>
>>>I... actually do not like that patch. It adds code... at little or no
>>>benefit.
>>
>>We are looking into saving page flags since we are running out. The two 
>>page flags used by software suspend are rarely needed and should be taken 
>>out of the flags. If you can do it a different way then please do.
> 
> 
> As I have already said for a couple of times, I think we can and I'm going to
> do it, but right now I'm a bit busy with other things that I consider as more
> urgent.

I need one bit for lockless pagecache ;)

Anyway, I guess if you want something done you have to do it yourself.

This patch still needs work (and I don't know if it even works, because
I can't make swsusp resume even on a vanilla kernel). But this is my
WIP for removing swsusp page flags.

This patch adds a simple extent based nosave region tracker, and
rearranges some of the snapshot code to be a bit simpler and more
amenable to having dynamically allocated flags (they aren't actually
dynamically allocated in this patch, however).

-- 
SUSE Labs, Novell Inc.

[-- Attachment #2: swsusp-Nosave-use-extents.patch --]
[-- Type: text/plain, Size: 23907 bytes --]

---
 arch/x86_64/kernel/e820.c  |    8 --
 include/linux/page-flags.h |   33 ++++----
 include/linux/suspend.h    |   12 ++-
 kernel/power/Makefile      |    2 
 kernel/power/main.c        |    4 +
 kernel/power/nosave.c      |  103 +++++++++++++++++++++++++++
 kernel/power/power.h       |    1 
 kernel/power/snapshot.c    |  167 ++++++++++++++++++++++-----------------------
 mm/page_alloc.c            |   34 ---------
 9 files changed, 222 insertions(+), 142 deletions(-)

Index: linux-2.6/include/linux/suspend.h
===================================================================
--- linux-2.6.orig/include/linux/suspend.h	2007-03-02 01:51:08.000000000 +1100
+++ linux-2.6/include/linux/suspend.h	2007-03-02 02:06:07.000000000 +1100
@@ -21,7 +21,6 @@ struct pbe {
 
 /* mm/page_alloc.c */
 extern void drain_local_pages(void);
-extern void mark_free_pages(struct zone *zone);
 
 #ifdef CONFIG_PM
 /* kernel/power/swsusp.c */
@@ -42,6 +41,17 @@ static inline int software_suspend(void)
 }
 #endif /* CONFIG_PM */
 
+#ifdef CONFIG_SOFTWARE_SUSPEND
+/* kernel/power/nosave.c */
+extern int __init register_nosave_region(unsigned long start_pfn, unsigned long end_pfn);
+#else
+static inline int register_nosave_region(unsigned long start_pfn, unsigned long end_pfn)
+{
+	return 0;
+}
+#endif
+
+
 void save_processor_state(void);
 void restore_processor_state(void);
 struct saved_context;
Index: linux-2.6/kernel/power/Makefile
===================================================================
--- linux-2.6.orig/kernel/power/Makefile	2007-03-02 01:51:08.000000000 +1100
+++ linux-2.6/kernel/power/Makefile	2007-03-02 02:06:07.000000000 +1100
@@ -5,6 +5,6 @@ endif
 
 obj-y				:= main.o process.o console.o
 obj-$(CONFIG_PM_LEGACY)		+= pm.o
-obj-$(CONFIG_SOFTWARE_SUSPEND)	+= swsusp.o disk.o snapshot.o swap.o user.o
+obj-$(CONFIG_SOFTWARE_SUSPEND)	+= swsusp.o disk.o snapshot.o swap.o user.o nosave.o
 
 obj-$(CONFIG_MAGIC_SYSRQ)	+= poweroff.o
Index: linux-2.6/kernel/power/nosave.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6/kernel/power/nosave.c	2007-03-02 02:06:07.000000000 +1100
@@ -0,0 +1,103 @@
+/*
+ * Provide a way of tracking a set of "nosave" pfn ranges, registered by
+ * architecture code, and used by swsusp when deciding what memory to
+ * save.
+ */
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/rbtree.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <linux/suspend.h>
+
+struct nosave_extent {
+	struct rb_node rb_node;
+	unsigned long start;
+	unsigned long end;
+};
+
+static spinlock_t nosave_lock = SPIN_LOCK_UNLOCKED;
+static struct rb_root nosave_extents = RB_ROOT;
+
+#define rb_entry_extent(node)	rb_entry(node, struct nosave_extent, rb_node)
+#define rb_next_extent(node)	rb_entry_extent(rb_next(node))
+#define rb_prev_extent(node)	rb_entry_extent(rb_prev(node))
+
+static int __is_nosave_region(unsigned long start_pfn, unsigned long end_pfn)
+{
+	struct rb_node *n = nosave_extents.rb_node;
+
+	BUG_ON(end_pfn < start_pfn);
+
+	while (n) {
+		struct nosave_extent *ext = rb_entry_extent(n);
+
+		if (start_pfn < ext->start) {
+			if (end_pfn >= ext->start)
+				return 1;
+			n = n->rb_left;
+		} else if (end_pfn > ext->end) {
+			if (start_pfn <= ext->end)
+				return 1;
+			n = n->rb_right;
+		} else
+			return 1;
+	}
+
+	return 0;
+}
+
+static int is_nosave_region(unsigned long start_pfn, unsigned long end_pfn)
+{
+	int ret;
+	spin_lock(&nosave_lock);
+	ret = __is_nosave_region(start_pfn, end_pfn);
+	spin_unlock(&nosave_lock);
+	return ret;
+}
+
+int __init register_nosave_region(unsigned long start_pfn, unsigned long end_pfn)
+{
+	struct rb_node **p = &nosave_extents.rb_node;
+	struct rb_node *parent = NULL;
+	struct nosave_extent *ext, *new_extent;
+	int ret = 0;
+
+	spin_lock(&nosave_lock);
+	if (__is_nosave_region(start_pfn, end_pfn)) {
+		ret = -EEXIST;
+		goto out;
+	}
+
+	new_extent = kmalloc(sizeof(struct nosave_extent), GFP_ATOMIC);
+	if (!new_extent) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	new_extent->start = start_pfn;
+	new_extent->end = end_pfn;
+
+	while (*p) {
+		parent = *p;
+		ext = rb_entry_extent(parent);
+
+		if (start_pfn < ext->start)
+			p = &(*p)->rb_left;
+		else if (start_pfn > ext->start)
+			p = &(*p)->rb_right;
+		else
+			BUG();
+	}
+
+	rb_link_node(&new_extent->rb_node, parent, p);
+	rb_insert_color(&new_extent->rb_node, &nosave_extents);
+
+out:
+	spin_unlock(&nosave_lock);
+	return ret;
+}
+
+int is_nosave_pfn(unsigned long pfn)
+{
+	return is_nosave_region(pfn, pfn);
+}
Index: linux-2.6/kernel/power/power.h
===================================================================
--- linux-2.6.orig/kernel/power/power.h	2007-03-02 01:51:08.000000000 +1100
+++ linux-2.6/kernel/power/power.h	2007-03-02 02:06:07.000000000 +1100
@@ -15,6 +15,7 @@ struct swsusp_info {
 
 #ifdef CONFIG_SOFTWARE_SUSPEND
 extern int pm_suspend_disk(void);
+extern int is_nosave_pfn(unsigned long pfn);
 
 #else
 static inline int pm_suspend_disk(void)
Index: linux-2.6/kernel/power/snapshot.c
===================================================================
--- linux-2.6.orig/kernel/power/snapshot.c	2007-03-02 01:51:08.000000000 +1100
+++ linux-2.6/kernel/power/snapshot.c	2007-03-02 02:16:10.000000000 +1100
@@ -47,17 +47,15 @@ static void *buffer;
 /**
  *	@safe_needed - on resume, for storing the PBE list and the image,
  *	we can only use memory pages that do not conflict with the pages
- *	used before suspend.  The unsafe pages have PageNosaveFree set
- *	and we count them using unsafe_pages.
+ *	saved before suspend. mark_saved_pages finds these and marks them
+ *	with PageSwsuspSaved.
  *
- *	Each allocated image page is marked as PageNosave and PageNosaveFree
- *	so that swsusp_free() can release it.
+ *	Each allocated image page is marked as PageSwsuspImage so that
+ *	swsusp_free() will release it.
  */
 
 #define PG_ANY		0
 #define PG_SAFE		1
-#define PG_UNSAFE_CLEAR	1
-#define PG_UNSAFE_KEEP	0
 
 static unsigned int allocated_unsafe_pages;
 
@@ -66,21 +64,24 @@ static void *get_image_page(gfp_t gfp_ma
 	void *res;
 
 	res = (void *)get_zeroed_page(gfp_mask);
-	if (safe_needed)
-		while (res && PageNosaveFree(virt_to_page(res))) {
-			/* The page is unsafe, mark it for swsusp_free() */
-			SetPageNosave(virt_to_page(res));
+#if 0
+	if (safe_needed) /* XXX: should _always_ allocate "safe" pages */
+#endif
+		while (res && PageSwsuspSaved(virt_to_page(res))) {
+			/*
+			 * Not actually an image page, but we must mark it
+			 * as such for swsusp_free()
+			 */
+			SetPageSwsuspImage(virt_to_page(res));
 			allocated_unsafe_pages++;
 			res = (void *)get_zeroed_page(gfp_mask);
 		}
-	if (res) {
-		SetPageNosave(virt_to_page(res));
-		SetPageNosaveFree(virt_to_page(res));
-	}
+	if (res)
+		SetPageSwsuspImage(virt_to_page(res));
 	return res;
 }
 
-unsigned long get_safe_page(gfp_t gfp_mask)
+unsigned long get_safe_page(gfp_t gfp_mask) /*  XXX: used in arch code */
 {
 	return (unsigned long)get_image_page(gfp_mask, PG_SAFE);
 }
@@ -90,10 +91,8 @@ static struct page *alloc_image_page(gfp
 	struct page *page;
 
 	page = alloc_page(gfp_mask);
-	if (page) {
-		SetPageNosave(page);
-		SetPageNosaveFree(page);
-	}
+	if (page)
+		SetPageSwsuspImage(page);
 	return page;
 }
 
@@ -102,7 +101,7 @@ static struct page *alloc_image_page(gfp
  *	get_image_page (page flags set by it must be cleared)
  */
 
-static inline void free_image_page(void *addr, int clear_nosave_free)
+static inline void free_image_page(void *addr)
 {
 	struct page *page;
 
@@ -110,9 +109,8 @@ static inline void free_image_page(void 
 
 	page = virt_to_page(addr);
 
-	ClearPageNosave(page);
-	if (clear_nosave_free)
-		ClearPageNosaveFree(page);
+	BUG_ON(!PageSwsuspImage(page));
+	ClearPageSwsuspImage(page);
 
 	__free_page(page);
 }
@@ -127,12 +125,12 @@ struct linked_page {
 } __attribute__((packed));
 
 static inline void
-free_list_of_pages(struct linked_page *list, int clear_page_nosave)
+free_list_of_pages(struct linked_page *list)
 {
 	while (list) {
 		struct linked_page *lp = list->next;
 
-		free_image_page(list, clear_page_nosave);
+		free_image_page(list);
 		list = lp;
 	}
 }
@@ -188,9 +186,9 @@ static void *chain_alloc(struct chain_al
 	return ret;
 }
 
-static void chain_free(struct chain_allocator *ca, int clear_page_nosave)
+static void chain_free(struct chain_allocator *ca)
 {
-	free_list_of_pages(ca->chain, clear_page_nosave);
+	free_list_of_pages(ca->chain);
 	memset(ca, 0, sizeof(struct chain_allocator));
 }
 
@@ -289,7 +287,7 @@ static void memory_bm_position_reset(str
 	memory_bm_reset_chunk(bm);
 }
 
-static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free);
+static void memory_bm_free(struct memory_bitmap *bm);
 
 /**
  *	create_bm_block_list - create a list of block bitmap objects
@@ -360,7 +358,7 @@ memory_bm_create(struct memory_bitmap *b
 	zone_bm = create_zone_bm_list(nr, &ca);
 	bm->zone_bm_list = zone_bm;
 	if (!zone_bm) {
-		chain_free(&ca, PG_UNSAFE_CLEAR);
+		chain_free(&ca);
 		return -ENOMEM;
 	}
 
@@ -413,7 +411,7 @@ memory_bm_create(struct memory_bitmap *b
 
  Free:
 	bm->p_list = ca.chain;
-	memory_bm_free(bm, PG_UNSAFE_CLEAR);
+	memory_bm_free(bm);
 	return -ENOMEM;
 }
 
@@ -421,7 +419,7 @@ memory_bm_create(struct memory_bitmap *b
   *	memory_bm_free - free memory occupied by the memory bitmap @bm
   */
 
-static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free)
+static void memory_bm_free(struct memory_bitmap *bm)
 {
 	struct zone_bitmap *zone_bm;
 
@@ -433,12 +431,12 @@ static void memory_bm_free(struct memory
 		bb = zone_bm->bm_blocks;
 		while (bb) {
 			if (bb->data)
-				free_image_page(bb->data, clear_nosave_free);
+				free_image_page(bb->data);
 			bb = bb->next;
 		}
 		zone_bm = zone_bm->next;
 	}
-	free_list_of_pages(bm->p_list, clear_nosave_free);
+	free_list_of_pages(bm->p_list);
 	bm->zone_bm_list = NULL;
 }
 
@@ -600,8 +598,9 @@ static unsigned int count_free_highmem_p
  *	saveable_highmem_page - Determine whether a highmem page should be
  *	included in the suspend image.
  *
- *	We should save the page if it isn't Nosave or NosaveFree, or Reserved,
- *	and it isn't a part of a free chunk of pages.
+ *	We should save the page if it isn't in a nosave_pfn region, and if
+ *	it isn't already free (page_count()==0), and if it isn't part of our
+ *	suspend image.
  */
 
 static struct page *saveable_highmem_page(unsigned long pfn)
@@ -611,11 +610,22 @@ static struct page *saveable_highmem_pag
 	if (!pfn_valid(pfn))
 		return NULL;
 
+	if (is_nosave_pfn(pfn))
+		return NULL;
+
 	page = pfn_to_page(pfn);
 
 	BUG_ON(!PageHighMem(page));
 
-	if (PageNosave(page) || PageReserved(page) || PageNosaveFree(page))
+	/* XXX: this is racy if there is other stuff running... we
+	 * could just use the free page count to estimate this for
+	 * use in shrink_memory.
+	 */
+	if (page_count(page) == 0)
+		return NULL;
+
+	/* XXX: really need to check PageReserved? */
+	if (PageReserved(page) || PageSwsuspImage(page))
 		return NULL;
 
 	return page;
@@ -637,7 +647,6 @@ unsigned int count_highmem_pages(void)
 		if (!is_highmem(zone))
 			continue;
 
-		mark_free_pages(zone);
 		max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
 		for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
 			if (saveable_highmem_page(pfn))
@@ -651,23 +660,11 @@ static inline unsigned int count_highmem
 #endif /* CONFIG_HIGHMEM */
 
 /**
- *	pfn_is_nosave - check if given pfn is in the 'nosave' section
- */
-
-static inline int pfn_is_nosave(unsigned long pfn)
-{
-	unsigned long nosave_begin_pfn = __pa(&__nosave_begin) >> PAGE_SHIFT;
-	unsigned long nosave_end_pfn = PAGE_ALIGN(__pa(&__nosave_end)) >> PAGE_SHIFT;
-	return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn);
-}
-
-/**
  *	saveable - Determine whether a non-highmem page should be included in
  *	the suspend image.
  *
- *	We should save the page if it isn't Nosave, and is not in the range
- *	of pages statically defined as 'unsaveable', and it isn't a part of
- *	a free chunk of pages.
+ *	We should save the page based on criteria similar to
+ *	saveable_highmem_page.
  */
 
 static struct page *saveable_page(unsigned long pfn)
@@ -677,14 +674,17 @@ static struct page *saveable_page(unsign
 	if (!pfn_valid(pfn))
 		return NULL;
 
+	if (is_nosave_pfn(pfn))
+		return NULL;
+
 	page = pfn_to_page(pfn);
 
 	BUG_ON(PageHighMem(page));
 
-	if (PageNosave(page) || PageNosaveFree(page))
+	if (page_count(page) == 0)
 		return NULL;
 
-	if (PageReserved(page) && pfn_is_nosave(pfn))
+	if (PageSwsuspImage(page))
 		return NULL;
 
 	return page;
@@ -705,7 +705,6 @@ unsigned int count_data_pages(void)
 		if (is_highmem(zone))
 			continue;
 
-		mark_free_pages(zone);
 		max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
 		for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
 			if(saveable_page(pfn))
@@ -783,11 +782,14 @@ copy_data_pages(struct memory_bitmap *co
 	for_each_zone(zone) {
 		unsigned long max_zone_pfn;
 
-		mark_free_pages(zone);
 		max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
-		for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
-			if (page_is_saveable(zone, pfn))
+		for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) {
+			struct page *page = page_is_saveable(zone, pfn);
+			if (page) {
+				SetPageSwsuspSaved(page);
 				memory_bm_set_bit(orig_bm, pfn);
+			}
+		}
 	}
 	memory_bm_position_reset(orig_bm);
 	memory_bm_position_reset(copy_bm);
@@ -821,9 +823,10 @@ void swsusp_free(void)
 			if (pfn_valid(pfn)) {
 				struct page *page = pfn_to_page(pfn);
 
-				if (PageNosave(page) && PageNosaveFree(page)) {
-					ClearPageNosave(page);
-					ClearPageNosaveFree(page);
+				if (PageSwsuspSaved(page))
+					ClearPageSwsuspSaved(page);
+				if (PageSwsuspImage(page)) {
+					ClearPageSwsuspImage(page);
 					__free_page(page);
 				}
 			}
@@ -1131,22 +1134,21 @@ int snapshot_read_next(struct snapshot_h
 }
 
 /**
- *	mark_unsafe_pages - mark the pages that cannot be used for storing
+ *	mark_saved_pages - mark the pages that cannot be used for storing
  *	the image during resume, because they conflict with the pages that
- *	had been used before suspend
+ *	had been in use before suspending (ie. they were "saved").
  */
 
-static int mark_unsafe_pages(struct memory_bitmap *bm)
+static int mark_saved_pages(struct memory_bitmap *bm)
 {
 	struct zone *zone;
 	unsigned long pfn, max_zone_pfn;
 
-	/* Clear page flags */
+	/* Check page flags */
 	for_each_zone(zone) {
 		max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
 		for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
-			if (pfn_valid(pfn))
-				ClearPageNosaveFree(pfn_to_page(pfn));
+			WARN_ON(pfn_valid(pfn) && PageSwsuspSaved(pfn_to_page(pfn)));
 	}
 
 	/* Mark pages that correspond to the "original" pfns as "unsafe" */
@@ -1155,7 +1157,7 @@ static int mark_unsafe_pages(struct memo
 		pfn = memory_bm_next_pfn(bm);
 		if (likely(pfn != BM_END_OF_MAP)) {
 			if (likely(pfn_valid(pfn)))
-				SetPageNosaveFree(pfn_to_page(pfn));
+				SetPageSwsuspSaved(pfn_to_page(pfn));
 			else
 				return -EFAULT;
 		}
@@ -1321,14 +1323,13 @@ prepare_highmem_image(struct memory_bitm
 		struct page *page;
 
 		page = alloc_page(__GFP_HIGHMEM);
-		if (!PageNosaveFree(page)) {
+		if (!PageSwsuspSaved(page)) {
 			/* The page is "safe", set its bit the bitmap */
 			memory_bm_set_bit(bm, page_to_pfn(page));
 			safe_highmem_pages++;
 		}
 		/* Mark the page as allocated */
-		SetPageNosave(page);
-		SetPageNosaveFree(page);
+		SetPageSwsuspImage(page);
 	}
 	memory_bm_position_reset(bm);
 	safe_highmem_bm = bm;
@@ -1360,7 +1361,7 @@ get_highmem_page_buffer(struct page *pag
 	struct highmem_pbe *pbe;
 	void *kaddr;
 
-	if (PageNosave(page) && PageNosaveFree(page)) {
+	if (PageSwsuspSaved(page) && PageSwsuspImage(page)) {
 		/* We have allocated the "original" page frame and we can
 		 * use it directly to store the loaded page.
 		 */
@@ -1422,10 +1423,10 @@ static inline int last_highmem_page_copi
 static inline void free_highmem_data(void)
 {
 	if (safe_highmem_bm)
-		memory_bm_free(safe_highmem_bm, PG_UNSAFE_CLEAR);
+		memory_bm_free(safe_highmem_bm);
 
 	if (buffer)
-		free_image_page(buffer, PG_UNSAFE_CLEAR);
+		free_image_page(buffer);
 }
 #else
 static inline int get_safe_write_buffer(void) { return 0; }
@@ -1474,11 +1475,11 @@ prepare_image(struct memory_bitmap *new_
 	int error;
 
 	/* If there is no highmem, the buffer will not be necessary */
-	free_image_page(buffer, PG_UNSAFE_CLEAR);
+	free_image_page(buffer);
 	buffer = NULL;
 
 	nr_highmem = count_highmem_image_pages(bm);
-	error = mark_unsafe_pages(bm);
+	error = mark_saved_pages(bm);
 	if (error)
 		goto Free;
 
@@ -1487,7 +1488,8 @@ prepare_image(struct memory_bitmap *new_
 		goto Free;
 
 	duplicate_memory_bitmap(new_bm, bm);
-	memory_bm_free(bm, PG_UNSAFE_KEEP);
+	memory_bm_free(bm);
+
 	if (nr_highmem > 0) {
 		error = prepare_highmem_image(bm, &nr_highmem);
 		if (error)
@@ -1522,20 +1524,19 @@ prepare_image(struct memory_bitmap *new_
 			error = -ENOMEM;
 			goto Free;
 		}
-		if (!PageNosaveFree(virt_to_page(lp))) {
+		if (!PageSwsuspSaved(virt_to_page(lp))) {
 			/* The page is "safe", add it to the list */
 			lp->next = safe_pages_list;
 			safe_pages_list = lp;
 		}
 		/* Mark the page as allocated */
-		SetPageNosave(virt_to_page(lp));
-		SetPageNosaveFree(virt_to_page(lp));
+		SetPageSwsuspImage(virt_to_page(lp));
 		nr_pages--;
 	}
 	/* Free the reserved safe pages so that chain_alloc() can use them */
 	while (sp_list) {
 		lp = sp_list->next;
-		free_image_page(sp_list, PG_UNSAFE_CLEAR);
+		free_image_page(sp_list);
 		sp_list = lp;
 	}
 	return 0;
@@ -1558,7 +1559,7 @@ static void *get_buffer(struct memory_bi
 	if (PageHighMem(page))
 		return get_highmem_page_buffer(page, ca);
 
-	if (PageNosave(page) && PageNosaveFree(page))
+	if (PageSwsuspSaved(page) && PageSwsuspImage(page))
 		/* We have allocated the "original" page frame and we can
 		 * use it directly to store the loaded page.
 		 */
@@ -1680,7 +1681,7 @@ void snapshot_write_finalize(struct snap
 	copy_last_highmem_page();
 	/* Free only if we have loaded the image entirely */
 	if (handle->prev && handle->cur > nr_meta_pages + nr_copy_pages) {
-		memory_bm_free(&orig_bm, PG_UNSAFE_CLEAR);
+		memory_bm_free(&orig_bm);
 		free_highmem_data();
 	}
 }
@@ -1733,7 +1734,7 @@ int restore_highmem(void)
 		swap_two_pages_data(pbe->copy_page, pbe->orig_page, buf);
 		pbe = pbe->next;
 	}
-	free_image_page(buf, PG_UNSAFE_CLEAR);
+	free_image_page(buf);
 	return 0;
 }
 #endif /* CONFIG_HIGHMEM */
Index: linux-2.6/arch/x86_64/kernel/e820.c
===================================================================
--- linux-2.6.orig/arch/x86_64/kernel/e820.c	2007-03-02 01:51:09.000000000 +1100
+++ linux-2.6/arch/x86_64/kernel/e820.c	2007-03-02 02:06:07.000000000 +1100
@@ -259,16 +259,12 @@ void __init e820_reserve_resources(void)
 static void __init
 e820_mark_nosave_range(unsigned long start, unsigned long end)
 {
-	unsigned long pfn, max_pfn;
-
 	if (start >= end)
 		return;
 
 	printk("Nosave address range: %016lx - %016lx\n", start, end);
-	max_pfn = end >> PAGE_SHIFT;
-	for (pfn = start >> PAGE_SHIFT; pfn < max_pfn; pfn++)
-		if (pfn_valid(pfn))
-			SetPageNosave(pfn_to_page(pfn));
+	if (register_nosave_region(start >> PAGE_SHIFT, end >> PAGE_SHIFT))
+		panic("Could not register nosave region\n");
 }
 
 /*
Index: linux-2.6/include/linux/page-flags.h
===================================================================
--- linux-2.6.orig/include/linux/page-flags.h	2007-03-02 01:51:08.000000000 +1100
+++ linux-2.6/include/linux/page-flags.h	2007-03-02 02:06:07.000000000 +1100
@@ -82,14 +82,15 @@
 #define PG_private		11	/* If pagecache, has fs-private data */
 
 #define PG_writeback		12	/* Page is under writeback */
-#define PG_nosave		13	/* Used for system suspend/resume */
-#define PG_compound		14	/* Part of a compound page */
-#define PG_swapcache		15	/* Swap page: swp_entry_t in private */
-
-#define PG_mappedtodisk		16	/* Has blocks allocated on-disk */
-#define PG_reclaim		17	/* To be reclaimed asap */
-#define PG_nosave_free		18	/* Used for system suspend/resume */
-#define PG_buddy		19	/* Page is free, on buddy lists */
+#define PG_compound		13	/* Part of a compound page */
+#define PG_swapcache		14	/* Swap page: swp_entry_t in private */
+#define PG_mappedtodisk		15	/* Has blocks allocated on-disk */
+
+#define PG_reclaim		16	/* To be reclaimed asap */
+#define PG_buddy		17	/* Page is free, on buddy lists */
+
+#define PG_swsusp_image		18
+#define PG_swsusp_saved		19
 
 /* PG_owner_priv_1 users should have descriptive aliases */
 #define PG_checked		PG_owner_priv_1 /* Used by some filesystems */
@@ -214,15 +215,13 @@ static inline void SetPageUptodate(struc
 		ret;							\
 	})
 
-#define PageNosave(page)	test_bit(PG_nosave, &(page)->flags)
-#define SetPageNosave(page)	set_bit(PG_nosave, &(page)->flags)
-#define TestSetPageNosave(page)	test_and_set_bit(PG_nosave, &(page)->flags)
-#define ClearPageNosave(page)		clear_bit(PG_nosave, &(page)->flags)
-#define TestClearPageNosave(page)	test_and_clear_bit(PG_nosave, &(page)->flags)
-
-#define PageNosaveFree(page)	test_bit(PG_nosave_free, &(page)->flags)
-#define SetPageNosaveFree(page)	set_bit(PG_nosave_free, &(page)->flags)
-#define ClearPageNosaveFree(page)		clear_bit(PG_nosave_free, &(page)->flags)
+#define PageSwsuspImage(page)	test_bit(PG_swsusp_image, &(page)->flags)
+#define SetPageSwsuspImage(page) set_bit(PG_swsusp_image, &(page)->flags)
+#define ClearPageSwsuspImage(page) clear_bit(PG_swsusp_image, &(page)->flags)
+
+#define PageSwsuspSaved(page)	test_bit(PG_swsusp_saved, &(page)->flags)
+#define SetPageSwsuspSaved(page) set_bit(PG_swsusp_saved, &(page)->flags)
+#define ClearPageSwsuspSaved(page) clear_bit(PG_swsusp_saved, &(page)->flags)
 
 #define PageBuddy(page)		test_bit(PG_buddy, &(page)->flags)
 #define __SetPageBuddy(page)	__set_bit(PG_buddy, &(page)->flags)
Index: linux-2.6/kernel/power/main.c
===================================================================
--- linux-2.6.orig/kernel/power/main.c	2007-03-02 01:51:09.000000000 +1100
+++ linux-2.6/kernel/power/main.c	2007-03-02 02:06:07.000000000 +1100
@@ -334,6 +334,10 @@ static int __init pm_init(void)
 	int error = subsystem_register(&power_subsys);
 	if (!error)
 		error = sysfs_create_group(&power_subsys.kset.kobj,&attr_group);
+	if (!error)
+		error = register_nosave_region(
+				__pa(&__nosave_begin) >> PAGE_SHIFT,
+				PAGE_ALIGN(__pa(&__nosave_end)) >> PAGE_SHIFT);
 	return error;
 }
 
Index: linux-2.6/mm/page_alloc.c
===================================================================
--- linux-2.6.orig/mm/page_alloc.c	2007-03-02 01:51:09.000000000 +1100
+++ linux-2.6/mm/page_alloc.c	2007-03-02 02:06:07.000000000 +1100
@@ -752,40 +752,6 @@ static void __drain_pages(unsigned int c
 }
 
 #ifdef CONFIG_PM
-
-void mark_free_pages(struct zone *zone)
-{
-	unsigned long pfn, max_zone_pfn;
-	unsigned long flags;
-	int order;
-	struct list_head *curr;
-
-	if (!zone->spanned_pages)
-		return;
-
-	spin_lock_irqsave(&zone->lock, flags);
-
-	max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
-	for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
-		if (pfn_valid(pfn)) {
-			struct page *page = pfn_to_page(pfn);
-
-			if (!PageNosave(page))
-				ClearPageNosaveFree(page);
-		}
-
-	for (order = MAX_ORDER - 1; order >= 0; --order)
-		list_for_each(curr, &zone->free_area[order].free_list) {
-			unsigned long i;
-
-			pfn = page_to_pfn(list_entry(curr, struct page, lru));
-			for (i = 0; i < (1UL << order); i++)
-				SetPageNosaveFree(pfn_to_page(pfn + i));
-		}
-
-	spin_unlock_irqrestore(&zone->lock, flags);
-}
-
 /*
  * Spill all of this CPU's per-cpu pages back into the buddy allocator.
  */

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: Remove page flags for software suspend
  2007-03-01 15:18         ` Nick Piggin
@ 2007-03-01 15:33           ` Rafael J. Wysocki
  2007-03-01 23:10             ` Rafael J. Wysocki
  2007-03-04 13:50               ` Rafael J. Wysocki
  2007-03-01 17:48           ` Remove page flags for software suspend Hugh Dickins
  2007-03-01 20:46           ` Rafael J. Wysocki
  2 siblings, 2 replies; 85+ messages in thread
From: Rafael J. Wysocki @ 2007-03-01 15:33 UTC (permalink / raw)
  To: Nick Piggin; +Cc: Christoph Lameter, Pavel Machek, linux-mm

On Thursday, 1 March 2007 16:18, Nick Piggin wrote:
> Rafael J. Wysocki wrote:
> > On Wednesday, 28 February 2007 16:25, Christoph Lameter wrote:
> > 
> >>On Wed, 28 Feb 2007, Pavel Machek wrote:
> >>
> >>
> >>>I... actually do not like that patch. It adds code... at little or no
> >>>benefit.
> >>
> >>We are looking into saving page flags since we are running out. The two 
> >>page flags used by software suspend are rarely needed and should be taken 
> >>out of the flags. If you can do it a different way then please do.
> > 
> > 
> > As I have already said for a couple of times, I think we can and I'm going to
> > do it, but right now I'm a bit busy with other things that I consider as more
> > urgent.
> 
> I need one bit for lockless pagecache ;)
> 
> Anyway, I guess if you want something done you have to do it yourself.
> 
> This patch still needs work (and I don't know if it even works, because
> I can't make swsusp resume even on a vanilla kernel). But this is my
> WIP for removing swsusp page flags.
> 
> This patch adds a simple extent based nosave region tracker, and
> rearranges some of the snapshot code to be a bit simpler and more
> amenable to having dynamically allocated flags (they aren't actually
> dynamically allocated in this patch, however).

Thanks for the patch.

Probably I'd like to do some things in a different way, I'll think about that
later today.

I hope I'll have a working patch that removes the "offending" page flags after
the weekend.

Greetings,
Rafael

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: Remove page flags for software suspend
  2007-03-01 15:18         ` Nick Piggin
  2007-03-01 15:33           ` Rafael J. Wysocki
@ 2007-03-01 17:48           ` Hugh Dickins
  2007-03-13  3:36             ` Nick Piggin
  2007-03-01 20:46           ` Rafael J. Wysocki
  2 siblings, 1 reply; 85+ messages in thread
From: Hugh Dickins @ 2007-03-01 17:48 UTC (permalink / raw)
  To: Nick Piggin; +Cc: Rafael J. Wysocki, Christoph Lameter, Pavel Machek, linux-mm

On Thu, 1 Mar 2007, Nick Piggin wrote:
> 
> Let's make sure that no more backdoor page flags get allocated without
> going through the linux-mm list to work out whether we really need it
> or can live without it...

On Fri, 2 Mar 2007, Nick Piggin wrote:
> 
> I need one bit for lockless pagecache ;)

Is that still your PageNoNewRefs thing?
What was wrong with my atomic_cmpxchg suggestion?

Hugh

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: Remove page flags for software suspend
  2007-03-01 15:18         ` Nick Piggin
  2007-03-01 15:33           ` Rafael J. Wysocki
  2007-03-01 17:48           ` Remove page flags for software suspend Hugh Dickins
@ 2007-03-01 20:46           ` Rafael J. Wysocki
  2007-03-02 10:17             ` Pavel Machek
  2 siblings, 1 reply; 85+ messages in thread
From: Rafael J. Wysocki @ 2007-03-01 20:46 UTC (permalink / raw)
  To: Nick Piggin; +Cc: Christoph Lameter, Pavel Machek, linux-mm

Hi,

On Thursday, 1 March 2007 16:18, Nick Piggin wrote:
> Rafael J. Wysocki wrote:
> > On Wednesday, 28 February 2007 16:25, Christoph Lameter wrote:
> > 
> >>On Wed, 28 Feb 2007, Pavel Machek wrote:
> >>
> >>
> >>>I... actually do not like that patch. It adds code... at little or no
> >>>benefit.
> >>
> >>We are looking into saving page flags since we are running out. The two 
> >>page flags used by software suspend are rarely needed and should be taken 
> >>out of the flags. If you can do it a different way then please do.
> > 
> > 
> > As I have already said for a couple of times, I think we can and I'm going to
> > do it, but right now I'm a bit busy with other things that I consider as more
> > urgent.
> 
> I need one bit for lockless pagecache ;)
> 
> Anyway, I guess if you want something done you have to do it yourself.
> 
> This patch still needs work (and I don't know if it even works, because
> I can't make swsusp resume even on a vanilla kernel).

That's interesting, BTW, because recently I've been having problems with
finding a machine on which it doesn't work. ;-)  If you could tell me (in
private) what the problems are, I'd try to help.

> But this is my WIP for removing swsusp page flags.
> 
> This patch adds a simple extent based nosave region tracker, and
> rearranges some of the snapshot code to be a bit simpler and more
> amenable to having dynamically allocated flags (they aren't actually
> dynamically allocated in this patch, however).

I like the idea of using just one bit for marking the allocated pages and the
simplifications it allows us to make, but is it really true that all of the
free pages and only the free pages will have page_count(page) == 0?

Also, the extents-related part is not exactly nice IMHO ...

Greetings,
Rafael

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: Remove page flags for software suspend
  2007-03-01 15:33           ` Rafael J. Wysocki
@ 2007-03-01 23:10             ` Rafael J. Wysocki
  2007-03-04 13:50               ` Rafael J. Wysocki
  1 sibling, 0 replies; 85+ messages in thread
From: Rafael J. Wysocki @ 2007-03-01 23:10 UTC (permalink / raw)
  To: Nick Piggin; +Cc: Christoph Lameter, Pavel Machek, linux-mm

On Thursday, 1 March 2007 16:33, Rafael J. Wysocki wrote:
> On Thursday, 1 March 2007 16:18, Nick Piggin wrote:
> > Rafael J. Wysocki wrote:
> > > On Wednesday, 28 February 2007 16:25, Christoph Lameter wrote:
> > > 
> > >>On Wed, 28 Feb 2007, Pavel Machek wrote:
> > >>
> > >>
> > >>>I... actually do not like that patch. It adds code... at little or no
> > >>>benefit.
> > >>
> > >>We are looking into saving page flags since we are running out. The two 
> > >>page flags used by software suspend are rarely needed and should be taken 
> > >>out of the flags. If you can do it a different way then please do.
> > > 
> > > 
> > > As I have already said for a couple of times, I think we can and I'm going to
> > > do it, but right now I'm a bit busy with other things that I consider as more
> > > urgent.
> > 
> > I need one bit for lockless pagecache ;)
> > 
> > Anyway, I guess if you want something done you have to do it yourself.
> > 
> > This patch still needs work (and I don't know if it even works, because
> > I can't make swsusp resume even on a vanilla kernel). But this is my
> > WIP for removing swsusp page flags.
> > 
> > This patch adds a simple extent based nosave region tracker, and
> > rearranges some of the snapshot code to be a bit simpler and more
> > amenable to having dynamically allocated flags (they aren't actually
> > dynamically allocated in this patch, however).
> 
> Thanks for the patch.
> 
> Probably I'd like to do some things in a different way, I'll think about that
> later today.
> 
> I hope I'll have a working patch that removes the "offending" page flags after
> the weekend.

Well, there is one problem that I think you can help me solve.  Namely, the
arch code that marks the nosave pages runs very early, before the buddy
allocator is set up.  Thus the question is how I can allocate a list of nosave
regions at this stage of initialization, so that it survives to the point at
which swsusp can be activated.

Please advise.

Rafael

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: Remove page flags for software suspend
  2007-03-01 20:46           ` Rafael J. Wysocki
@ 2007-03-02 10:17             ` Pavel Machek
  0 siblings, 0 replies; 85+ messages in thread
From: Pavel Machek @ 2007-03-02 10:17 UTC (permalink / raw)
  To: Rafael J. Wysocki; +Cc: Nick Piggin, Christoph Lameter, linux-mm

Hi!

> > Anyway, I guess if you want something done you have to do it yourself.
> > 
> > This patch still needs work (and I don't know if it even works, because
> > I can't make swsusp resume even on a vanilla kernel).
> 
> That's interesting, BTW, because recently I've been having problems with
> finding a machine on which it doesn't work. ;-)  If you could tell me (in
> private) what the problems are, I'd try to help.

Feel free to cc me.

(Actually, rafael, something is very wrong in 2.6.21-rc1+. I got
broken swsusp, s2ram, bluetooth and MMC.)
									Pavel

-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 85+ messages in thread

* [RFC][PATCH 0/3] swsusp: Do not use page flags (was: Re: Remove page flags for software suspend)
  2007-03-01 15:33           ` Rafael J. Wysocki
@ 2007-03-04 13:50               ` Rafael J. Wysocki
  2007-03-04 13:50               ` Rafael J. Wysocki
  1 sibling, 0 replies; 85+ messages in thread
From: Rafael J. Wysocki @ 2007-03-04 13:50 UTC (permalink / raw)
  To: Nick Piggin, Pavel Machek
  Cc: linux-mm, Johannes Berg, Christoph Lameter, Peter Zijlstra, pm list

On Thursday, 1 March 2007 16:33, Rafael J. Wysocki wrote:
> On Thursday, 1 March 2007 16:18, Nick Piggin wrote:
> > Rafael J. Wysocki wrote:
> > > On Wednesday, 28 February 2007 16:25, Christoph Lameter wrote:
> > > 
> > >>On Wed, 28 Feb 2007, Pavel Machek wrote:
> > >>
> > >>
> > >>>I... actually do not like that patch. It adds code... at little or no
> > >>>benefit.
> > >>
> > >>We are looking into saving page flags since we are running out. The two 
> > >>page flags used by software suspend are rarely needed and should be taken 
> > >>out of the flags. If you can do it a different way then please do.
> > > 
> > > 
> > > As I have already said for a couple of times, I think we can and I'm going to
> > > do it, but right now I'm a bit busy with other things that I consider as more
> > > urgent.
> > 
> > I need one bit for lockless pagecache ;)
> > 
> > Anyway, I guess if you want something done you have to do it yourself.
> > 
> > This patch still needs work (and I don't know if it even works, because
> > I can't make swsusp resume even on a vanilla kernel). But this is my
> > WIP for removing swsusp page flags.
> > 
> > This patch adds a simple extent based nosave region tracker, and
> > rearranges some of the snapshot code to be a bit simpler and more
> > amenable to having dynamically allocated flags (they aren't actually
> > dynamically allocated in this patch, however).
> 
> Thanks for the patch.
> 
> Probably I'd like to do some things in a different way, I'll think about that
> later today.
> 
> I hope I'll have a working patch that removes the "offending" page flags after
> the weekend.

Okay, the next three messages contain patches that should do the trick.

They have been tested on x86_64, but not very thoroughly.

Greetings,
Rafael

^ permalink raw reply	[flat|nested] 85+ messages in thread

* [RFC][PATCH 0/3] swsusp: Do not use page flags (was: Re: Remove page flags for software suspend)
@ 2007-03-04 13:50               ` Rafael J. Wysocki
  0 siblings, 0 replies; 85+ messages in thread
From: Rafael J. Wysocki @ 2007-03-04 13:50 UTC (permalink / raw)
  To: Nick Piggin, Pavel Machek
  Cc: Christoph Lameter, linux-mm, pm list, Johannes Berg, Peter Zijlstra

On Thursday, 1 March 2007 16:33, Rafael J. Wysocki wrote:
> On Thursday, 1 March 2007 16:18, Nick Piggin wrote:
> > Rafael J. Wysocki wrote:
> > > On Wednesday, 28 February 2007 16:25, Christoph Lameter wrote:
> > > 
> > >>On Wed, 28 Feb 2007, Pavel Machek wrote:
> > >>
> > >>
> > >>>I... actually do not like that patch. It adds code... at little or no
> > >>>benefit.
> > >>
> > >>We are looking into saving page flags since we are running out. The two 
> > >>page flags used by software suspend are rarely needed and should be taken 
> > >>out of the flags. If you can do it a different way then please do.
> > > 
> > > 
> > > As I have already said for a couple of times, I think we can and I'm going to
> > > do it, but right now I'm a bit busy with other things that I consider as more
> > > urgent.
> > 
> > I need one bit for lockless pagecache ;)
> > 
> > Anyway, I guess if you want something done you have to do it yourself.
> > 
> > This patch still needs work (and I don't know if it even works, because
> > I can't make swsusp resume even on a vanilla kernel). But this is my
> > WIP for removing swsusp page flags.
> > 
> > This patch adds a simple extent based nosave region tracker, and
> > rearranges some of the snapshot code to be a bit simpler and more
> > amenable to having dynamically allocated flags (they aren't actually
> > dynamically allocated in this patch, however).
> 
> Thanks for the patch.
> 
> Probably I'd like to do some things in a different way, I'll think about that
> later today.
> 
> I hope I'll have a working patch that removes the "offending" page flags after
> the weekend.

Okay, the next three messages contain patches that should do the trick.

They have been tested on x86_64, but not very thoroughly.

Greetings,
Rafael

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 85+ messages in thread

* [RFC][PATCH 1/3] swsusp: Do not use page flags directly
  2007-03-04 13:50               ` Rafael J. Wysocki
@ 2007-03-04 14:07                 ` Rafael J. Wysocki
  -1 siblings, 0 replies; 85+ messages in thread
From: Rafael J. Wysocki @ 2007-03-04 14:07 UTC (permalink / raw)
  To: Nick Piggin, Pavel Machek
  Cc: linux-mm, Johannes Berg, Christoph Lameter, Peter Zijlstra, pm list

Make swsusp stop using SetPageNosave(), SetPageNosaveFree() and friends
directly.

This way the amount of changes made in the next patch is smaller.

---
 include/linux/suspend.h |   33 +++++++++++++++++++++++++++++++++
 kernel/power/snapshot.c |   48 +++++++++++++++++++++++++-----------------------
 mm/page_alloc.c         |    6 +++---
 3 files changed, 61 insertions(+), 26 deletions(-)

Index: linux-2.6.21-rc2/include/linux/suspend.h
===================================================================
--- linux-2.6.21-rc2.orig/include/linux/suspend.h	2007-03-02 09:05:53.000000000 +0100
+++ linux-2.6.21-rc2/include/linux/suspend.h	2007-03-02 09:24:02.000000000 +0100
@@ -8,6 +8,7 @@
 #include <linux/notifier.h>
 #include <linux/init.h>
 #include <linux/pm.h>
+#include <linux/mm.h>
 
 /* struct pbe is used for creating lists of pages that should be restored
  * atomically during the resume from disk, because the page frames they have
@@ -49,6 +50,38 @@ void __save_processor_state(struct saved
 void __restore_processor_state(struct saved_context *ctxt);
 unsigned long get_safe_page(gfp_t gfp_mask);
 
+/* Page management functions for the software suspend (swsusp) */
+
+static inline void swsusp_set_page_forbidden(struct page *page)
+{
+	SetPageNosave(page);
+}
+
+static inline int swsusp_page_is_forbidden(struct page *page)
+{
+	return PageNosave(page);
+}
+
+static inline void swsusp_unset_page_forbidden(struct page *page)
+{
+	ClearPageNosave(page);
+}
+
+static inline void swsusp_set_page_free(struct page *page)
+{
+	SetPageNosaveFree(page);
+}
+
+static inline int swsusp_page_is_free(struct page *page)
+{
+	return PageNosaveFree(page);
+}
+
+static inline void swsusp_unset_page_free(struct page *page)
+{
+	ClearPageNosaveFree(page);
+}
+
 /*
  * XXX: We try to keep some more pages free so that I/O operations succeed
  * without paging. Might this be more?
Index: linux-2.6.21-rc2/kernel/power/snapshot.c
===================================================================
--- linux-2.6.21-rc2.orig/kernel/power/snapshot.c	2007-03-02 09:05:53.000000000 +0100
+++ linux-2.6.21-rc2/kernel/power/snapshot.c	2007-03-02 09:27:06.000000000 +0100
@@ -67,15 +67,15 @@ static void *get_image_page(gfp_t gfp_ma
 
 	res = (void *)get_zeroed_page(gfp_mask);
 	if (safe_needed)
-		while (res && PageNosaveFree(virt_to_page(res))) {
+		while (res && swsusp_page_is_free(virt_to_page(res))) {
 			/* The page is unsafe, mark it for swsusp_free() */
-			SetPageNosave(virt_to_page(res));
+			swsusp_set_page_forbidden(virt_to_page(res));
 			allocated_unsafe_pages++;
 			res = (void *)get_zeroed_page(gfp_mask);
 		}
 	if (res) {
-		SetPageNosave(virt_to_page(res));
-		SetPageNosaveFree(virt_to_page(res));
+		swsusp_set_page_forbidden(virt_to_page(res));
+		swsusp_set_page_free(virt_to_page(res));
 	}
 	return res;
 }
@@ -91,8 +91,8 @@ static struct page *alloc_image_page(gfp
 
 	page = alloc_page(gfp_mask);
 	if (page) {
-		SetPageNosave(page);
-		SetPageNosaveFree(page);
+		swsusp_set_page_forbidden(page);
+		swsusp_set_page_free(page);
 	}
 	return page;
 }
@@ -110,9 +110,9 @@ static inline void free_image_page(void 
 
 	page = virt_to_page(addr);
 
-	ClearPageNosave(page);
+	swsusp_unset_page_forbidden(page);
 	if (clear_nosave_free)
-		ClearPageNosaveFree(page);
+		swsusp_unset_page_free(page);
 
 	__free_page(page);
 }
@@ -615,7 +615,8 @@ static struct page *saveable_highmem_pag
 
 	BUG_ON(!PageHighMem(page));
 
-	if (PageNosave(page) || PageReserved(page) || PageNosaveFree(page))
+	if (swsusp_page_is_forbidden(page) ||  swsusp_page_is_free(page) ||
+	    PageReserved(page))
 		return NULL;
 
 	return page;
@@ -681,7 +682,7 @@ static struct page *saveable_page(unsign
 
 	BUG_ON(PageHighMem(page));
 
-	if (PageNosave(page) || PageNosaveFree(page))
+	if (swsusp_page_is_forbidden(page) || swsusp_page_is_free(page))
 		return NULL;
 
 	if (PageReserved(page) && pfn_is_nosave(pfn))
@@ -821,9 +822,10 @@ void swsusp_free(void)
 			if (pfn_valid(pfn)) {
 				struct page *page = pfn_to_page(pfn);
 
-				if (PageNosave(page) && PageNosaveFree(page)) {
-					ClearPageNosave(page);
-					ClearPageNosaveFree(page);
+				if (swsusp_page_is_forbidden(page) &&
+				    swsusp_page_is_free(page)) {
+					swsusp_unset_page_forbidden(page);
+					swsusp_unset_page_free(page);
 					__free_page(page);
 				}
 			}
@@ -1146,7 +1148,7 @@ static int mark_unsafe_pages(struct memo
 		max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
 		for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
 			if (pfn_valid(pfn))
-				ClearPageNosaveFree(pfn_to_page(pfn));
+				swsusp_unset_page_free(pfn_to_page(pfn));
 	}
 
 	/* Mark pages that correspond to the "original" pfns as "unsafe" */
@@ -1155,7 +1157,7 @@ static int mark_unsafe_pages(struct memo
 		pfn = memory_bm_next_pfn(bm);
 		if (likely(pfn != BM_END_OF_MAP)) {
 			if (likely(pfn_valid(pfn)))
-				SetPageNosaveFree(pfn_to_page(pfn));
+				swsusp_set_page_free(pfn_to_page(pfn));
 			else
 				return -EFAULT;
 		}
@@ -1321,14 +1323,14 @@ prepare_highmem_image(struct memory_bitm
 		struct page *page;
 
 		page = alloc_page(__GFP_HIGHMEM);
-		if (!PageNosaveFree(page)) {
+		if (!swsusp_page_is_free(page)) {
 			/* The page is "safe", set its bit the bitmap */
 			memory_bm_set_bit(bm, page_to_pfn(page));
 			safe_highmem_pages++;
 		}
 		/* Mark the page as allocated */
-		SetPageNosave(page);
-		SetPageNosaveFree(page);
+		swsusp_set_page_forbidden(page);
+		swsusp_set_page_free(page);
 	}
 	memory_bm_position_reset(bm);
 	safe_highmem_bm = bm;
@@ -1360,7 +1362,7 @@ get_highmem_page_buffer(struct page *pag
 	struct highmem_pbe *pbe;
 	void *kaddr;
 
-	if (PageNosave(page) && PageNosaveFree(page)) {
+	if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page)) {
 		/* We have allocated the "original" page frame and we can
 		 * use it directly to store the loaded page.
 		 */
@@ -1522,14 +1524,14 @@ prepare_image(struct memory_bitmap *new_
 			error = -ENOMEM;
 			goto Free;
 		}
-		if (!PageNosaveFree(virt_to_page(lp))) {
+		if (!swsusp_page_is_free(virt_to_page(lp))) {
 			/* The page is "safe", add it to the list */
 			lp->next = safe_pages_list;
 			safe_pages_list = lp;
 		}
 		/* Mark the page as allocated */
-		SetPageNosave(virt_to_page(lp));
-		SetPageNosaveFree(virt_to_page(lp));
+		swsusp_set_page_forbidden(virt_to_page(lp));
+		swsusp_set_page_free(virt_to_page(lp));
 		nr_pages--;
 	}
 	/* Free the reserved safe pages so that chain_alloc() can use them */
@@ -1558,7 +1560,7 @@ static void *get_buffer(struct memory_bi
 	if (PageHighMem(page))
 		return get_highmem_page_buffer(page, ca);
 
-	if (PageNosave(page) && PageNosaveFree(page))
+	if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page))
 		/* We have allocated the "original" page frame and we can
 		 * use it directly to store the loaded page.
 		 */
Index: linux-2.6.21-rc2/mm/page_alloc.c
===================================================================
--- linux-2.6.21-rc2.orig/mm/page_alloc.c	2007-03-02 09:03:43.000000000 +0100
+++ linux-2.6.21-rc2/mm/page_alloc.c	2007-03-02 09:11:54.000000000 +0100
@@ -770,8 +770,8 @@ void mark_free_pages(struct zone *zone)
 		if (pfn_valid(pfn)) {
 			struct page *page = pfn_to_page(pfn);
 
-			if (!PageNosave(page))
-				ClearPageNosaveFree(page);
+			if (!swsusp_page_is_forbidden(page))
+				swsusp_unset_page_free(page);
 		}
 
 	for (order = MAX_ORDER - 1; order >= 0; --order)
@@ -780,7 +780,7 @@ void mark_free_pages(struct zone *zone)
 
 			pfn = page_to_pfn(list_entry(curr, struct page, lru));
 			for (i = 0; i < (1UL << order); i++)
-				SetPageNosaveFree(pfn_to_page(pfn + i));
+				swsusp_set_page_free(pfn_to_page(pfn + i));
 		}
 
 	spin_unlock_irqrestore(&zone->lock, flags);

^ permalink raw reply	[flat|nested] 85+ messages in thread

* [RFC][PATCH 1/3] swsusp: Do not use page flags directly
@ 2007-03-04 14:07                 ` Rafael J. Wysocki
  0 siblings, 0 replies; 85+ messages in thread
From: Rafael J. Wysocki @ 2007-03-04 14:07 UTC (permalink / raw)
  To: Nick Piggin, Pavel Machek
  Cc: Christoph Lameter, linux-mm, pm list, Johannes Berg, Peter Zijlstra

Make swsusp stop using SetPageNosave(), SetPageNosaveFree() and friends
directly.

This way the amount of changes made in the next patch is smaller.

---
 include/linux/suspend.h |   33 +++++++++++++++++++++++++++++++++
 kernel/power/snapshot.c |   48 +++++++++++++++++++++++++-----------------------
 mm/page_alloc.c         |    6 +++---
 3 files changed, 61 insertions(+), 26 deletions(-)

Index: linux-2.6.21-rc2/include/linux/suspend.h
===================================================================
--- linux-2.6.21-rc2.orig/include/linux/suspend.h	2007-03-02 09:05:53.000000000 +0100
+++ linux-2.6.21-rc2/include/linux/suspend.h	2007-03-02 09:24:02.000000000 +0100
@@ -8,6 +8,7 @@
 #include <linux/notifier.h>
 #include <linux/init.h>
 #include <linux/pm.h>
+#include <linux/mm.h>
 
 /* struct pbe is used for creating lists of pages that should be restored
  * atomically during the resume from disk, because the page frames they have
@@ -49,6 +50,38 @@ void __save_processor_state(struct saved
 void __restore_processor_state(struct saved_context *ctxt);
 unsigned long get_safe_page(gfp_t gfp_mask);
 
+/* Page management functions for the software suspend (swsusp) */
+
+static inline void swsusp_set_page_forbidden(struct page *page)
+{
+	SetPageNosave(page);
+}
+
+static inline int swsusp_page_is_forbidden(struct page *page)
+{
+	return PageNosave(page);
+}
+
+static inline void swsusp_unset_page_forbidden(struct page *page)
+{
+	ClearPageNosave(page);
+}
+
+static inline void swsusp_set_page_free(struct page *page)
+{
+	SetPageNosaveFree(page);
+}
+
+static inline int swsusp_page_is_free(struct page *page)
+{
+	return PageNosaveFree(page);
+}
+
+static inline void swsusp_unset_page_free(struct page *page)
+{
+	ClearPageNosaveFree(page);
+}
+
 /*
  * XXX: We try to keep some more pages free so that I/O operations succeed
  * without paging. Might this be more?
Index: linux-2.6.21-rc2/kernel/power/snapshot.c
===================================================================
--- linux-2.6.21-rc2.orig/kernel/power/snapshot.c	2007-03-02 09:05:53.000000000 +0100
+++ linux-2.6.21-rc2/kernel/power/snapshot.c	2007-03-02 09:27:06.000000000 +0100
@@ -67,15 +67,15 @@ static void *get_image_page(gfp_t gfp_ma
 
 	res = (void *)get_zeroed_page(gfp_mask);
 	if (safe_needed)
-		while (res && PageNosaveFree(virt_to_page(res))) {
+		while (res && swsusp_page_is_free(virt_to_page(res))) {
 			/* The page is unsafe, mark it for swsusp_free() */
-			SetPageNosave(virt_to_page(res));
+			swsusp_set_page_forbidden(virt_to_page(res));
 			allocated_unsafe_pages++;
 			res = (void *)get_zeroed_page(gfp_mask);
 		}
 	if (res) {
-		SetPageNosave(virt_to_page(res));
-		SetPageNosaveFree(virt_to_page(res));
+		swsusp_set_page_forbidden(virt_to_page(res));
+		swsusp_set_page_free(virt_to_page(res));
 	}
 	return res;
 }
@@ -91,8 +91,8 @@ static struct page *alloc_image_page(gfp
 
 	page = alloc_page(gfp_mask);
 	if (page) {
-		SetPageNosave(page);
-		SetPageNosaveFree(page);
+		swsusp_set_page_forbidden(page);
+		swsusp_set_page_free(page);
 	}
 	return page;
 }
@@ -110,9 +110,9 @@ static inline void free_image_page(void 
 
 	page = virt_to_page(addr);
 
-	ClearPageNosave(page);
+	swsusp_unset_page_forbidden(page);
 	if (clear_nosave_free)
-		ClearPageNosaveFree(page);
+		swsusp_unset_page_free(page);
 
 	__free_page(page);
 }
@@ -615,7 +615,8 @@ static struct page *saveable_highmem_pag
 
 	BUG_ON(!PageHighMem(page));
 
-	if (PageNosave(page) || PageReserved(page) || PageNosaveFree(page))
+	if (swsusp_page_is_forbidden(page) ||  swsusp_page_is_free(page) ||
+	    PageReserved(page))
 		return NULL;
 
 	return page;
@@ -681,7 +682,7 @@ static struct page *saveable_page(unsign
 
 	BUG_ON(PageHighMem(page));
 
-	if (PageNosave(page) || PageNosaveFree(page))
+	if (swsusp_page_is_forbidden(page) || swsusp_page_is_free(page))
 		return NULL;
 
 	if (PageReserved(page) && pfn_is_nosave(pfn))
@@ -821,9 +822,10 @@ void swsusp_free(void)
 			if (pfn_valid(pfn)) {
 				struct page *page = pfn_to_page(pfn);
 
-				if (PageNosave(page) && PageNosaveFree(page)) {
-					ClearPageNosave(page);
-					ClearPageNosaveFree(page);
+				if (swsusp_page_is_forbidden(page) &&
+				    swsusp_page_is_free(page)) {
+					swsusp_unset_page_forbidden(page);
+					swsusp_unset_page_free(page);
 					__free_page(page);
 				}
 			}
@@ -1146,7 +1148,7 @@ static int mark_unsafe_pages(struct memo
 		max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
 		for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
 			if (pfn_valid(pfn))
-				ClearPageNosaveFree(pfn_to_page(pfn));
+				swsusp_unset_page_free(pfn_to_page(pfn));
 	}
 
 	/* Mark pages that correspond to the "original" pfns as "unsafe" */
@@ -1155,7 +1157,7 @@ static int mark_unsafe_pages(struct memo
 		pfn = memory_bm_next_pfn(bm);
 		if (likely(pfn != BM_END_OF_MAP)) {
 			if (likely(pfn_valid(pfn)))
-				SetPageNosaveFree(pfn_to_page(pfn));
+				swsusp_set_page_free(pfn_to_page(pfn));
 			else
 				return -EFAULT;
 		}
@@ -1321,14 +1323,14 @@ prepare_highmem_image(struct memory_bitm
 		struct page *page;
 
 		page = alloc_page(__GFP_HIGHMEM);
-		if (!PageNosaveFree(page)) {
+		if (!swsusp_page_is_free(page)) {
 			/* The page is "safe", set its bit the bitmap */
 			memory_bm_set_bit(bm, page_to_pfn(page));
 			safe_highmem_pages++;
 		}
 		/* Mark the page as allocated */
-		SetPageNosave(page);
-		SetPageNosaveFree(page);
+		swsusp_set_page_forbidden(page);
+		swsusp_set_page_free(page);
 	}
 	memory_bm_position_reset(bm);
 	safe_highmem_bm = bm;
@@ -1360,7 +1362,7 @@ get_highmem_page_buffer(struct page *pag
 	struct highmem_pbe *pbe;
 	void *kaddr;
 
-	if (PageNosave(page) && PageNosaveFree(page)) {
+	if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page)) {
 		/* We have allocated the "original" page frame and we can
 		 * use it directly to store the loaded page.
 		 */
@@ -1522,14 +1524,14 @@ prepare_image(struct memory_bitmap *new_
 			error = -ENOMEM;
 			goto Free;
 		}
-		if (!PageNosaveFree(virt_to_page(lp))) {
+		if (!swsusp_page_is_free(virt_to_page(lp))) {
 			/* The page is "safe", add it to the list */
 			lp->next = safe_pages_list;
 			safe_pages_list = lp;
 		}
 		/* Mark the page as allocated */
-		SetPageNosave(virt_to_page(lp));
-		SetPageNosaveFree(virt_to_page(lp));
+		swsusp_set_page_forbidden(virt_to_page(lp));
+		swsusp_set_page_free(virt_to_page(lp));
 		nr_pages--;
 	}
 	/* Free the reserved safe pages so that chain_alloc() can use them */
@@ -1558,7 +1560,7 @@ static void *get_buffer(struct memory_bi
 	if (PageHighMem(page))
 		return get_highmem_page_buffer(page, ca);
 
-	if (PageNosave(page) && PageNosaveFree(page))
+	if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page))
 		/* We have allocated the "original" page frame and we can
 		 * use it directly to store the loaded page.
 		 */
Index: linux-2.6.21-rc2/mm/page_alloc.c
===================================================================
--- linux-2.6.21-rc2.orig/mm/page_alloc.c	2007-03-02 09:03:43.000000000 +0100
+++ linux-2.6.21-rc2/mm/page_alloc.c	2007-03-02 09:11:54.000000000 +0100
@@ -770,8 +770,8 @@ void mark_free_pages(struct zone *zone)
 		if (pfn_valid(pfn)) {
 			struct page *page = pfn_to_page(pfn);
 
-			if (!PageNosave(page))
-				ClearPageNosaveFree(page);
+			if (!swsusp_page_is_forbidden(page))
+				swsusp_unset_page_free(page);
 		}
 
 	for (order = MAX_ORDER - 1; order >= 0; --order)
@@ -780,7 +780,7 @@ void mark_free_pages(struct zone *zone)
 
 			pfn = page_to_pfn(list_entry(curr, struct page, lru));
 			for (i = 0; i < (1UL << order); i++)
-				SetPageNosaveFree(pfn_to_page(pfn + i));
+				swsusp_set_page_free(pfn_to_page(pfn + i));
 		}
 
 	spin_unlock_irqrestore(&zone->lock, flags);

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 85+ messages in thread

* [RFC][PATCH 2/3] swsusp: Do not use page flags
  2007-03-04 13:50               ` Rafael J. Wysocki
@ 2007-03-04 14:07                 ` Rafael J. Wysocki
  -1 siblings, 0 replies; 85+ messages in thread
From: Rafael J. Wysocki @ 2007-03-04 14:07 UTC (permalink / raw)
  To: Nick Piggin, Pavel Machek
  Cc: linux-mm, Johannes Berg, Christoph Lameter, Peter Zijlstra, pm list

Make swsusp use memory bitmaps instead of page flags for marking 'nosave' and
free pages.  This allows us to 'recycle' two page flags that can be used for other
purposes.  Also, the memory needed to store the bitmaps is allocated when
necessary (ie. before the suspend) and freed after the resume which is more
reasonable.

The patch is designed to minimize the amount of changes and there are some nice
simplifications and optimizations possible on top of it.  I am going to post
them as separate patches in the future.

---
 arch/x86_64/kernel/e820.c |   26 +---
 include/linux/suspend.h   |   58 +++-------
 kernel/power/disk.c       |   23 +++-
 kernel/power/power.h      |    2 
 kernel/power/snapshot.c   |  250 +++++++++++++++++++++++++++++++++++++++++++---
 kernel/power/user.c       |    4 
 6 files changed, 281 insertions(+), 82 deletions(-)

Index: linux-2.6.21-rc2/include/linux/suspend.h
===================================================================
--- linux-2.6.21-rc2.orig/include/linux/suspend.h	2007-03-04 11:52:46.000000000 +0100
+++ linux-2.6.21-rc2/include/linux/suspend.h	2007-03-04 11:52:46.000000000 +0100
@@ -24,63 +24,41 @@ struct pbe {
 extern void drain_local_pages(void);
 extern void mark_free_pages(struct zone *zone);
 
-#ifdef CONFIG_PM
-/* kernel/power/swsusp.c */
-extern int software_suspend(void);
-
-#if defined(CONFIG_VT) && defined(CONFIG_VT_CONSOLE)
+#if defined(CONFIG_PM) && defined(CONFIG_VT) && defined(CONFIG_VT_CONSOLE)
 extern int pm_prepare_console(void);
 extern void pm_restore_console(void);
 #else
 static inline int pm_prepare_console(void) { return 0; }
 static inline void pm_restore_console(void) {}
-#endif /* defined(CONFIG_VT) && defined(CONFIG_VT_CONSOLE) */
+#endif
+
+#if defined(CONFIG_PM) && defined(CONFIG_SOFTWARE_SUSPEND)
+/* kernel/power/swsusp.c */
+extern int software_suspend(void);
+/* kernel/power/snapshot.c */
+extern void __init register_nosave_region(unsigned long, unsigned long);
+extern int swsusp_page_is_forbidden(struct page *);
+extern void swsusp_set_page_free(struct page *);
+extern void swsusp_unset_page_free(struct page *);
+extern unsigned long get_safe_page(gfp_t gfp_mask);
 #else
 static inline int software_suspend(void)
 {
 	printk("Warning: fake suspend called\n");
 	return -ENOSYS;
 }
-#endif /* CONFIG_PM */
+
+static inline void register_nosave_region(unsigned long b, unsigned long e) {}
+static inline int swsusp_page_is_forbidden(struct page *p) { return 0; }
+static inline void swsusp_set_page_free(struct page *p) {}
+static inline void swsusp_unset_page_free(struct page *p) {}
+#endif /* defined(CONFIG_PM) && defined(CONFIG_SOFTWARE_SUSPEND) */
 
 void save_processor_state(void);
 void restore_processor_state(void);
 struct saved_context;
 void __save_processor_state(struct saved_context *ctxt);
 void __restore_processor_state(struct saved_context *ctxt);
-unsigned long get_safe_page(gfp_t gfp_mask);
-
-/* Page management functions for the software suspend (swsusp) */
-
-static inline void swsusp_set_page_forbidden(struct page *page)
-{
-	SetPageNosave(page);
-}
-
-static inline int swsusp_page_is_forbidden(struct page *page)
-{
-	return PageNosave(page);
-}
-
-static inline void swsusp_unset_page_forbidden(struct page *page)
-{
-	ClearPageNosave(page);
-}
-
-static inline void swsusp_set_page_free(struct page *page)
-{
-	SetPageNosaveFree(page);
-}
-
-static inline int swsusp_page_is_free(struct page *page)
-{
-	return PageNosaveFree(page);
-}
-
-static inline void swsusp_unset_page_free(struct page *page)
-{
-	ClearPageNosaveFree(page);
-}
 
 /*
  * XXX: We try to keep some more pages free so that I/O operations succeed
Index: linux-2.6.21-rc2/kernel/power/snapshot.c
===================================================================
--- linux-2.6.21-rc2.orig/kernel/power/snapshot.c	2007-03-04 11:52:46.000000000 +0100
+++ linux-2.6.21-rc2/kernel/power/snapshot.c	2007-03-04 15:06:13.000000000 +0100
@@ -21,6 +21,7 @@
 #include <linux/kernel.h>
 #include <linux/pm.h>
 #include <linux/device.h>
+#include <linux/init.h>
 #include <linux/bootmem.h>
 #include <linux/syscalls.h>
 #include <linux/console.h>
@@ -34,6 +35,10 @@
 
 #include "power.h"
 
+static int swsusp_page_is_free(struct page *);
+static void swsusp_set_page_forbidden(struct page *);
+static void swsusp_unset_page_forbidden(struct page *);
+
 /* List of PBEs needed for restoring the pages that were allocated before
  * the suspend and included in the suspend image, but have also been
  * allocated by the "resume" kernel, so their contents cannot be written
@@ -224,11 +229,6 @@ static void chain_free(struct chain_allo
  *	of type unsigned long each).  It also contains the pfns that
  *	correspond to the start and end of the represented memory area and
  *	the number of bit chunks in the block.
- *
- *	NOTE: Memory bitmaps are used for two types of operations only:
- *	"set a bit" and "find the next bit set".  Moreover, the searching
- *	is always carried out after all of the "set a bit" operations
- *	on given bitmap.
  */
 
 #define BM_END_OF_MAP	(~0UL)
@@ -443,15 +443,13 @@ static void memory_bm_free(struct memory
 }
 
 /**
- *	memory_bm_set_bit - set the bit in the bitmap @bm that corresponds
+ *	memory_bm_find_bit - find the bit in the bitmap @bm that corresponds
  *	to given pfn.  The cur_zone_bm member of @bm and the cur_block member
  *	of @bm->cur_zone_bm are updated.
- *
- *	If the bit cannot be set, the function returns -EINVAL .
  */
 
-static int
-memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn)
+static void memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn,
+				void **addr, unsigned int *bit_nr)
 {
 	struct zone_bitmap *zone_bm;
 	struct bm_block *bb;
@@ -463,8 +461,8 @@ memory_bm_set_bit(struct memory_bitmap *
 		/* We don't assume that the zones are sorted by pfns */
 		while (pfn < zone_bm->start_pfn || pfn >= zone_bm->end_pfn) {
 			zone_bm = zone_bm->next;
-			if (unlikely(!zone_bm))
-				return -EINVAL;
+
+			BUG_ON(!zone_bm);
 		}
 		bm->cur.zone_bm = zone_bm;
 	}
@@ -475,13 +473,40 @@ memory_bm_set_bit(struct memory_bitmap *
 
 	while (pfn >= bb->end_pfn) {
 		bb = bb->next;
-		if (unlikely(!bb))
-			return -EINVAL;
+
+		BUG_ON(!bb);
 	}
 	zone_bm->cur_block = bb;
 	pfn -= bb->start_pfn;
-	set_bit(pfn % BM_BITS_PER_CHUNK, bb->data + pfn / BM_BITS_PER_CHUNK);
-	return 0;
+	*bit_nr = pfn % BM_BITS_PER_CHUNK;
+	*addr = bb->data + pfn / BM_BITS_PER_CHUNK;
+}
+
+static void memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn)
+{
+	void *addr;
+	unsigned int bit;
+
+	memory_bm_find_bit(bm, pfn, &addr, &bit);
+	set_bit(bit, addr);
+}
+
+static void memory_bm_clear_bit(struct memory_bitmap *bm, unsigned long pfn)
+{
+	void *addr;
+	unsigned int bit;
+
+	memory_bm_find_bit(bm, pfn, &addr, &bit);
+	clear_bit(bit, addr);
+}
+
+static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn)
+{
+	void *addr;
+	unsigned int bit;
+
+	memory_bm_find_bit(bm, pfn, &addr, &bit);
+	return test_bit(bit, addr);
 }
 
 /* Two auxiliary functions for memory_bm_next_pfn */
@@ -564,6 +589,199 @@ static unsigned long memory_bm_next_pfn(
 }
 
 /**
+ *	This structure represents a range of page frames the contents of which
+ *	should not be saved during the suspend.
+ */
+
+struct nosave_region {
+	struct list_head list;
+	unsigned long start_pfn;
+	unsigned long end_pfn;
+};
+
+static LIST_HEAD(nosave_regions);
+
+/**
+ *	register_nosave_region - register a range of page frames the contents
+ *	of which should not be saved during the suspend (to be used in the early
+ *	initializatoion code)
+ */
+
+void __init
+register_nosave_region(unsigned long start_pfn, unsigned long end_pfn)
+{
+	struct nosave_region *region;
+
+	if (start_pfn >= end_pfn)
+		return;
+
+	if (!list_empty(&nosave_regions)) {
+		/* Try to extend the previous region (they should be sorted) */
+		region = list_entry(nosave_regions.prev,
+					struct nosave_region, list);
+		if (region->end_pfn == start_pfn) {
+			region->end_pfn = end_pfn;
+			goto Report;
+		}
+	}
+	/* This allocation cannot fail */
+	region = alloc_bootmem_low(sizeof(struct nosave_region));
+	region->start_pfn = start_pfn;
+	region->end_pfn = end_pfn;
+	list_add_tail(&region->list, &nosave_regions);
+ Report:
+	printk("swsusp: Registered nosave memory region: %016lx - %016lx\n",
+		start_pfn << PAGE_SHIFT, end_pfn << PAGE_SHIFT);
+}
+
+/*
+ * Set bits in this map correspond to the page frames the contents of which
+ * should not be saved during the suspend.
+ */
+static struct memory_bitmap *forbidden_pages_map;
+
+/* Set bits in this map correspond to free page frames. */
+static struct memory_bitmap *free_pages_map;
+
+/*
+ * Each page frame allocated for creating the image is marked by setting the
+ * corresponding bits in forbidden_pages_map and free_pages_map simultaneously
+ */
+
+void swsusp_set_page_free(struct page *page)
+{
+	if (free_pages_map)
+		memory_bm_set_bit(free_pages_map, page_to_pfn(page));
+}
+
+static int swsusp_page_is_free(struct page *page)
+{
+	return free_pages_map ?
+		memory_bm_test_bit(free_pages_map, page_to_pfn(page)) : 0;
+}
+
+void swsusp_unset_page_free(struct page *page)
+{
+	if (free_pages_map)
+		memory_bm_clear_bit(free_pages_map, page_to_pfn(page));
+}
+
+static void swsusp_set_page_forbidden(struct page *page)
+{
+	if (forbidden_pages_map)
+		memory_bm_set_bit(forbidden_pages_map, page_to_pfn(page));
+}
+
+int swsusp_page_is_forbidden(struct page *page)
+{
+	return forbidden_pages_map ?
+		memory_bm_test_bit(forbidden_pages_map, page_to_pfn(page)) : 0;
+}
+
+static void swsusp_unset_page_forbidden(struct page *page)
+{
+	if (forbidden_pages_map)
+		memory_bm_clear_bit(forbidden_pages_map, page_to_pfn(page));
+}
+
+/**
+ *	mark_nosave_pages - set bits corresponding to the page frames the
+ *	contents of which should not be saved in a given bitmap.
+ */
+
+static void mark_nosave_pages(struct memory_bitmap *bm)
+{
+	struct nosave_region *region;
+
+	if (list_empty(&nosave_regions))
+		return;
+
+	list_for_each_entry(region, &nosave_regions, list) {
+		unsigned long pfn;
+
+		printk("swsusp: Marking nosave pages: %016lx - %016lx\n",
+				region->start_pfn << PAGE_SHIFT,
+				region->end_pfn << PAGE_SHIFT);
+
+		for (pfn = region->start_pfn; pfn < region->end_pfn; pfn++)
+			memory_bm_set_bit(bm, pfn);
+	}
+}
+
+/**
+ *	create_basic_memory_bitmaps - create bitmaps needed for marking page
+ *	frames that should not be saved and free page frames.  The pointers
+ *	forbidden_pages_map and free_pages_map are only modified if everything
+ *	goes well, because we don't want the bits to be used before both bitmaps
+ *	are set up.
+ */
+
+int create_basic_memory_bitmaps(void)
+{
+	struct memory_bitmap *bm1, *bm2;
+	int error = 0;
+
+	BUG_ON(forbidden_pages_map || free_pages_map);
+
+	bm1 = kzalloc(sizeof(struct memory_bitmap), GFP_ATOMIC);
+	if (!bm1)
+		return -ENOMEM;
+
+	error = memory_bm_create(bm1, GFP_ATOMIC | __GFP_COLD, PG_ANY);
+	if (error)
+		goto Free_first_object;
+
+	bm2 = kzalloc(sizeof(struct memory_bitmap), GFP_ATOMIC);
+	if (!bm2)
+		goto Free_first_bitmap;
+
+	error = memory_bm_create(bm2, GFP_ATOMIC | __GFP_COLD, PG_ANY);
+	if (error)
+		goto Free_second_object;
+
+	forbidden_pages_map = bm1;
+	free_pages_map = bm2;
+	mark_nosave_pages(forbidden_pages_map);
+
+	printk("swsusp: Basic memory bitmaps created\n");
+
+	return 0;
+
+ Free_second_object:
+	kfree(bm2);
+ Free_first_bitmap:
+ 	memory_bm_free(bm1, PG_UNSAFE_CLEAR);
+ Free_first_object:
+	kfree(bm1);
+	return -ENOMEM;
+}
+
+/**
+ *	free_basic_memory_bitmaps - free memory bitmaps allocated by
+ *	create_basic_memory_bitmaps().  The auxiliary pointers are necessary
+ *	so that the bitmaps themselves are not referred to while they are being
+ *	freed.
+ */
+
+void free_basic_memory_bitmaps(void)
+{
+	struct memory_bitmap *bm1, *bm2;
+
+	BUG_ON(!(forbidden_pages_map && free_pages_map));
+
+	bm1 = forbidden_pages_map;
+	bm2 = free_pages_map;
+	forbidden_pages_map = NULL;
+	free_pages_map = NULL;
+	memory_bm_free(bm1, PG_UNSAFE_CLEAR);
+	kfree(bm1);
+	memory_bm_free(bm2, PG_UNSAFE_CLEAR);
+	kfree(bm2);
+
+	printk("swsusp: Basic memory bitmaps freed\n");
+}
+
+/**
  *	snapshot_additional_pages - estimate the number of additional pages
  *	be needed for setting up the suspend image data structures for given
  *	zone (usually the returned value is greater than the exact number)
Index: linux-2.6.21-rc2/arch/x86_64/kernel/e820.c
===================================================================
--- linux-2.6.21-rc2.orig/arch/x86_64/kernel/e820.c	2007-03-04 11:30:18.000000000 +0100
+++ linux-2.6.21-rc2/arch/x86_64/kernel/e820.c	2007-03-04 13:25:20.000000000 +0100
@@ -17,6 +17,8 @@
 #include <linux/kexec.h>
 #include <linux/module.h>
 #include <linux/mm.h>
+#include <linux/suspend.h>
+#include <linux/pfn.h>
 
 #include <asm/pgtable.h>
 #include <asm/page.h>
@@ -255,22 +257,6 @@ void __init e820_reserve_resources(void)
 	}
 }
 
-/* Mark pages corresponding to given address range as nosave */
-static void __init
-e820_mark_nosave_range(unsigned long start, unsigned long end)
-{
-	unsigned long pfn, max_pfn;
-
-	if (start >= end)
-		return;
-
-	printk("Nosave address range: %016lx - %016lx\n", start, end);
-	max_pfn = end >> PAGE_SHIFT;
-	for (pfn = start >> PAGE_SHIFT; pfn < max_pfn; pfn++)
-		if (pfn_valid(pfn))
-			SetPageNosave(pfn_to_page(pfn));
-}
-
 /*
  * Find the ranges of physical addresses that do not correspond to
  * e820 RAM areas and mark the corresponding pages as nosave for software
@@ -289,13 +275,13 @@ void __init e820_mark_nosave_regions(voi
 		struct e820entry *ei = &e820.map[i];
 
 		if (paddr < ei->addr)
-			e820_mark_nosave_range(paddr,
-					round_up(ei->addr, PAGE_SIZE));
+			register_nosave_region(PFN_DOWN(paddr),
+						PFN_UP(ei->addr));
 
 		paddr = round_down(ei->addr + ei->size, PAGE_SIZE);
 		if (ei->type != E820_RAM)
-			e820_mark_nosave_range(round_up(ei->addr, PAGE_SIZE),
-					paddr);
+			register_nosave_region(PFN_UP(ei->addr),
+						PFN_DOWN(paddr));
 
 		if (paddr >= (end_pfn << PAGE_SHIFT))
 			break;
Index: linux-2.6.21-rc2/kernel/power/power.h
===================================================================
--- linux-2.6.21-rc2.orig/kernel/power/power.h	2007-03-04 11:30:18.000000000 +0100
+++ linux-2.6.21-rc2/kernel/power/power.h	2007-03-04 11:52:46.000000000 +0100
@@ -49,6 +49,8 @@ extern sector_t swsusp_resume_block;
 extern asmlinkage int swsusp_arch_suspend(void);
 extern asmlinkage int swsusp_arch_resume(void);
 
+extern int create_basic_memory_bitmaps(void);
+extern void free_basic_memory_bitmaps(void);
 extern unsigned int count_data_pages(void);
 
 /**
Index: linux-2.6.21-rc2/kernel/power/user.c
===================================================================
--- linux-2.6.21-rc2.orig/kernel/power/user.c	2007-03-04 11:30:18.000000000 +0100
+++ linux-2.6.21-rc2/kernel/power/user.c	2007-03-04 14:02:19.000000000 +0100
@@ -52,6 +52,9 @@ static int snapshot_open(struct inode *i
 	if ((filp->f_flags & O_ACCMODE) == O_RDWR)
 		return -ENOSYS;
 
+	if(create_basic_memory_bitmaps())
+		return -ENOMEM;
+
 	nonseekable_open(inode, filp);
 	data = &snapshot_state;
 	filp->private_data = data;
@@ -77,6 +80,7 @@ static int snapshot_release(struct inode
 	struct snapshot_data *data;
 
 	swsusp_free();
+	free_basic_memory_bitmaps();
 	data = filp->private_data;
 	free_all_swap_pages(data->swap, data->bitmap);
 	free_bitmap(data->bitmap);
Index: linux-2.6.21-rc2/kernel/power/disk.c
===================================================================
--- linux-2.6.21-rc2.orig/kernel/power/disk.c	2007-03-04 11:30:18.000000000 +0100
+++ linux-2.6.21-rc2/kernel/power/disk.c	2007-03-04 11:52:46.000000000 +0100
@@ -127,14 +127,19 @@ int pm_suspend_disk(void)
 		mdelay(5000);
 		goto Thaw;
 	}
+	/* Allocate memory management structures */
+	error = create_basic_memory_bitmaps();
+	if (error)
+		goto Thaw;
+
 	/* Free memory before shutting down devices. */
 	error = swsusp_shrink_memory();
 	if (error)
-		goto Thaw;
+		goto Finish;
 
 	error = platform_prepare();
 	if (error)
-		goto Thaw;
+		goto Finish;
 
 	suspend_console();
 	error = device_suspend(PMSG_FREEZE);
@@ -169,7 +174,7 @@ int pm_suspend_disk(void)
 			power_down(pm_disk_mode);
 		else {
 			swsusp_free();
-			goto Thaw;
+			goto Finish;
 		}
 	} else {
 		pr_debug("PM: Image restored successfully.\n");
@@ -182,6 +187,8 @@ int pm_suspend_disk(void)
 	platform_finish();
 	device_resume();
 	resume_console();
+ Finish:
+	free_basic_memory_bitmaps();
  Thaw:
 	unprepare_processes();
 	return error;
@@ -227,13 +234,15 @@ static int software_resume(void)
 	}
 
 	pr_debug("PM: Checking swsusp image.\n");
-
 	error = swsusp_check();
 	if (error)
-		goto Done;
+		goto Unlock;
 
-	pr_debug("PM: Preparing processes for restore.\n");
+	error = create_basic_memory_bitmaps();
+	if (error)
+		goto Unlock;
 
+	pr_debug("PM: Preparing processes for restore.\n");
 	error = prepare_processes();
 	if (error) {
 		swsusp_close();
@@ -275,7 +284,9 @@ static int software_resume(void)
 	printk(KERN_ERR "PM: Restore failed, recovering.\n");
 	unprepare_processes();
  Done:
+	free_basic_memory_bitmaps();
 	/* For success case, the suspend path will release the lock */
+ Unlock:
 	mutex_unlock(&pm_mutex);
 	pr_debug("PM: Resume from disk failed.\n");
 	return 0;

^ permalink raw reply	[flat|nested] 85+ messages in thread

* [RFC][PATCH 2/3] swsusp: Do not use page flags
@ 2007-03-04 14:07                 ` Rafael J. Wysocki
  0 siblings, 0 replies; 85+ messages in thread
From: Rafael J. Wysocki @ 2007-03-04 14:07 UTC (permalink / raw)
  To: Nick Piggin, Pavel Machek
  Cc: Christoph Lameter, linux-mm, pm list, Johannes Berg, Peter Zijlstra

Make swsusp use memory bitmaps instead of page flags for marking 'nosave' and
free pages.  This allows us to 'recycle' two page flags that can be used for other
purposes.  Also, the memory needed to store the bitmaps is allocated when
necessary (ie. before the suspend) and freed after the resume which is more
reasonable.

The patch is designed to minimize the amount of changes and there are some nice
simplifications and optimizations possible on top of it.  I am going to post
them as separate patches in the future.

---
 arch/x86_64/kernel/e820.c |   26 +---
 include/linux/suspend.h   |   58 +++-------
 kernel/power/disk.c       |   23 +++-
 kernel/power/power.h      |    2 
 kernel/power/snapshot.c   |  250 +++++++++++++++++++++++++++++++++++++++++++---
 kernel/power/user.c       |    4 
 6 files changed, 281 insertions(+), 82 deletions(-)

Index: linux-2.6.21-rc2/include/linux/suspend.h
===================================================================
--- linux-2.6.21-rc2.orig/include/linux/suspend.h	2007-03-04 11:52:46.000000000 +0100
+++ linux-2.6.21-rc2/include/linux/suspend.h	2007-03-04 11:52:46.000000000 +0100
@@ -24,63 +24,41 @@ struct pbe {
 extern void drain_local_pages(void);
 extern void mark_free_pages(struct zone *zone);
 
-#ifdef CONFIG_PM
-/* kernel/power/swsusp.c */
-extern int software_suspend(void);
-
-#if defined(CONFIG_VT) && defined(CONFIG_VT_CONSOLE)
+#if defined(CONFIG_PM) && defined(CONFIG_VT) && defined(CONFIG_VT_CONSOLE)
 extern int pm_prepare_console(void);
 extern void pm_restore_console(void);
 #else
 static inline int pm_prepare_console(void) { return 0; }
 static inline void pm_restore_console(void) {}
-#endif /* defined(CONFIG_VT) && defined(CONFIG_VT_CONSOLE) */
+#endif
+
+#if defined(CONFIG_PM) && defined(CONFIG_SOFTWARE_SUSPEND)
+/* kernel/power/swsusp.c */
+extern int software_suspend(void);
+/* kernel/power/snapshot.c */
+extern void __init register_nosave_region(unsigned long, unsigned long);
+extern int swsusp_page_is_forbidden(struct page *);
+extern void swsusp_set_page_free(struct page *);
+extern void swsusp_unset_page_free(struct page *);
+extern unsigned long get_safe_page(gfp_t gfp_mask);
 #else
 static inline int software_suspend(void)
 {
 	printk("Warning: fake suspend called\n");
 	return -ENOSYS;
 }
-#endif /* CONFIG_PM */
+
+static inline void register_nosave_region(unsigned long b, unsigned long e) {}
+static inline int swsusp_page_is_forbidden(struct page *p) { return 0; }
+static inline void swsusp_set_page_free(struct page *p) {}
+static inline void swsusp_unset_page_free(struct page *p) {}
+#endif /* defined(CONFIG_PM) && defined(CONFIG_SOFTWARE_SUSPEND) */
 
 void save_processor_state(void);
 void restore_processor_state(void);
 struct saved_context;
 void __save_processor_state(struct saved_context *ctxt);
 void __restore_processor_state(struct saved_context *ctxt);
-unsigned long get_safe_page(gfp_t gfp_mask);
-
-/* Page management functions for the software suspend (swsusp) */
-
-static inline void swsusp_set_page_forbidden(struct page *page)
-{
-	SetPageNosave(page);
-}
-
-static inline int swsusp_page_is_forbidden(struct page *page)
-{
-	return PageNosave(page);
-}
-
-static inline void swsusp_unset_page_forbidden(struct page *page)
-{
-	ClearPageNosave(page);
-}
-
-static inline void swsusp_set_page_free(struct page *page)
-{
-	SetPageNosaveFree(page);
-}
-
-static inline int swsusp_page_is_free(struct page *page)
-{
-	return PageNosaveFree(page);
-}
-
-static inline void swsusp_unset_page_free(struct page *page)
-{
-	ClearPageNosaveFree(page);
-}
 
 /*
  * XXX: We try to keep some more pages free so that I/O operations succeed
Index: linux-2.6.21-rc2/kernel/power/snapshot.c
===================================================================
--- linux-2.6.21-rc2.orig/kernel/power/snapshot.c	2007-03-04 11:52:46.000000000 +0100
+++ linux-2.6.21-rc2/kernel/power/snapshot.c	2007-03-04 15:06:13.000000000 +0100
@@ -21,6 +21,7 @@
 #include <linux/kernel.h>
 #include <linux/pm.h>
 #include <linux/device.h>
+#include <linux/init.h>
 #include <linux/bootmem.h>
 #include <linux/syscalls.h>
 #include <linux/console.h>
@@ -34,6 +35,10 @@
 
 #include "power.h"
 
+static int swsusp_page_is_free(struct page *);
+static void swsusp_set_page_forbidden(struct page *);
+static void swsusp_unset_page_forbidden(struct page *);
+
 /* List of PBEs needed for restoring the pages that were allocated before
  * the suspend and included in the suspend image, but have also been
  * allocated by the "resume" kernel, so their contents cannot be written
@@ -224,11 +229,6 @@ static void chain_free(struct chain_allo
  *	of type unsigned long each).  It also contains the pfns that
  *	correspond to the start and end of the represented memory area and
  *	the number of bit chunks in the block.
- *
- *	NOTE: Memory bitmaps are used for two types of operations only:
- *	"set a bit" and "find the next bit set".  Moreover, the searching
- *	is always carried out after all of the "set a bit" operations
- *	on given bitmap.
  */
 
 #define BM_END_OF_MAP	(~0UL)
@@ -443,15 +443,13 @@ static void memory_bm_free(struct memory
 }
 
 /**
- *	memory_bm_set_bit - set the bit in the bitmap @bm that corresponds
+ *	memory_bm_find_bit - find the bit in the bitmap @bm that corresponds
  *	to given pfn.  The cur_zone_bm member of @bm and the cur_block member
  *	of @bm->cur_zone_bm are updated.
- *
- *	If the bit cannot be set, the function returns -EINVAL .
  */
 
-static int
-memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn)
+static void memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn,
+				void **addr, unsigned int *bit_nr)
 {
 	struct zone_bitmap *zone_bm;
 	struct bm_block *bb;
@@ -463,8 +461,8 @@ memory_bm_set_bit(struct memory_bitmap *
 		/* We don't assume that the zones are sorted by pfns */
 		while (pfn < zone_bm->start_pfn || pfn >= zone_bm->end_pfn) {
 			zone_bm = zone_bm->next;
-			if (unlikely(!zone_bm))
-				return -EINVAL;
+
+			BUG_ON(!zone_bm);
 		}
 		bm->cur.zone_bm = zone_bm;
 	}
@@ -475,13 +473,40 @@ memory_bm_set_bit(struct memory_bitmap *
 
 	while (pfn >= bb->end_pfn) {
 		bb = bb->next;
-		if (unlikely(!bb))
-			return -EINVAL;
+
+		BUG_ON(!bb);
 	}
 	zone_bm->cur_block = bb;
 	pfn -= bb->start_pfn;
-	set_bit(pfn % BM_BITS_PER_CHUNK, bb->data + pfn / BM_BITS_PER_CHUNK);
-	return 0;
+	*bit_nr = pfn % BM_BITS_PER_CHUNK;
+	*addr = bb->data + pfn / BM_BITS_PER_CHUNK;
+}
+
+static void memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn)
+{
+	void *addr;
+	unsigned int bit;
+
+	memory_bm_find_bit(bm, pfn, &addr, &bit);
+	set_bit(bit, addr);
+}
+
+static void memory_bm_clear_bit(struct memory_bitmap *bm, unsigned long pfn)
+{
+	void *addr;
+	unsigned int bit;
+
+	memory_bm_find_bit(bm, pfn, &addr, &bit);
+	clear_bit(bit, addr);
+}
+
+static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn)
+{
+	void *addr;
+	unsigned int bit;
+
+	memory_bm_find_bit(bm, pfn, &addr, &bit);
+	return test_bit(bit, addr);
 }
 
 /* Two auxiliary functions for memory_bm_next_pfn */
@@ -564,6 +589,199 @@ static unsigned long memory_bm_next_pfn(
 }
 
 /**
+ *	This structure represents a range of page frames the contents of which
+ *	should not be saved during the suspend.
+ */
+
+struct nosave_region {
+	struct list_head list;
+	unsigned long start_pfn;
+	unsigned long end_pfn;
+};
+
+static LIST_HEAD(nosave_regions);
+
+/**
+ *	register_nosave_region - register a range of page frames the contents
+ *	of which should not be saved during the suspend (to be used in the early
+ *	initializatoion code)
+ */
+
+void __init
+register_nosave_region(unsigned long start_pfn, unsigned long end_pfn)
+{
+	struct nosave_region *region;
+
+	if (start_pfn >= end_pfn)
+		return;
+
+	if (!list_empty(&nosave_regions)) {
+		/* Try to extend the previous region (they should be sorted) */
+		region = list_entry(nosave_regions.prev,
+					struct nosave_region, list);
+		if (region->end_pfn == start_pfn) {
+			region->end_pfn = end_pfn;
+			goto Report;
+		}
+	}
+	/* This allocation cannot fail */
+	region = alloc_bootmem_low(sizeof(struct nosave_region));
+	region->start_pfn = start_pfn;
+	region->end_pfn = end_pfn;
+	list_add_tail(&region->list, &nosave_regions);
+ Report:
+	printk("swsusp: Registered nosave memory region: %016lx - %016lx\n",
+		start_pfn << PAGE_SHIFT, end_pfn << PAGE_SHIFT);
+}
+
+/*
+ * Set bits in this map correspond to the page frames the contents of which
+ * should not be saved during the suspend.
+ */
+static struct memory_bitmap *forbidden_pages_map;
+
+/* Set bits in this map correspond to free page frames. */
+static struct memory_bitmap *free_pages_map;
+
+/*
+ * Each page frame allocated for creating the image is marked by setting the
+ * corresponding bits in forbidden_pages_map and free_pages_map simultaneously
+ */
+
+void swsusp_set_page_free(struct page *page)
+{
+	if (free_pages_map)
+		memory_bm_set_bit(free_pages_map, page_to_pfn(page));
+}
+
+static int swsusp_page_is_free(struct page *page)
+{
+	return free_pages_map ?
+		memory_bm_test_bit(free_pages_map, page_to_pfn(page)) : 0;
+}
+
+void swsusp_unset_page_free(struct page *page)
+{
+	if (free_pages_map)
+		memory_bm_clear_bit(free_pages_map, page_to_pfn(page));
+}
+
+static void swsusp_set_page_forbidden(struct page *page)
+{
+	if (forbidden_pages_map)
+		memory_bm_set_bit(forbidden_pages_map, page_to_pfn(page));
+}
+
+int swsusp_page_is_forbidden(struct page *page)
+{
+	return forbidden_pages_map ?
+		memory_bm_test_bit(forbidden_pages_map, page_to_pfn(page)) : 0;
+}
+
+static void swsusp_unset_page_forbidden(struct page *page)
+{
+	if (forbidden_pages_map)
+		memory_bm_clear_bit(forbidden_pages_map, page_to_pfn(page));
+}
+
+/**
+ *	mark_nosave_pages - set bits corresponding to the page frames the
+ *	contents of which should not be saved in a given bitmap.
+ */
+
+static void mark_nosave_pages(struct memory_bitmap *bm)
+{
+	struct nosave_region *region;
+
+	if (list_empty(&nosave_regions))
+		return;
+
+	list_for_each_entry(region, &nosave_regions, list) {
+		unsigned long pfn;
+
+		printk("swsusp: Marking nosave pages: %016lx - %016lx\n",
+				region->start_pfn << PAGE_SHIFT,
+				region->end_pfn << PAGE_SHIFT);
+
+		for (pfn = region->start_pfn; pfn < region->end_pfn; pfn++)
+			memory_bm_set_bit(bm, pfn);
+	}
+}
+
+/**
+ *	create_basic_memory_bitmaps - create bitmaps needed for marking page
+ *	frames that should not be saved and free page frames.  The pointers
+ *	forbidden_pages_map and free_pages_map are only modified if everything
+ *	goes well, because we don't want the bits to be used before both bitmaps
+ *	are set up.
+ */
+
+int create_basic_memory_bitmaps(void)
+{
+	struct memory_bitmap *bm1, *bm2;
+	int error = 0;
+
+	BUG_ON(forbidden_pages_map || free_pages_map);
+
+	bm1 = kzalloc(sizeof(struct memory_bitmap), GFP_ATOMIC);
+	if (!bm1)
+		return -ENOMEM;
+
+	error = memory_bm_create(bm1, GFP_ATOMIC | __GFP_COLD, PG_ANY);
+	if (error)
+		goto Free_first_object;
+
+	bm2 = kzalloc(sizeof(struct memory_bitmap), GFP_ATOMIC);
+	if (!bm2)
+		goto Free_first_bitmap;
+
+	error = memory_bm_create(bm2, GFP_ATOMIC | __GFP_COLD, PG_ANY);
+	if (error)
+		goto Free_second_object;
+
+	forbidden_pages_map = bm1;
+	free_pages_map = bm2;
+	mark_nosave_pages(forbidden_pages_map);
+
+	printk("swsusp: Basic memory bitmaps created\n");
+
+	return 0;
+
+ Free_second_object:
+	kfree(bm2);
+ Free_first_bitmap:
+ 	memory_bm_free(bm1, PG_UNSAFE_CLEAR);
+ Free_first_object:
+	kfree(bm1);
+	return -ENOMEM;
+}
+
+/**
+ *	free_basic_memory_bitmaps - free memory bitmaps allocated by
+ *	create_basic_memory_bitmaps().  The auxiliary pointers are necessary
+ *	so that the bitmaps themselves are not referred to while they are being
+ *	freed.
+ */
+
+void free_basic_memory_bitmaps(void)
+{
+	struct memory_bitmap *bm1, *bm2;
+
+	BUG_ON(!(forbidden_pages_map && free_pages_map));
+
+	bm1 = forbidden_pages_map;
+	bm2 = free_pages_map;
+	forbidden_pages_map = NULL;
+	free_pages_map = NULL;
+	memory_bm_free(bm1, PG_UNSAFE_CLEAR);
+	kfree(bm1);
+	memory_bm_free(bm2, PG_UNSAFE_CLEAR);
+	kfree(bm2);
+
+	printk("swsusp: Basic memory bitmaps freed\n");
+}
+
+/**
  *	snapshot_additional_pages - estimate the number of additional pages
  *	be needed for setting up the suspend image data structures for given
  *	zone (usually the returned value is greater than the exact number)
Index: linux-2.6.21-rc2/arch/x86_64/kernel/e820.c
===================================================================
--- linux-2.6.21-rc2.orig/arch/x86_64/kernel/e820.c	2007-03-04 11:30:18.000000000 +0100
+++ linux-2.6.21-rc2/arch/x86_64/kernel/e820.c	2007-03-04 13:25:20.000000000 +0100
@@ -17,6 +17,8 @@
 #include <linux/kexec.h>
 #include <linux/module.h>
 #include <linux/mm.h>
+#include <linux/suspend.h>
+#include <linux/pfn.h>
 
 #include <asm/pgtable.h>
 #include <asm/page.h>
@@ -255,22 +257,6 @@ void __init e820_reserve_resources(void)
 	}
 }
 
-/* Mark pages corresponding to given address range as nosave */
-static void __init
-e820_mark_nosave_range(unsigned long start, unsigned long end)
-{
-	unsigned long pfn, max_pfn;
-
-	if (start >= end)
-		return;
-
-	printk("Nosave address range: %016lx - %016lx\n", start, end);
-	max_pfn = end >> PAGE_SHIFT;
-	for (pfn = start >> PAGE_SHIFT; pfn < max_pfn; pfn++)
-		if (pfn_valid(pfn))
-			SetPageNosave(pfn_to_page(pfn));
-}
-
 /*
  * Find the ranges of physical addresses that do not correspond to
  * e820 RAM areas and mark the corresponding pages as nosave for software
@@ -289,13 +275,13 @@ void __init e820_mark_nosave_regions(voi
 		struct e820entry *ei = &e820.map[i];
 
 		if (paddr < ei->addr)
-			e820_mark_nosave_range(paddr,
-					round_up(ei->addr, PAGE_SIZE));
+			register_nosave_region(PFN_DOWN(paddr),
+						PFN_UP(ei->addr));
 
 		paddr = round_down(ei->addr + ei->size, PAGE_SIZE);
 		if (ei->type != E820_RAM)
-			e820_mark_nosave_range(round_up(ei->addr, PAGE_SIZE),
-					paddr);
+			register_nosave_region(PFN_UP(ei->addr),
+						PFN_DOWN(paddr));
 
 		if (paddr >= (end_pfn << PAGE_SHIFT))
 			break;
Index: linux-2.6.21-rc2/kernel/power/power.h
===================================================================
--- linux-2.6.21-rc2.orig/kernel/power/power.h	2007-03-04 11:30:18.000000000 +0100
+++ linux-2.6.21-rc2/kernel/power/power.h	2007-03-04 11:52:46.000000000 +0100
@@ -49,6 +49,8 @@ extern sector_t swsusp_resume_block;
 extern asmlinkage int swsusp_arch_suspend(void);
 extern asmlinkage int swsusp_arch_resume(void);
 
+extern int create_basic_memory_bitmaps(void);
+extern void free_basic_memory_bitmaps(void);
 extern unsigned int count_data_pages(void);
 
 /**
Index: linux-2.6.21-rc2/kernel/power/user.c
===================================================================
--- linux-2.6.21-rc2.orig/kernel/power/user.c	2007-03-04 11:30:18.000000000 +0100
+++ linux-2.6.21-rc2/kernel/power/user.c	2007-03-04 14:02:19.000000000 +0100
@@ -52,6 +52,9 @@ static int snapshot_open(struct inode *i
 	if ((filp->f_flags & O_ACCMODE) == O_RDWR)
 		return -ENOSYS;
 
+	if(create_basic_memory_bitmaps())
+		return -ENOMEM;
+
 	nonseekable_open(inode, filp);
 	data = &snapshot_state;
 	filp->private_data = data;
@@ -77,6 +80,7 @@ static int snapshot_release(struct inode
 	struct snapshot_data *data;
 
 	swsusp_free();
+	free_basic_memory_bitmaps();
 	data = filp->private_data;
 	free_all_swap_pages(data->swap, data->bitmap);
 	free_bitmap(data->bitmap);
Index: linux-2.6.21-rc2/kernel/power/disk.c
===================================================================
--- linux-2.6.21-rc2.orig/kernel/power/disk.c	2007-03-04 11:30:18.000000000 +0100
+++ linux-2.6.21-rc2/kernel/power/disk.c	2007-03-04 11:52:46.000000000 +0100
@@ -127,14 +127,19 @@ int pm_suspend_disk(void)
 		mdelay(5000);
 		goto Thaw;
 	}
+	/* Allocate memory management structures */
+	error = create_basic_memory_bitmaps();
+	if (error)
+		goto Thaw;
+
 	/* Free memory before shutting down devices. */
 	error = swsusp_shrink_memory();
 	if (error)
-		goto Thaw;
+		goto Finish;
 
 	error = platform_prepare();
 	if (error)
-		goto Thaw;
+		goto Finish;
 
 	suspend_console();
 	error = device_suspend(PMSG_FREEZE);
@@ -169,7 +174,7 @@ int pm_suspend_disk(void)
 			power_down(pm_disk_mode);
 		else {
 			swsusp_free();
-			goto Thaw;
+			goto Finish;
 		}
 	} else {
 		pr_debug("PM: Image restored successfully.\n");
@@ -182,6 +187,8 @@ int pm_suspend_disk(void)
 	platform_finish();
 	device_resume();
 	resume_console();
+ Finish:
+	free_basic_memory_bitmaps();
  Thaw:
 	unprepare_processes();
 	return error;
@@ -227,13 +234,15 @@ static int software_resume(void)
 	}
 
 	pr_debug("PM: Checking swsusp image.\n");
-
 	error = swsusp_check();
 	if (error)
-		goto Done;
+		goto Unlock;
 
-	pr_debug("PM: Preparing processes for restore.\n");
+	error = create_basic_memory_bitmaps();
+	if (error)
+		goto Unlock;
 
+	pr_debug("PM: Preparing processes for restore.\n");
 	error = prepare_processes();
 	if (error) {
 		swsusp_close();
@@ -275,7 +284,9 @@ static int software_resume(void)
 	printk(KERN_ERR "PM: Restore failed, recovering.\n");
 	unprepare_processes();
  Done:
+	free_basic_memory_bitmaps();
 	/* For success case, the suspend path will release the lock */
+ Unlock:
 	mutex_unlock(&pm_mutex);
 	pr_debug("PM: Resume from disk failed.\n");
 	return 0;

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 85+ messages in thread

* [RFC][PATCH 3/3] mm: Remove nosave and nosave_free page flags
  2007-03-04 13:50               ` Rafael J. Wysocki
@ 2007-03-04 14:08                 ` Rafael J. Wysocki
  -1 siblings, 0 replies; 85+ messages in thread
From: Rafael J. Wysocki @ 2007-03-04 14:08 UTC (permalink / raw)
  To: Nick Piggin, Pavel Machek
  Cc: linux-mm, Johannes Berg, Christoph Lameter, Peter Zijlstra, pm list

Remove two page flags that are no longer needed.

---
 include/linux/page-flags.h |   12 ------------
 1 file changed, 12 deletions(-)

Index: linux-2.6.21-rc2/include/linux/page-flags.h
===================================================================
--- linux-2.6.21-rc2.orig/include/linux/page-flags.h	2007-02-04 19:44:54.000000000 +0100
+++ linux-2.6.21-rc2/include/linux/page-flags.h	2007-03-04 13:37:57.000000000 +0100
@@ -82,13 +82,11 @@
 #define PG_private		11	/* If pagecache, has fs-private data */
 
 #define PG_writeback		12	/* Page is under writeback */
-#define PG_nosave		13	/* Used for system suspend/resume */
 #define PG_compound		14	/* Part of a compound page */
 #define PG_swapcache		15	/* Swap page: swp_entry_t in private */
 
 #define PG_mappedtodisk		16	/* Has blocks allocated on-disk */
 #define PG_reclaim		17	/* To be reclaimed asap */
-#define PG_nosave_free		18	/* Used for system suspend/resume */
 #define PG_buddy		19	/* Page is free, on buddy lists */
 
 
@@ -212,16 +210,6 @@ static inline void SetPageUptodate(struc
 		ret;							\
 	})
 
-#define PageNosave(page)	test_bit(PG_nosave, &(page)->flags)
-#define SetPageNosave(page)	set_bit(PG_nosave, &(page)->flags)
-#define TestSetPageNosave(page)	test_and_set_bit(PG_nosave, &(page)->flags)
-#define ClearPageNosave(page)		clear_bit(PG_nosave, &(page)->flags)
-#define TestClearPageNosave(page)	test_and_clear_bit(PG_nosave, &(page)->flags)
-
-#define PageNosaveFree(page)	test_bit(PG_nosave_free, &(page)->flags)
-#define SetPageNosaveFree(page)	set_bit(PG_nosave_free, &(page)->flags)
-#define ClearPageNosaveFree(page)		clear_bit(PG_nosave_free, &(page)->flags)
-
 #define PageBuddy(page)		test_bit(PG_buddy, &(page)->flags)
 #define __SetPageBuddy(page)	__set_bit(PG_buddy, &(page)->flags)
 #define __ClearPageBuddy(page)	__clear_bit(PG_buddy, &(page)->flags)

^ permalink raw reply	[flat|nested] 85+ messages in thread

* [RFC][PATCH 3/3] mm: Remove nosave and nosave_free page flags
@ 2007-03-04 14:08                 ` Rafael J. Wysocki
  0 siblings, 0 replies; 85+ messages in thread
From: Rafael J. Wysocki @ 2007-03-04 14:08 UTC (permalink / raw)
  To: Nick Piggin, Pavel Machek
  Cc: Christoph Lameter, linux-mm, pm list, Johannes Berg, Peter Zijlstra

Remove two page flags that are no longer needed.

---
 include/linux/page-flags.h |   12 ------------
 1 file changed, 12 deletions(-)

Index: linux-2.6.21-rc2/include/linux/page-flags.h
===================================================================
--- linux-2.6.21-rc2.orig/include/linux/page-flags.h	2007-02-04 19:44:54.000000000 +0100
+++ linux-2.6.21-rc2/include/linux/page-flags.h	2007-03-04 13:37:57.000000000 +0100
@@ -82,13 +82,11 @@
 #define PG_private		11	/* If pagecache, has fs-private data */
 
 #define PG_writeback		12	/* Page is under writeback */
-#define PG_nosave		13	/* Used for system suspend/resume */
 #define PG_compound		14	/* Part of a compound page */
 #define PG_swapcache		15	/* Swap page: swp_entry_t in private */
 
 #define PG_mappedtodisk		16	/* Has blocks allocated on-disk */
 #define PG_reclaim		17	/* To be reclaimed asap */
-#define PG_nosave_free		18	/* Used for system suspend/resume */
 #define PG_buddy		19	/* Page is free, on buddy lists */
 
 
@@ -212,16 +210,6 @@ static inline void SetPageUptodate(struc
 		ret;							\
 	})
 
-#define PageNosave(page)	test_bit(PG_nosave, &(page)->flags)
-#define SetPageNosave(page)	set_bit(PG_nosave, &(page)->flags)
-#define TestSetPageNosave(page)	test_and_set_bit(PG_nosave, &(page)->flags)
-#define ClearPageNosave(page)		clear_bit(PG_nosave, &(page)->flags)
-#define TestClearPageNosave(page)	test_and_clear_bit(PG_nosave, &(page)->flags)
-
-#define PageNosaveFree(page)	test_bit(PG_nosave_free, &(page)->flags)
-#define SetPageNosaveFree(page)	set_bit(PG_nosave_free, &(page)->flags)
-#define ClearPageNosaveFree(page)		clear_bit(PG_nosave_free, &(page)->flags)
-
 #define PageBuddy(page)		test_bit(PG_buddy, &(page)->flags)
 #define __SetPageBuddy(page)	__set_bit(PG_buddy, &(page)->flags)
 #define __ClearPageBuddy(page)	__clear_bit(PG_buddy, &(page)->flags)

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [RFC][PATCH 0/3] swsusp: Do not use page flags (was: Re: Remove page flags for software suspend)
  2007-03-04 13:50               ` Rafael J. Wysocki
@ 2007-03-08  1:00                 ` Johannes Berg
  -1 siblings, 0 replies; 85+ messages in thread
From: Johannes Berg @ 2007-03-08  1:00 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Nick Piggin, Peter Zijlstra, Pavel Machek, linux-mm,
	Christoph Lameter, pm list

On Sun, 2007-03-04 at 14:50 +0100, Rafael J. Wysocki wrote:

> Okay, the next three messages contain patches that should do the trick.
> 
> They have been tested on x86_64, but not very thoroughly.

Looks nice, but I'm having some trouble with it. Solved too though :)

Thing is that I need to call register_nosave_region for a region
reserved for the IOMMU. Because the region is reserved so early during
boot I cannot call register_nosave_region at that time. However, I also
can't call register_nosave_region during a late initcall because at that
point bootmem can no longer be allocated. I could of course put a hook
somewhere into the arch code to do the marking, but I'd prefer not to.

The easiest solution I came up with is below. Of course, the suspend
patches for powerpc64 are still very much work in progress and I might
end up changing the whole reservation scheme after some feedback... If
nobody else needs this then don't think about it now.

However, would that patch be acceptable to you? What about error
handling? Printing a message and setting a "suspend not permitted"
variable would be great but I don't think such a variable exists. Also,
maybe passing in a gfp mask would be better (and we could use 0 to mean
bootmem too, I'd think)

Actually... I'd never have noticed this if register_nosave_region merged
regions. I have these two regions:
[    0.000000] swsusp: Registered nosave memory region: 0000000080000000 - 0000000100000000
[...]
[   19.406116] swsusp: Registered nosave memory region: 000000007f000000 - 0000000080000000
But they aren't merged, if they were the latter call wouldn't need to do
any allocations. Not that I'd want to rely on these positions!

With this patch and appropriate changes to my suspend code, it works.

johannes

---
Subject: [PATCH] swsusp: introduce register_nosave_region_late
From: Johannes Berg <johannes@sipsolutions.net>

This patch introduces a new register_nosave_region_late function that
can be called from initcalls when register_nosave_region can no longer
be used because it uses bootmem.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>

---
 include/linux/suspend.h |   11 ++++++++++-
 kernel/power/snapshot.c |   12 +++++++++---
 2 files changed, 19 insertions(+), 4 deletions(-)

--- linux-2.6-git.orig/include/linux/suspend.h	2007-03-08 01:25:37.248701500 +0100
+++ linux-2.6-git/include/linux/suspend.h	2007-03-08 01:41:49.967826495 +0100
@@ -36,7 +36,15 @@ static inline void pm_restore_console(vo
 /* kernel/power/swsusp.c */
 extern int software_suspend(void);
 /* kernel/power/snapshot.c */
-extern void __init register_nosave_region(unsigned long, unsigned long);
+extern void __register_nosave_region(unsigned long b, unsigned long e, int km);
+static inline void register_nosave_region(unsigned long b, unsigned long e)
+{
+	__register_nosave_region(b, e, 0);
+}
+static inline void register_nosave_region_late(unsigned long b, unsigned long e)
+{
+	__register_nosave_region(b, e, 1);
+}
 extern int swsusp_page_is_forbidden(struct page *);
 extern void swsusp_set_page_free(struct page *);
 extern void swsusp_unset_page_free(struct page *);
@@ -49,6 +57,7 @@ static inline int software_suspend(void)
 }
 
 static inline void register_nosave_region(unsigned long b, unsigned long e) {}
+static inline void register_nosave_region_late(unsigned long b, unsigned long e) {}
 static inline int swsusp_page_is_forbidden(struct page *p) { return 0; }
 static inline void swsusp_set_page_free(struct page *p) {}
 static inline void swsusp_unset_page_free(struct page *p) {}
--- linux-2.6-git.orig/kernel/power/snapshot.c	2007-03-08 01:26:17.680701500 +0100
+++ linux-2.6-git/kernel/power/snapshot.c	2007-03-08 01:42:35.385826495 +0100
@@ -608,7 +608,8 @@ static LIST_HEAD(nosave_regions);
  */
 
 void __init
-register_nosave_region(unsigned long start_pfn, unsigned long end_pfn)
+__register_nosave_region(unsigned long start_pfn, unsigned long end_pfn,
+			 int use_kmalloc)
 {
 	struct nosave_region *region;
 
@@ -624,8 +625,13 @@ register_nosave_region(unsigned long sta
 			goto Report;
 		}
 	}
-	/* This allocation cannot fail */
-	region = alloc_bootmem_low(sizeof(struct nosave_region));
+	if (use_kmalloc) {
+		/* during init, this shouldn't fail */
+		region = kmalloc(sizeof(struct nosave_region), GFP_KERNEL);
+		BUG_ON(!region);
+	} else
+		/* This allocation cannot fail */
+		region = alloc_bootmem_low(sizeof(struct nosave_region));
 	region->start_pfn = start_pfn;
 	region->end_pfn = end_pfn;
 	list_add_tail(&region->list, &nosave_regions);

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [RFC][PATCH 0/3] swsusp: Do not use page flags (was: Re: Remove page flags for software suspend)
@ 2007-03-08  1:00                 ` Johannes Berg
  0 siblings, 0 replies; 85+ messages in thread
From: Johannes Berg @ 2007-03-08  1:00 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Nick Piggin, Pavel Machek, Christoph Lameter, linux-mm, pm list,
	Peter Zijlstra

On Sun, 2007-03-04 at 14:50 +0100, Rafael J. Wysocki wrote:

> Okay, the next three messages contain patches that should do the trick.
> 
> They have been tested on x86_64, but not very thoroughly.

Looks nice, but I'm having some trouble with it. Solved too though :)

Thing is that I need to call register_nosave_region for a region
reserved for the IOMMU. Because the region is reserved so early during
boot I cannot call register_nosave_region at that time. However, I also
can't call register_nosave_region during a late initcall because at that
point bootmem can no longer be allocated. I could of course put a hook
somewhere into the arch code to do the marking, but I'd prefer not to.

The easiest solution I came up with is below. Of course, the suspend
patches for powerpc64 are still very much work in progress and I might
end up changing the whole reservation scheme after some feedback... If
nobody else needs this then don't think about it now.

However, would that patch be acceptable to you? What about error
handling? Printing a message and setting a "suspend not permitted"
variable would be great but I don't think such a variable exists. Also,
maybe passing in a gfp mask would be better (and we could use 0 to mean
bootmem too, I'd think)

Actually... I'd never have noticed this if register_nosave_region merged
regions. I have these two regions:
[    0.000000] swsusp: Registered nosave memory region: 0000000080000000 - 0000000100000000
[...]
[   19.406116] swsusp: Registered nosave memory region: 000000007f000000 - 0000000080000000
But they aren't merged, if they were the latter call wouldn't need to do
any allocations. Not that I'd want to rely on these positions!

With this patch and appropriate changes to my suspend code, it works.

johannes

---
Subject: [PATCH] swsusp: introduce register_nosave_region_late
From: Johannes Berg <johannes@sipsolutions.net>

This patch introduces a new register_nosave_region_late function that
can be called from initcalls when register_nosave_region can no longer
be used because it uses bootmem.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>

---
 include/linux/suspend.h |   11 ++++++++++-
 kernel/power/snapshot.c |   12 +++++++++---
 2 files changed, 19 insertions(+), 4 deletions(-)

--- linux-2.6-git.orig/include/linux/suspend.h	2007-03-08 01:25:37.248701500 +0100
+++ linux-2.6-git/include/linux/suspend.h	2007-03-08 01:41:49.967826495 +0100
@@ -36,7 +36,15 @@ static inline void pm_restore_console(vo
 /* kernel/power/swsusp.c */
 extern int software_suspend(void);
 /* kernel/power/snapshot.c */
-extern void __init register_nosave_region(unsigned long, unsigned long);
+extern void __register_nosave_region(unsigned long b, unsigned long e, int km);
+static inline void register_nosave_region(unsigned long b, unsigned long e)
+{
+	__register_nosave_region(b, e, 0);
+}
+static inline void register_nosave_region_late(unsigned long b, unsigned long e)
+{
+	__register_nosave_region(b, e, 1);
+}
 extern int swsusp_page_is_forbidden(struct page *);
 extern void swsusp_set_page_free(struct page *);
 extern void swsusp_unset_page_free(struct page *);
@@ -49,6 +57,7 @@ static inline int software_suspend(void)
 }
 
 static inline void register_nosave_region(unsigned long b, unsigned long e) {}
+static inline void register_nosave_region_late(unsigned long b, unsigned long e) {}
 static inline int swsusp_page_is_forbidden(struct page *p) { return 0; }
 static inline void swsusp_set_page_free(struct page *p) {}
 static inline void swsusp_unset_page_free(struct page *p) {}
--- linux-2.6-git.orig/kernel/power/snapshot.c	2007-03-08 01:26:17.680701500 +0100
+++ linux-2.6-git/kernel/power/snapshot.c	2007-03-08 01:42:35.385826495 +0100
@@ -608,7 +608,8 @@ static LIST_HEAD(nosave_regions);
  */
 
 void __init
-register_nosave_region(unsigned long start_pfn, unsigned long end_pfn)
+__register_nosave_region(unsigned long start_pfn, unsigned long end_pfn,
+			 int use_kmalloc)
 {
 	struct nosave_region *region;
 
@@ -624,8 +625,13 @@ register_nosave_region(unsigned long sta
 			goto Report;
 		}
 	}
-	/* This allocation cannot fail */
-	region = alloc_bootmem_low(sizeof(struct nosave_region));
+	if (use_kmalloc) {
+		/* during init, this shouldn't fail */
+		region = kmalloc(sizeof(struct nosave_region), GFP_KERNEL);
+		BUG_ON(!region);
+	} else
+		/* This allocation cannot fail */
+		region = alloc_bootmem_low(sizeof(struct nosave_region));
 	region->start_pfn = start_pfn;
 	region->end_pfn = end_pfn;
 	list_add_tail(&region->list, &nosave_regions);


--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [RFC][PATCH 0/3] swsusp: Do not use page flags (was: Re: Remove page flags for software suspend)
  2007-03-04 13:50               ` Rafael J. Wysocki
@ 2007-03-08 15:09                 ` Johannes Berg
  -1 siblings, 0 replies; 85+ messages in thread
From: Johannes Berg @ 2007-03-08 15:09 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Nick Piggin, Peter Zijlstra, Pavel Machek, linux-mm,
	Christoph Lameter, pm list


[-- Attachment #1.1: Type: text/plain, Size: 429 bytes --]


> Okay, the next three messages contain patches that should do the trick.
> 
> They have been tested on x86_64, but not very thoroughly.

Works on my powerbook as well. Never mind that usb is broken again with
suspend to disk. And my own patches break both str and std right now.

But these (on top of wireless-dev which is currently about 2.6.21-rc2)
work fine as long as I assume they don't break usb ;)

johannes

[-- Attachment #1.2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 190 bytes --]

[-- Attachment #2: Type: text/plain, Size: 0 bytes --]



^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [RFC][PATCH 0/3] swsusp: Do not use page flags (was: Re: Remove page flags for software suspend)
@ 2007-03-08 15:09                 ` Johannes Berg
  0 siblings, 0 replies; 85+ messages in thread
From: Johannes Berg @ 2007-03-08 15:09 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Nick Piggin, Pavel Machek, Christoph Lameter, linux-mm, pm list,
	Peter Zijlstra

[-- Attachment #1: Type: text/plain, Size: 429 bytes --]


> Okay, the next three messages contain patches that should do the trick.
> 
> They have been tested on x86_64, but not very thoroughly.

Works on my powerbook as well. Never mind that usb is broken again with
suspend to disk. And my own patches break both str and std right now.

But these (on top of wireless-dev which is currently about 2.6.21-rc2)
work fine as long as I assume they don't break usb ;)

johannes

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 190 bytes --]

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [RFC][PATCH 0/3] swsusp: Do not use page flags (was: Re: Remove page flags for software suspend)
  2007-03-04 13:50               ` Rafael J. Wysocki
@ 2007-03-08 15:53                 ` Peter Zijlstra
  -1 siblings, 0 replies; 85+ messages in thread
From: Peter Zijlstra @ 2007-03-08 15:53 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Nick Piggin, Pavel Machek, linux-mm, Christoph Lameter,
	Johannes Berg, pm list

On Sun, 2007-03-04 at 14:50 +0100, Rafael J. Wysocki wrote:

> Okay, the next three messages contain patches that should do the trick.
> 
> They have been tested on x86_64, but not very thoroughly.

They look good to me, but what do I know about swsusp :-) 
I'll stick them in my laptop's kernel (boring i386-UP) and see what
happens.

I did notice you don't stick KERN_ prio markers in your printk()s not
sure what to think of that (or if its common practise and I'm just not
aware of it).

Thanks for doing this.

Peter

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [RFC][PATCH 0/3] swsusp: Do not use page flags (was: Re: Remove page flags for software suspend)
@ 2007-03-08 15:53                 ` Peter Zijlstra
  0 siblings, 0 replies; 85+ messages in thread
From: Peter Zijlstra @ 2007-03-08 15:53 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Nick Piggin, Pavel Machek, Christoph Lameter, linux-mm, pm list,
	Johannes Berg

On Sun, 2007-03-04 at 14:50 +0100, Rafael J. Wysocki wrote:

> Okay, the next three messages contain patches that should do the trick.
> 
> They have been tested on x86_64, but not very thoroughly.

They look good to me, but what do I know about swsusp :-) 
I'll stick them in my laptop's kernel (boring i386-UP) and see what
happens.

I did notice you don't stick KERN_ prio markers in your printk()s not
sure what to think of that (or if its common practise and I'm just not
aware of it).

Thanks for doing this.

Peter



--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [RFC][PATCH 0/3] swsusp: Do not use page flags (was: Re: Remove page flags for software suspend)
  2007-03-08  1:00                 ` Johannes Berg
@ 2007-03-08 22:05                   ` Rafael J. Wysocki
  -1 siblings, 0 replies; 85+ messages in thread
From: Rafael J. Wysocki @ 2007-03-08 22:05 UTC (permalink / raw)
  To: Johannes Berg
  Cc: Nick Piggin, Peter Zijlstra, Pavel Machek, linux-mm,
	Christoph Lameter, pm list

On Thursday, 8 March 2007 02:00, Johannes Berg wrote:
> On Sun, 2007-03-04 at 14:50 +0100, Rafael J. Wysocki wrote:
> 
> > Okay, the next three messages contain patches that should do the trick.
> > 
> > They have been tested on x86_64, but not very thoroughly.
> 
> Looks nice, but I'm having some trouble with it. Solved too though :)
> 
> Thing is that I need to call register_nosave_region for a region
> reserved for the IOMMU. Because the region is reserved so early during
> boot I cannot call register_nosave_region at that time. However, I also
> can't call register_nosave_region during a late initcall because at that
> point bootmem can no longer be allocated. I could of course put a hook
> somewhere into the arch code to do the marking, but I'd prefer not to.
> 
> The easiest solution I came up with is below. Of course, the suspend
> patches for powerpc64 are still very much work in progress and I might
> end up changing the whole reservation scheme after some feedback... If
> nobody else needs this then don't think about it now.

Well, it may be needed for other things too.

> However, would that patch be acceptable to you? What about error
> handling? Printing a message and setting a "suspend not permitted"
> variable would be great but I don't think such a variable exists.

You're right, there's nothing like that.

> Also, maybe passing in a gfp mask would be better (and we could use 0 to mean
> bootmem too, I'd think)

I think we should pass a mask.  BTW, can you please check if the appended patch
is sufficient?

> Actually... I'd never have noticed this if register_nosave_region merged
> regions. I have these two regions:
> [    0.000000] swsusp: Registered nosave memory region: 0000000080000000 - 0000000100000000
> [...]
> [   19.406116] swsusp: Registered nosave memory region: 000000007f000000 - 0000000080000000
> But they aren't merged, if they were the latter call wouldn't need to do
> any allocations. Not that I'd want to rely on these positions!

It only merges regions passed in the right order (ie. sorted).

> With this patch and appropriate changes to my suspend code, it works.

OK, thanks for testing!

Greetings,
Rafael


---
 kernel/power/snapshot.c |   14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

Index: linux-2.6.21-rc3/kernel/power/snapshot.c
===================================================================
--- linux-2.6.21-rc3.orig/kernel/power/snapshot.c
+++ linux-2.6.21-rc3/kernel/power/snapshot.c
@@ -624,8 +624,18 @@ register_nosave_region(unsigned long sta
 			goto Report;
 		}
 	}
-	/* This allocation cannot fail */
-	region = alloc_bootmem_low(sizeof(struct nosave_region));
+	if (system_state == SYSTEM_BOOTING) {
+		/* This allocation cannot fail */
+		region = alloc_bootmem_low(sizeof(struct nosave_region));
+	} else {
+		region = kzalloc(sizeof(struct nosave_region), GFP_ATOMIC);
+		if (!region) {
+			printk(KERN_WARNING "swsusp: Not enough memory "
+				"to register a nosave region!\n");
+			WARN_ON(1);
+			return;
+		}
+	}
 	region->start_pfn = start_pfn;
 	region->end_pfn = end_pfn;
 	list_add_tail(&region->list, &nosave_regions);

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [RFC][PATCH 0/3] swsusp: Do not use page flags (was: Re: Remove page flags for software suspend)
@ 2007-03-08 22:05                   ` Rafael J. Wysocki
  0 siblings, 0 replies; 85+ messages in thread
From: Rafael J. Wysocki @ 2007-03-08 22:05 UTC (permalink / raw)
  To: Johannes Berg
  Cc: Nick Piggin, Pavel Machek, Christoph Lameter, linux-mm, pm list,
	Peter Zijlstra

On Thursday, 8 March 2007 02:00, Johannes Berg wrote:
> On Sun, 2007-03-04 at 14:50 +0100, Rafael J. Wysocki wrote:
> 
> > Okay, the next three messages contain patches that should do the trick.
> > 
> > They have been tested on x86_64, but not very thoroughly.
> 
> Looks nice, but I'm having some trouble with it. Solved too though :)
> 
> Thing is that I need to call register_nosave_region for a region
> reserved for the IOMMU. Because the region is reserved so early during
> boot I cannot call register_nosave_region at that time. However, I also
> can't call register_nosave_region during a late initcall because at that
> point bootmem can no longer be allocated. I could of course put a hook
> somewhere into the arch code to do the marking, but I'd prefer not to.
> 
> The easiest solution I came up with is below. Of course, the suspend
> patches for powerpc64 are still very much work in progress and I might
> end up changing the whole reservation scheme after some feedback... If
> nobody else needs this then don't think about it now.

Well, it may be needed for other things too.

> However, would that patch be acceptable to you? What about error
> handling? Printing a message and setting a "suspend not permitted"
> variable would be great but I don't think such a variable exists.

You're right, there's nothing like that.

> Also, maybe passing in a gfp mask would be better (and we could use 0 to mean
> bootmem too, I'd think)

I think we should pass a mask.  BTW, can you please check if the appended patch
is sufficient?

> Actually... I'd never have noticed this if register_nosave_region merged
> regions. I have these two regions:
> [    0.000000] swsusp: Registered nosave memory region: 0000000080000000 - 0000000100000000
> [...]
> [   19.406116] swsusp: Registered nosave memory region: 000000007f000000 - 0000000080000000
> But they aren't merged, if they were the latter call wouldn't need to do
> any allocations. Not that I'd want to rely on these positions!

It only merges regions passed in the right order (ie. sorted).

> With this patch and appropriate changes to my suspend code, it works.

OK, thanks for testing!

Greetings,
Rafael


---
 kernel/power/snapshot.c |   14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

Index: linux-2.6.21-rc3/kernel/power/snapshot.c
===================================================================
--- linux-2.6.21-rc3.orig/kernel/power/snapshot.c
+++ linux-2.6.21-rc3/kernel/power/snapshot.c
@@ -624,8 +624,18 @@ register_nosave_region(unsigned long sta
 			goto Report;
 		}
 	}
-	/* This allocation cannot fail */
-	region = alloc_bootmem_low(sizeof(struct nosave_region));
+	if (system_state == SYSTEM_BOOTING) {
+		/* This allocation cannot fail */
+		region = alloc_bootmem_low(sizeof(struct nosave_region));
+	} else {
+		region = kzalloc(sizeof(struct nosave_region), GFP_ATOMIC);
+		if (!region) {
+			printk(KERN_WARNING "swsusp: Not enough memory "
+				"to register a nosave region!\n");
+			WARN_ON(1);
+			return;
+		}
+	}
 	region->start_pfn = start_pfn;
 	region->end_pfn = end_pfn;
 	list_add_tail(&region->list, &nosave_regions);

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [RFC][PATCH 0/3] swsusp: Do not use page flags (was: Re: Remove page flags for software suspend)
  2007-03-08 15:09                 ` Johannes Berg
@ 2007-03-08 22:10                   ` Rafael J. Wysocki
  -1 siblings, 0 replies; 85+ messages in thread
From: Rafael J. Wysocki @ 2007-03-08 22:10 UTC (permalink / raw)
  To: Johannes Berg, Pavel Machek
  Cc: Nick Piggin, pm list, Christoph Lameter, Peter Zijlstra, linux-mm

On Thursday, 8 March 2007 16:09, Johannes Berg wrote:
> 
> > Okay, the next three messages contain patches that should do the trick.
> > 
> > They have been tested on x86_64, but not very thoroughly.
> 
> Works on my powerbook as well. Never mind that usb is broken again with
> suspend to disk. And my own patches break both str and std right now.

Ouch.

> But these (on top of wireless-dev which is currently about 2.6.21-rc2)
> work fine as long as I assume they don't break usb ;)

Well, on my boxes they don't. ;-)

Rafael

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [RFC][PATCH 0/3] swsusp: Do not use page flags (was: Re: Remove page flags for software suspend)
@ 2007-03-08 22:10                   ` Rafael J. Wysocki
  0 siblings, 0 replies; 85+ messages in thread
From: Rafael J. Wysocki @ 2007-03-08 22:10 UTC (permalink / raw)
  To: Johannes Berg, Pavel Machek
  Cc: Nick Piggin, Christoph Lameter, linux-mm, pm list, Peter Zijlstra

On Thursday, 8 March 2007 16:09, Johannes Berg wrote:
> 
> > Okay, the next three messages contain patches that should do the trick.
> > 
> > They have been tested on x86_64, but not very thoroughly.
> 
> Works on my powerbook as well. Never mind that usb is broken again with
> suspend to disk. And my own patches break both str and std right now.

Ouch.

> But these (on top of wireless-dev which is currently about 2.6.21-rc2)
> work fine as long as I assume they don't break usb ;)

Well, on my boxes they don't. ;-)

Rafael

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [RFC][PATCH 0/3] swsusp: Do not use page flags (was: Re: Remove page flags for software suspend)
  2007-03-08 22:05                   ` Rafael J. Wysocki
@ 2007-03-08 22:10                     ` Johannes Berg
  -1 siblings, 0 replies; 85+ messages in thread
From: Johannes Berg @ 2007-03-08 22:10 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Nick Piggin, Peter Zijlstra, Pavel Machek, linux-mm,
	Christoph Lameter, pm list


[-- Attachment #1.1: Type: text/plain, Size: 1487 bytes --]

On Thu, 2007-03-08 at 23:05 +0100, Rafael J. Wysocki wrote:

> > The easiest solution I came up with is below. Of course, the suspend
> > patches for powerpc64 are still very much work in progress and I might
> > end up changing the whole reservation scheme after some feedback... If
> > nobody else needs this then don't think about it now.
> 
> Well, it may be needed for other things too.

Yeah, but it's probably better to wait for them :)

> I think we should pass a mask.  BTW, can you please check if the appended patch
> is sufficient?

Unfortunately I won't be able to actually try this on hardware until the
20th or so.

> > With this patch and appropriate changes to my suspend code, it works.
> 
> OK, thanks for testing!

Forgot to mention, patches are at
http://johannes.sipsolutions.net/patches/ look for the latest
powerpc-suspend-* patchset.

> +	if (system_state == SYSTEM_BOOTING) {
> +		/* This allocation cannot fail */
> +		region = alloc_bootmem_low(sizeof(struct nosave_region));
> +	} else {
> +		region = kzalloc(sizeof(struct nosave_region), GFP_ATOMIC);
> +		if (!region) {
> +			printk(KERN_WARNING "swsusp: Not enough memory "
> +				"to register a nosave region!\n");
> +			WARN_ON(1);
> +			return;
> +		}
> +	}

I don't think that'll be sufficient, system_state = SYSTEM_BOOTING is
done only in init/main.c:init_post which is done after after calling the
initcalls (they are called in do_basic_setup)

johannes

[-- Attachment #1.2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 190 bytes --]

[-- Attachment #2: Type: text/plain, Size: 0 bytes --]



^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [RFC][PATCH 0/3] swsusp: Do not use page flags (was: Re: Remove page flags for software suspend)
@ 2007-03-08 22:10                     ` Johannes Berg
  0 siblings, 0 replies; 85+ messages in thread
From: Johannes Berg @ 2007-03-08 22:10 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Nick Piggin, Pavel Machek, Christoph Lameter, linux-mm, pm list,
	Peter Zijlstra

[-- Attachment #1: Type: text/plain, Size: 1487 bytes --]

On Thu, 2007-03-08 at 23:05 +0100, Rafael J. Wysocki wrote:

> > The easiest solution I came up with is below. Of course, the suspend
> > patches for powerpc64 are still very much work in progress and I might
> > end up changing the whole reservation scheme after some feedback... If
> > nobody else needs this then don't think about it now.
> 
> Well, it may be needed for other things too.

Yeah, but it's probably better to wait for them :)

> I think we should pass a mask.  BTW, can you please check if the appended patch
> is sufficient?

Unfortunately I won't be able to actually try this on hardware until the
20th or so.

> > With this patch and appropriate changes to my suspend code, it works.
> 
> OK, thanks for testing!

Forgot to mention, patches are at
http://johannes.sipsolutions.net/patches/ look for the latest
powerpc-suspend-* patchset.

> +	if (system_state == SYSTEM_BOOTING) {
> +		/* This allocation cannot fail */
> +		region = alloc_bootmem_low(sizeof(struct nosave_region));
> +	} else {
> +		region = kzalloc(sizeof(struct nosave_region), GFP_ATOMIC);
> +		if (!region) {
> +			printk(KERN_WARNING "swsusp: Not enough memory "
> +				"to register a nosave region!\n");
> +			WARN_ON(1);
> +			return;
> +		}
> +	}

I don't think that'll be sufficient, system_state = SYSTEM_BOOTING is
done only in init/main.c:init_post which is done after after calling the
initcalls (they are called in do_basic_setup)

johannes

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 190 bytes --]

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [RFC][PATCH 0/3] swsusp: Do not use page flags (was: Re: Remove page flags for software suspend)
  2007-03-08 15:53                 ` Peter Zijlstra
@ 2007-03-08 22:11                   ` Rafael J. Wysocki
  -1 siblings, 0 replies; 85+ messages in thread
From: Rafael J. Wysocki @ 2007-03-08 22:11 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Nick Piggin, Pavel Machek, linux-mm, Christoph Lameter,
	Johannes Berg, pm list

On Thursday, 8 March 2007 16:53, Peter Zijlstra wrote:
> On Sun, 2007-03-04 at 14:50 +0100, Rafael J. Wysocki wrote:
> 
> > Okay, the next three messages contain patches that should do the trick.
> > 
> > They have been tested on x86_64, but not very thoroughly.
> 
> They look good to me, but what do I know about swsusp :-) 
> I'll stick them in my laptop's kernel (boring i386-UP) and see what
> happens.

Thanks!

> I did notice you don't stick KERN_ prio markers in your printk()s not
> sure what to think of that (or if its common practise and I'm just not
> aware of it).

This means use the default (which initially is KERN_WARNING, AFAICT).

> Thanks for doing this.

You're welcome. :-)

Rafael

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [RFC][PATCH 0/3] swsusp: Do not use page flags (was: Re: Remove page flags for software suspend)
@ 2007-03-08 22:11                   ` Rafael J. Wysocki
  0 siblings, 0 replies; 85+ messages in thread
From: Rafael J. Wysocki @ 2007-03-08 22:11 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Nick Piggin, Pavel Machek, Christoph Lameter, linux-mm, pm list,
	Johannes Berg

On Thursday, 8 March 2007 16:53, Peter Zijlstra wrote:
> On Sun, 2007-03-04 at 14:50 +0100, Rafael J. Wysocki wrote:
> 
> > Okay, the next three messages contain patches that should do the trick.
> > 
> > They have been tested on x86_64, but not very thoroughly.
> 
> They look good to me, but what do I know about swsusp :-) 
> I'll stick them in my laptop's kernel (boring i386-UP) and see what
> happens.

Thanks!

> I did notice you don't stick KERN_ prio markers in your printk()s not
> sure what to think of that (or if its common practise and I'm just not
> aware of it).

This means use the default (which initially is KERN_WARNING, AFAICT).

> Thanks for doing this.

You're welcome. :-)

Rafael

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [RFC][PATCH 0/3] swsusp: Do not use page flags (was: Re: Remove page flags for software suspend)
  2007-03-08 22:10                   ` Rafael J. Wysocki
@ 2007-03-08 22:12                     ` Johannes Berg
  -1 siblings, 0 replies; 85+ messages in thread
From: Johannes Berg @ 2007-03-08 22:12 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Nick Piggin, Peter Zijlstra, Pavel Machek, linux-mm,
	Christoph Lameter, pm list


[-- Attachment #1.1: Type: text/plain, Size: 907 bytes --]

On Thu, 2007-03-08 at 23:10 +0100, Rafael J. Wysocki wrote:

> > Works on my powerbook as well. Never mind that usb is broken again with
> > suspend to disk. And my own patches break both str and std right now.
> 
> Ouch.

Actually. Some fluke or mismanagement of patches, my own patches are
fine (the suspend set I just gave you the link to). Probably screwed
something up during testing earlier.

> > But these (on top of wireless-dev which is currently about 2.6.21-rc2)
> > work fine as long as I assume they don't break usb ;)
> 
> Well, on my boxes they don't. ;-)

Good :)
I think usb suspend is broken on my machine with and without these
patches but I haven't tested it. I may debug it more some time, or just
leave it since it works with str and I hardly ever std. Then again,
maybe it's fine in -rc3, I'm still at -rc2 (due to wireless-dev not
being up to date)

johannes

[-- Attachment #1.2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 190 bytes --]

[-- Attachment #2: Type: text/plain, Size: 0 bytes --]



^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [RFC][PATCH 0/3] swsusp: Do not use page flags (was: Re: Remove page flags for software suspend)
@ 2007-03-08 22:12                     ` Johannes Berg
  0 siblings, 0 replies; 85+ messages in thread
From: Johannes Berg @ 2007-03-08 22:12 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Pavel Machek, Nick Piggin, Christoph Lameter, linux-mm, pm list,
	Peter Zijlstra

[-- Attachment #1: Type: text/plain, Size: 907 bytes --]

On Thu, 2007-03-08 at 23:10 +0100, Rafael J. Wysocki wrote:

> > Works on my powerbook as well. Never mind that usb is broken again with
> > suspend to disk. And my own patches break both str and std right now.
> 
> Ouch.

Actually. Some fluke or mismanagement of patches, my own patches are
fine (the suspend set I just gave you the link to). Probably screwed
something up during testing earlier.

> > But these (on top of wireless-dev which is currently about 2.6.21-rc2)
> > work fine as long as I assume they don't break usb ;)
> 
> Well, on my boxes they don't. ;-)

Good :)
I think usb suspend is broken on my machine with and without these
patches but I haven't tested it. I may debug it more some time, or just
leave it since it works with str and I hardly ever std. Then again,
maybe it's fine in -rc3, I'm still at -rc2 (due to wireless-dev not
being up to date)

johannes

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 190 bytes --]

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [RFC][PATCH 0/3] swsusp: Do not use page flags (was: Re: Remove page flags for software suspend)
  2007-03-08 22:10                     ` Johannes Berg
@ 2007-03-08 22:33                       ` Rafael J. Wysocki
  -1 siblings, 0 replies; 85+ messages in thread
From: Rafael J. Wysocki @ 2007-03-08 22:33 UTC (permalink / raw)
  To: Johannes Berg
  Cc: Nick Piggin, Peter Zijlstra, Pavel Machek, linux-mm,
	Christoph Lameter, pm list

On Thursday, 8 March 2007 23:10, Johannes Berg wrote:
> On Thu, 2007-03-08 at 23:05 +0100, Rafael J. Wysocki wrote:
> 
> > > The easiest solution I came up with is below. Of course, the suspend
> > > patches for powerpc64 are still very much work in progress and I might
> > > end up changing the whole reservation scheme after some feedback... If
> > > nobody else needs this then don't think about it now.
> > 
> > Well, it may be needed for other things too.
> 
> Yeah, but it's probably better to wait for them :)

Agreed.

> > I think we should pass a mask.  BTW, can you please check if the appended patch
> > is sufficient?
> 
> Unfortunately I won't be able to actually try this on hardware until the
> 20th or so.

OK, it's not an urgent thing. ;-)

> > > With this patch and appropriate changes to my suspend code, it works.
> > 
> > OK, thanks for testing!
> 
> Forgot to mention, patches are at
> http://johannes.sipsolutions.net/patches/ look for the latest
> powerpc-suspend-* patchset.

Thanks for the link.
 
> > +	if (system_state == SYSTEM_BOOTING) {
> > +		/* This allocation cannot fail */
> > +		region = alloc_bootmem_low(sizeof(struct nosave_region));
> > +	} else {
> > +		region = kzalloc(sizeof(struct nosave_region), GFP_ATOMIC);
> > +		if (!region) {
> > +			printk(KERN_WARNING "swsusp: Not enough memory "
> > +				"to register a nosave region!\n");
> > +			WARN_ON(1);
> > +			return;
> > +		}
> > +	}
> 
> I don't think that'll be sufficient, system_state = SYSTEM_BOOTING is
> done only in init/main.c:init_post which is done after after calling the
> initcalls (they are called in do_basic_setup)

Well, I don't think so.  If I understand the definition of system_state
correctly, it is initially equal to SYSTEM_BOOTING.  Then, it's changed to
SYSTEM_RUNNING in init/main.c after the bootmem has been freed.

Anyway, the patch works on x86_64. :-)

Rafael

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [RFC][PATCH 0/3] swsusp: Do not use page flags (was: Re: Remove page flags for software suspend)
@ 2007-03-08 22:33                       ` Rafael J. Wysocki
  0 siblings, 0 replies; 85+ messages in thread
From: Rafael J. Wysocki @ 2007-03-08 22:33 UTC (permalink / raw)
  To: Johannes Berg
  Cc: Nick Piggin, Pavel Machek, Christoph Lameter, linux-mm, pm list,
	Peter Zijlstra

On Thursday, 8 March 2007 23:10, Johannes Berg wrote:
> On Thu, 2007-03-08 at 23:05 +0100, Rafael J. Wysocki wrote:
> 
> > > The easiest solution I came up with is below. Of course, the suspend
> > > patches for powerpc64 are still very much work in progress and I might
> > > end up changing the whole reservation scheme after some feedback... If
> > > nobody else needs this then don't think about it now.
> > 
> > Well, it may be needed for other things too.
> 
> Yeah, but it's probably better to wait for them :)

Agreed.

> > I think we should pass a mask.  BTW, can you please check if the appended patch
> > is sufficient?
> 
> Unfortunately I won't be able to actually try this on hardware until the
> 20th or so.

OK, it's not an urgent thing. ;-)

> > > With this patch and appropriate changes to my suspend code, it works.
> > 
> > OK, thanks for testing!
> 
> Forgot to mention, patches are at
> http://johannes.sipsolutions.net/patches/ look for the latest
> powerpc-suspend-* patchset.

Thanks for the link.
 
> > +	if (system_state == SYSTEM_BOOTING) {
> > +		/* This allocation cannot fail */
> > +		region = alloc_bootmem_low(sizeof(struct nosave_region));
> > +	} else {
> > +		region = kzalloc(sizeof(struct nosave_region), GFP_ATOMIC);
> > +		if (!region) {
> > +			printk(KERN_WARNING "swsusp: Not enough memory "
> > +				"to register a nosave region!\n");
> > +			WARN_ON(1);
> > +			return;
> > +		}
> > +	}
> 
> I don't think that'll be sufficient, system_state = SYSTEM_BOOTING is
> done only in init/main.c:init_post which is done after after calling the
> initcalls (they are called in do_basic_setup)

Well, I don't think so.  If I understand the definition of system_state
correctly, it is initially equal to SYSTEM_BOOTING.  Then, it's changed to
SYSTEM_RUNNING in init/main.c after the bootmem has been freed.

Anyway, the patch works on x86_64. :-)

Rafael

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [RFC][PATCH 0/3] swsusp: Do not use page flags (was: Re: Remove page flags for software suspend)
  2007-03-08 22:33                       ` Rafael J. Wysocki
@ 2007-03-08 22:43                         ` Johannes Berg
  -1 siblings, 0 replies; 85+ messages in thread
From: Johannes Berg @ 2007-03-08 22:43 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Nick Piggin, Peter Zijlstra, Pavel Machek, linux-mm,
	Christoph Lameter, pm list


[-- Attachment #1.1: Type: text/plain, Size: 1000 bytes --]

On Thu, 2007-03-08 at 23:33 +0100, Rafael J. Wysocki wrote:

> > Unfortunately I won't be able to actually try this on hardware until the
> > 20th or so.
> 
> OK, it's not an urgent thing. ;-)

True :)

> Well, I don't think so.  If I understand the definition of system_state
> correctly, it is initially equal to SYSTEM_BOOTING.  Then, it's changed to
> SYSTEM_RUNNING in init/main.c after the bootmem has been freed.

No, I think you're confusing bootmem with initmem right now. If you
actually look at the code then free_all_bootmem is called as part of
mem_init() on powerpc, which is called from start_kernel() a long time
before initcalls are done and system state is set.

Put it this way. By the time initcalls are done, I can no longer use
bootmem. I tested this and it panics. But if you look at the code in
init/main.c, system_state is only changed after initcalls are done.

> Anyway, the patch works on x86_64. :-)

Yeah but it worked before too ;)

johannes

[-- Attachment #1.2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 190 bytes --]

[-- Attachment #2: Type: text/plain, Size: 0 bytes --]



^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [RFC][PATCH 0/3] swsusp: Do not use page flags (was: Re: Remove page flags for software suspend)
@ 2007-03-08 22:43                         ` Johannes Berg
  0 siblings, 0 replies; 85+ messages in thread
From: Johannes Berg @ 2007-03-08 22:43 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Nick Piggin, Pavel Machek, Christoph Lameter, linux-mm, pm list,
	Peter Zijlstra

[-- Attachment #1: Type: text/plain, Size: 1000 bytes --]

On Thu, 2007-03-08 at 23:33 +0100, Rafael J. Wysocki wrote:

> > Unfortunately I won't be able to actually try this on hardware until the
> > 20th or so.
> 
> OK, it's not an urgent thing. ;-)

True :)

> Well, I don't think so.  If I understand the definition of system_state
> correctly, it is initially equal to SYSTEM_BOOTING.  Then, it's changed to
> SYSTEM_RUNNING in init/main.c after the bootmem has been freed.

No, I think you're confusing bootmem with initmem right now. If you
actually look at the code then free_all_bootmem is called as part of
mem_init() on powerpc, which is called from start_kernel() a long time
before initcalls are done and system state is set.

Put it this way. By the time initcalls are done, I can no longer use
bootmem. I tested this and it panics. But if you look at the code in
init/main.c, system_state is only changed after initcalls are done.

> Anyway, the patch works on x86_64. :-)

Yeah but it worked before too ;)

johannes

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 190 bytes --]

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [RFC][PATCH 0/3] swsusp: Do not use page flags (was: Re: Remove page flags for software suspend)
  2007-03-08 22:43                         ` Johannes Berg
@ 2007-03-08 22:54                           ` Rafael J. Wysocki
  -1 siblings, 0 replies; 85+ messages in thread
From: Rafael J. Wysocki @ 2007-03-08 22:54 UTC (permalink / raw)
  To: Johannes Berg
  Cc: Nick Piggin, Peter Zijlstra, Pavel Machek, linux-mm,
	Christoph Lameter, pm list

On Thursday, 8 March 2007 23:43, Johannes Berg wrote:
> On Thu, 2007-03-08 at 23:33 +0100, Rafael J. Wysocki wrote:
> 
> > > Unfortunately I won't be able to actually try this on hardware until the
> > > 20th or so.
> > 
> > OK, it's not an urgent thing. ;-)
> 
> True :)
> 
> > Well, I don't think so.  If I understand the definition of system_state
> > correctly, it is initially equal to SYSTEM_BOOTING.  Then, it's changed to
> > SYSTEM_RUNNING in init/main.c after the bootmem has been freed.
> 
> No, I think you're confusing bootmem with initmem right now.

Yes, you're right, sorry.

> If you actually look at the code then free_all_bootmem is called as part of
> mem_init() on powerpc, which is called from start_kernel() a long time
> before initcalls are done and system state is set.
> 
> Put it this way. By the time initcalls are done, I can no longer use
> bootmem. I tested this and it panics. But if you look at the code in
> init/main.c, system_state is only changed after initcalls are done.

That's true.

In that case your patch seems to be the simplest one and I think it should go
along with some code that will actually use it.

Rafael

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [RFC][PATCH 0/3] swsusp: Do not use page flags (was: Re: Remove page flags for software suspend)
@ 2007-03-08 22:54                           ` Rafael J. Wysocki
  0 siblings, 0 replies; 85+ messages in thread
From: Rafael J. Wysocki @ 2007-03-08 22:54 UTC (permalink / raw)
  To: Johannes Berg
  Cc: Nick Piggin, Pavel Machek, Christoph Lameter, linux-mm, pm list,
	Peter Zijlstra

On Thursday, 8 March 2007 23:43, Johannes Berg wrote:
> On Thu, 2007-03-08 at 23:33 +0100, Rafael J. Wysocki wrote:
> 
> > > Unfortunately I won't be able to actually try this on hardware until the
> > > 20th or so.
> > 
> > OK, it's not an urgent thing. ;-)
> 
> True :)
> 
> > Well, I don't think so.  If I understand the definition of system_state
> > correctly, it is initially equal to SYSTEM_BOOTING.  Then, it's changed to
> > SYSTEM_RUNNING in init/main.c after the bootmem has been freed.
> 
> No, I think you're confusing bootmem with initmem right now.

Yes, you're right, sorry.

> If you actually look at the code then free_all_bootmem is called as part of
> mem_init() on powerpc, which is called from start_kernel() a long time
> before initcalls are done and system state is set.
> 
> Put it this way. By the time initcalls are done, I can no longer use
> bootmem. I tested this and it panics. But if you look at the code in
> init/main.c, system_state is only changed after initcalls are done.

That's true.

In that case your patch seems to be the simplest one and I think it should go
along with some code that will actually use it.

Rafael

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [RFC][PATCH 0/3] swsusp: Do not use page flags (was: Re: Remove page flags for software suspend)
  2007-03-08 22:54                           ` Rafael J. Wysocki
@ 2007-03-08 22:54                             ` Johannes Berg
  -1 siblings, 0 replies; 85+ messages in thread
From: Johannes Berg @ 2007-03-08 22:54 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Nick Piggin, Peter Zijlstra, Pavel Machek, linux-mm,
	Christoph Lameter, pm list


[-- Attachment #1.1: Type: text/plain, Size: 474 bytes --]

On Thu, 2007-03-08 at 23:54 +0100, Rafael J. Wysocki wrote:

> In that case your patch seems to be the simplest one and I think it should go
> along with some code that will actually use it.

Right. So if anyone else needs it feel free to pick up my patch, if not
I'll submit it again as part of my "suspend on powermac G5" patchset
when that is properly reviewed by the appropriate people. Assuming, of
course, that this (yours) patchset is picked up.

johannes

[-- Attachment #1.2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 190 bytes --]

[-- Attachment #2: Type: text/plain, Size: 0 bytes --]



^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [RFC][PATCH 0/3] swsusp: Do not use page flags (was: Re: Remove page flags for software suspend)
@ 2007-03-08 22:54                             ` Johannes Berg
  0 siblings, 0 replies; 85+ messages in thread
From: Johannes Berg @ 2007-03-08 22:54 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Nick Piggin, Pavel Machek, Christoph Lameter, linux-mm, pm list,
	Peter Zijlstra

[-- Attachment #1: Type: text/plain, Size: 474 bytes --]

On Thu, 2007-03-08 at 23:54 +0100, Rafael J. Wysocki wrote:

> In that case your patch seems to be the simplest one and I think it should go
> along with some code that will actually use it.

Right. So if anyone else needs it feel free to pick up my patch, if not
I'll submit it again as part of my "suspend on powermac G5" patchset
when that is properly reviewed by the appropriate people. Assuming, of
course, that this (yours) patchset is picked up.

johannes

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 190 bytes --]

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [RFC][PATCH 0/3] swsusp: Do not use page flags (was: Re: Remove page flags for software suspend)
  2007-03-08 22:05                   ` Rafael J. Wysocki
@ 2007-03-08 23:15                     ` Pavel Machek
  -1 siblings, 0 replies; 85+ messages in thread
From: Pavel Machek @ 2007-03-08 23:15 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Nick Piggin, Peter Zijlstra, linux-mm, Christoph Lameter,
	Johannes Berg, pm list

Hi!

> +		region = kzalloc(sizeof(struct nosave_region), GFP_ATOMIC);
> +		if (!region) {
> +			printk(KERN_WARNING "swsusp: Not enough memory "
> +				"to register a nosave region!\n");
> +			WARN_ON(1);
> +			return;
> +		}

That's a no-no. ATOMIC alocations can fail, and no, WARN_ON is not
enough. It is not a bug, they just fail.
								Pavel

-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [RFC][PATCH 0/3] swsusp: Do not use page flags (was: Re: Remove page flags for software suspend)
@ 2007-03-08 23:15                     ` Pavel Machek
  0 siblings, 0 replies; 85+ messages in thread
From: Pavel Machek @ 2007-03-08 23:15 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Johannes Berg, Nick Piggin, Christoph Lameter, linux-mm, pm list,
	Peter Zijlstra

Hi!

> +		region = kzalloc(sizeof(struct nosave_region), GFP_ATOMIC);
> +		if (!region) {
> +			printk(KERN_WARNING "swsusp: Not enough memory "
> +				"to register a nosave region!\n");
> +			WARN_ON(1);
> +			return;
> +		}

That's a no-no. ATOMIC alocations can fail, and no, WARN_ON is not
enough. It is not a bug, they just fail.
								Pavel

-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [RFC][PATCH 0/3] swsusp: Do not use page flags (was: Re: Remove page flags for software suspend)
  2007-03-08 23:15                     ` Pavel Machek
@ 2007-03-08 23:21                       ` Johannes Berg
  -1 siblings, 0 replies; 85+ messages in thread
From: Johannes Berg @ 2007-03-08 23:21 UTC (permalink / raw)
  To: Pavel Machek
  Cc: Nick Piggin, Peter Zijlstra, linux-mm, Christoph Lameter, pm list


[-- Attachment #1.1: Type: text/plain, Size: 589 bytes --]

On Fri, 2007-03-09 at 00:15 +0100, Pavel Machek wrote:

> That's a no-no. ATOMIC alocations can fail, and no, WARN_ON is not
> enough. It is not a bug, they just fail.

But like I said in my post, there's no way we can disable suspend to
disk when they do, right now anyway. Also, this can't be called any
later than a late initcall or such since it's __init, and thus there
shouldn't be memory pressure yet that would cause this to fail.

In any case, I'd be much happier with having a "disable suspend"
variable so we could print a big warning and set that flag.

johannes

[-- Attachment #1.2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 190 bytes --]

[-- Attachment #2: Type: text/plain, Size: 0 bytes --]



^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [RFC][PATCH 0/3] swsusp: Do not use page flags (was: Re: Remove page flags for software suspend)
@ 2007-03-08 23:21                       ` Johannes Berg
  0 siblings, 0 replies; 85+ messages in thread
From: Johannes Berg @ 2007-03-08 23:21 UTC (permalink / raw)
  To: Pavel Machek
  Cc: Rafael J. Wysocki, Nick Piggin, Christoph Lameter, linux-mm,
	pm list, Peter Zijlstra

[-- Attachment #1: Type: text/plain, Size: 589 bytes --]

On Fri, 2007-03-09 at 00:15 +0100, Pavel Machek wrote:

> That's a no-no. ATOMIC alocations can fail, and no, WARN_ON is not
> enough. It is not a bug, they just fail.

But like I said in my post, there's no way we can disable suspend to
disk when they do, right now anyway. Also, this can't be called any
later than a late initcall or such since it's __init, and thus there
shouldn't be memory pressure yet that would cause this to fail.

In any case, I'd be much happier with having a "disable suspend"
variable so we could print a big warning and set that flag.

johannes

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 190 bytes --]

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [RFC][PATCH 0/3] swsusp: Do not use page flags (was: Re: Remove page flags for software suspend)
  2007-03-08 23:21                       ` Johannes Berg
@ 2007-03-08 23:23                         ` Pavel Machek
  -1 siblings, 0 replies; 85+ messages in thread
From: Pavel Machek @ 2007-03-08 23:23 UTC (permalink / raw)
  To: Johannes Berg
  Cc: Nick Piggin, Peter Zijlstra, linux-mm, Christoph Lameter, pm list

Hi!

> > That's a no-no. ATOMIC alocations can fail, and no, WARN_ON is not
> > enough. It is not a bug, they just fail.
> 
> But like I said in my post, there's no way we can disable suspend to
> disk when they do, right now anyway. Also, this can't be called any
> later than a late initcall or such since it's __init, and thus there
> shouldn't be memory pressure yet that would cause this to fail.
> 
> In any case, I'd be much happier with having a "disable suspend"
> variable so we could print a big warning and set that flag.

Patch would probably be accepted ;-).
								Pavel
-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [RFC][PATCH 0/3] swsusp: Do not use page flags (was: Re: Remove page flags for software suspend)
@ 2007-03-08 23:23                         ` Pavel Machek
  0 siblings, 0 replies; 85+ messages in thread
From: Pavel Machek @ 2007-03-08 23:23 UTC (permalink / raw)
  To: Johannes Berg
  Cc: Rafael J. Wysocki, Nick Piggin, Christoph Lameter, linux-mm,
	pm list, Peter Zijlstra

Hi!

> > That's a no-no. ATOMIC alocations can fail, and no, WARN_ON is not
> > enough. It is not a bug, they just fail.
> 
> But like I said in my post, there's no way we can disable suspend to
> disk when they do, right now anyway. Also, this can't be called any
> later than a late initcall or such since it's __init, and thus there
> shouldn't be memory pressure yet that would cause this to fail.
> 
> In any case, I'd be much happier with having a "disable suspend"
> variable so we could print a big warning and set that flag.

Patch would probably be accepted ;-).
								Pavel
-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [RFC][PATCH 0/3] swsusp: Do not use page flags (was: Re: Remove page flags for software suspend)
  2007-03-08 23:21                       ` Johannes Berg
@ 2007-03-08 23:34                         ` Rafael J. Wysocki
  -1 siblings, 0 replies; 85+ messages in thread
From: Rafael J. Wysocki @ 2007-03-08 23:34 UTC (permalink / raw)
  To: Johannes Berg
  Cc: Nick Piggin, Peter Zijlstra, Pavel Machek, linux-mm,
	Christoph Lameter, pm list

On Friday, 9 March 2007 00:21, Johannes Berg wrote:
> On Fri, 2007-03-09 at 00:15 +0100, Pavel Machek wrote:
> 
> > That's a no-no. ATOMIC alocations can fail, and no, WARN_ON is not
> > enough. It is not a bug, they just fail.
> 
> But like I said in my post, there's no way we can disable suspend to
> disk when they do, right now anyway. Also, this can't be called any
> later than a late initcall or such since it's __init, and thus there
> shouldn't be memory pressure yet that would cause this to fail.

Exactly.  If an atomic allocation fails at this stage, there is a bug IMHO
(although not necessarily in our code).

Still, the patch is not sufficient, so that's just a theoretical thing.

> In any case, I'd be much happier with having a "disable suspend"
> variable so we could print a big warning and set that flag.

Well, I think that if we can't get so little memory at this early stage, the
kernel will have much more trouble anyway. ;-)

Rafael

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [RFC][PATCH 0/3] swsusp: Do not use page flags (was: Re: Remove page flags for software suspend)
@ 2007-03-08 23:34                         ` Rafael J. Wysocki
  0 siblings, 0 replies; 85+ messages in thread
From: Rafael J. Wysocki @ 2007-03-08 23:34 UTC (permalink / raw)
  To: Johannes Berg
  Cc: Pavel Machek, Nick Piggin, Christoph Lameter, linux-mm, pm list,
	Peter Zijlstra

On Friday, 9 March 2007 00:21, Johannes Berg wrote:
> On Fri, 2007-03-09 at 00:15 +0100, Pavel Machek wrote:
> 
> > That's a no-no. ATOMIC alocations can fail, and no, WARN_ON is not
> > enough. It is not a bug, they just fail.
> 
> But like I said in my post, there's no way we can disable suspend to
> disk when they do, right now anyway. Also, this can't be called any
> later than a late initcall or such since it's __init, and thus there
> shouldn't be memory pressure yet that would cause this to fail.

Exactly.  If an atomic allocation fails at this stage, there is a bug IMHO
(although not necessarily in our code).

Still, the patch is not sufficient, so that's just a theoretical thing.

> In any case, I'd be much happier with having a "disable suspend"
> variable so we could print a big warning and set that flag.

Well, I think that if we can't get so little memory at this early stage, the
kernel will have much more trouble anyway. ;-)

Rafael

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [RFC][PATCH 0/3] swsusp: Do not use page flags (was: Re: Remove page flags for software suspend)
  2007-03-08 23:34                         ` Rafael J. Wysocki
@ 2007-03-08 23:36                           ` Pavel Machek
  -1 siblings, 0 replies; 85+ messages in thread
From: Pavel Machek @ 2007-03-08 23:36 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Nick Piggin, Peter Zijlstra, linux-mm, Christoph Lameter,
	Johannes Berg, pm list

Hi!

> > > That's a no-no. ATOMIC alocations can fail, and no, WARN_ON is not
> > > enough. It is not a bug, they just fail.
> > 
> > But like I said in my post, there's no way we can disable suspend to
> > disk when they do, right now anyway. Also, this can't be called any
> > later than a late initcall or such since it's __init, and thus there
> > shouldn't be memory pressure yet that would cause this to fail.
> 
> Exactly.  If an atomic allocation fails at this stage, there is a bug IMHO
> (although not necessarily in our code).

Ok, so just do a BUG().	 WARN_ON(), then do something subtly wrong
during suspend is evil.
								Pavel
-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [RFC][PATCH 0/3] swsusp: Do not use page flags (was: Re: Remove page flags for software suspend)
@ 2007-03-08 23:36                           ` Pavel Machek
  0 siblings, 0 replies; 85+ messages in thread
From: Pavel Machek @ 2007-03-08 23:36 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Johannes Berg, Nick Piggin, Christoph Lameter, linux-mm, pm list,
	Peter Zijlstra

Hi!

> > > That's a no-no. ATOMIC alocations can fail, and no, WARN_ON is not
> > > enough. It is not a bug, they just fail.
> > 
> > But like I said in my post, there's no way we can disable suspend to
> > disk when they do, right now anyway. Also, this can't be called any
> > later than a late initcall or such since it's __init, and thus there
> > shouldn't be memory pressure yet that would cause this to fail.
> 
> Exactly.  If an atomic allocation fails at this stage, there is a bug IMHO
> (although not necessarily in our code).

Ok, so just do a BUG().	 WARN_ON(), then do something subtly wrong
during suspend is evil.
								Pavel
-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: Remove page flags for software suspend
  2007-03-01 17:48           ` Remove page flags for software suspend Hugh Dickins
@ 2007-03-13  3:36             ` Nick Piggin
  0 siblings, 0 replies; 85+ messages in thread
From: Nick Piggin @ 2007-03-13  3:36 UTC (permalink / raw)
  To: Hugh Dickins; +Cc: Rafael J. Wysocki, Christoph Lameter, Pavel Machek, linux-mm

Sorry to take so long to reply. I was having issues with this account.

Hugh Dickins wrote:
> On Thu, 1 Mar 2007, Nick Piggin wrote:
> 
>>Let's make sure that no more backdoor page flags get allocated without
>>going through the linux-mm list to work out whether we really need it
>>or can live without it...
> 
> 
> On Fri, 2 Mar 2007, Nick Piggin wrote:
> 
>>I need one bit for lockless pagecache ;)
> 
> 
> Is that still your PageNoNewRefs thing?

Yes.

> What was wrong with my atomic_cmpxchg suggestion?

It is a very good suggestion. I think I ran into an issue where I
had wanted to set PG_nonewrefs for a page with an elevated refcount
or some other issue like that. Can't remember exactly -- I think it
is fixable by reworking some code, but I had wanted to do taht as a
subsequent patch.

-- 
SUSE Labs, Novell Inc.
Send instant messages to your online friends http://au.messenger.yahoo.com 

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [RFC][PATCH 1/3] swsusp: Do not use page flags directly
  2007-03-04 14:07                 ` Rafael J. Wysocki
@ 2007-03-13  4:45                   ` Nick Piggin
  -1 siblings, 0 replies; 85+ messages in thread
From: Nick Piggin @ 2007-03-13  4:45 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: linux-mm, Peter Zijlstra, Pavel Machek, pm list, Johannes Berg,
	Christoph Lameter

Rafael J. Wysocki wrote:
> Make swsusp stop using SetPageNosave(), SetPageNosaveFree() and friends
> directly.
> 
> This way the amount of changes made in the next patch is smaller.
> 
> ---
>  include/linux/suspend.h |   33 +++++++++++++++++++++++++++++++++
>  kernel/power/snapshot.c |   48 +++++++++++++++++++++++++-----------------------
>  mm/page_alloc.c         |    6 +++---
>  3 files changed, 61 insertions(+), 26 deletions(-)
> 
> Index: linux-2.6.21-rc2/include/linux/suspend.h
> ===================================================================
> --- linux-2.6.21-rc2.orig/include/linux/suspend.h	2007-03-02 09:05:53.000000000 +0100
> +++ linux-2.6.21-rc2/include/linux/suspend.h	2007-03-02 09:24:02.000000000 +0100
> @@ -8,6 +8,7 @@
>  #include <linux/notifier.h>
>  #include <linux/init.h>
>  #include <linux/pm.h>
> +#include <linux/mm.h>
>  
>  /* struct pbe is used for creating lists of pages that should be restored
>   * atomically during the resume from disk, because the page frames they have
> @@ -49,6 +50,38 @@ void __save_processor_state(struct saved
>  void __restore_processor_state(struct saved_context *ctxt);
>  unsigned long get_safe_page(gfp_t gfp_mask);
>  
> +/* Page management functions for the software suspend (swsusp) */
> +
> +static inline void swsusp_set_page_forbidden(struct page *page)
> +{
> +	SetPageNosave(page);
> +}
> +
> +static inline int swsusp_page_is_forbidden(struct page *page)
> +{
> +	return PageNosave(page);
> +}
> +
> +static inline void swsusp_unset_page_forbidden(struct page *page)
> +{
> +	ClearPageNosave(page);
> +}
> +
> +static inline void swsusp_set_page_free(struct page *page)
> +{
> +	SetPageNosaveFree(page);
> +}
> +
> +static inline int swsusp_page_is_free(struct page *page)
> +{
> +	return PageNosaveFree(page);
> +}
> +
> +static inline void swsusp_unset_page_free(struct page *page)
> +{
> +	ClearPageNosaveFree(page);
> +}

Hi,

I don't have much to do with swsusp, but I really prefer that a
page flag name should tell you what the property of the page is,
rather than what this subsystem should or shouldn't do with it.

I thought the page flag names I used were pretty nice, and a big
improvement overthe current page flag names.

-- 
SUSE Labs, Novell Inc.
Send instant messages to your online friends http://au.messenger.yahoo.com 

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [RFC][PATCH 1/3] swsusp: Do not use page flags directly
@ 2007-03-13  4:45                   ` Nick Piggin
  0 siblings, 0 replies; 85+ messages in thread
From: Nick Piggin @ 2007-03-13  4:45 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Pavel Machek, Christoph Lameter, linux-mm, pm list,
	Johannes Berg, Peter Zijlstra

Rafael J. Wysocki wrote:
> Make swsusp stop using SetPageNosave(), SetPageNosaveFree() and friends
> directly.
> 
> This way the amount of changes made in the next patch is smaller.
> 
> ---
>  include/linux/suspend.h |   33 +++++++++++++++++++++++++++++++++
>  kernel/power/snapshot.c |   48 +++++++++++++++++++++++++-----------------------
>  mm/page_alloc.c         |    6 +++---
>  3 files changed, 61 insertions(+), 26 deletions(-)
> 
> Index: linux-2.6.21-rc2/include/linux/suspend.h
> ===================================================================
> --- linux-2.6.21-rc2.orig/include/linux/suspend.h	2007-03-02 09:05:53.000000000 +0100
> +++ linux-2.6.21-rc2/include/linux/suspend.h	2007-03-02 09:24:02.000000000 +0100
> @@ -8,6 +8,7 @@
>  #include <linux/notifier.h>
>  #include <linux/init.h>
>  #include <linux/pm.h>
> +#include <linux/mm.h>
>  
>  /* struct pbe is used for creating lists of pages that should be restored
>   * atomically during the resume from disk, because the page frames they have
> @@ -49,6 +50,38 @@ void __save_processor_state(struct saved
>  void __restore_processor_state(struct saved_context *ctxt);
>  unsigned long get_safe_page(gfp_t gfp_mask);
>  
> +/* Page management functions for the software suspend (swsusp) */
> +
> +static inline void swsusp_set_page_forbidden(struct page *page)
> +{
> +	SetPageNosave(page);
> +}
> +
> +static inline int swsusp_page_is_forbidden(struct page *page)
> +{
> +	return PageNosave(page);
> +}
> +
> +static inline void swsusp_unset_page_forbidden(struct page *page)
> +{
> +	ClearPageNosave(page);
> +}
> +
> +static inline void swsusp_set_page_free(struct page *page)
> +{
> +	SetPageNosaveFree(page);
> +}
> +
> +static inline int swsusp_page_is_free(struct page *page)
> +{
> +	return PageNosaveFree(page);
> +}
> +
> +static inline void swsusp_unset_page_free(struct page *page)
> +{
> +	ClearPageNosaveFree(page);
> +}

Hi,

I don't have much to do with swsusp, but I really prefer that a
page flag name should tell you what the property of the page is,
rather than what this subsystem should or shouldn't do with it.

I thought the page flag names I used were pretty nice, and a big
improvement overthe current page flag names.

-- 
SUSE Labs, Novell Inc.
Send instant messages to your online friends http://au.messenger.yahoo.com 

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [RFC][PATCH 2/3] swsusp: Do not use page flags
  2007-03-04 14:07                 ` Rafael J. Wysocki
@ 2007-03-13  4:47                   ` Nick Piggin
  -1 siblings, 0 replies; 85+ messages in thread
From: Nick Piggin @ 2007-03-13  4:47 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: linux-mm, Peter Zijlstra, Pavel Machek, pm list, Johannes Berg,
	Christoph Lameter

Rafael J. Wysocki wrote:

>  }
>  
>  /**
> + *	This structure represents a range of page frames the contents of which
> + *	should not be saved during the suspend.
> + */
> +
> +struct nosave_region {
> +	struct list_head list;
> +	unsigned long start_pfn;
> +	unsigned long end_pfn;
> +};
> +
> +static LIST_HEAD(nosave_regions);
> +
> +/**
> + *	register_nosave_region - register a range of page frames the contents
> + *	of which should not be saved during the suspend (to be used in the early
> + *	initializatoion code)
> + */
> +
> +void __init
> +register_nosave_region(unsigned long start_pfn, unsigned long end_pfn)
> +{
> +	struct nosave_region *region;
> +
> +	if (start_pfn >= end_pfn)
> +		return;
> +
> +	if (!list_empty(&nosave_regions)) {
> +		/* Try to extend the previous region (they should be sorted) */
> +		region = list_entry(nosave_regions.prev,
> +					struct nosave_region, list);
> +		if (region->end_pfn == start_pfn) {
> +			region->end_pfn = end_pfn;
> +			goto Report;
> +		}
> +	}
> +	/* This allocation cannot fail */
> +	region = alloc_bootmem_low(sizeof(struct nosave_region));
> +	region->start_pfn = start_pfn;
> +	region->end_pfn = end_pfn;
> +	list_add_tail(&region->list, &nosave_regions);
> + Report:
> +	printk("swsusp: Registered nosave memory region: %016lx - %016lx\n",
> +		start_pfn << PAGE_SHIFT, end_pfn << PAGE_SHIFT);
> +}


I wonder why you reimplemented this and put it in snapshot.c, rather than
use my version which was nicely in its own file, had appropriate locking,
etc.?

-- 
SUSE Labs, Novell Inc.
Send instant messages to your online friends http://au.messenger.yahoo.com 

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [RFC][PATCH 2/3] swsusp: Do not use page flags
@ 2007-03-13  4:47                   ` Nick Piggin
  0 siblings, 0 replies; 85+ messages in thread
From: Nick Piggin @ 2007-03-13  4:47 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Pavel Machek, Christoph Lameter, linux-mm, pm list,
	Johannes Berg, Peter Zijlstra

Rafael J. Wysocki wrote:

>  }
>  
>  /**
> + *	This structure represents a range of page frames the contents of which
> + *	should not be saved during the suspend.
> + */
> +
> +struct nosave_region {
> +	struct list_head list;
> +	unsigned long start_pfn;
> +	unsigned long end_pfn;
> +};
> +
> +static LIST_HEAD(nosave_regions);
> +
> +/**
> + *	register_nosave_region - register a range of page frames the contents
> + *	of which should not be saved during the suspend (to be used in the early
> + *	initializatoion code)
> + */
> +
> +void __init
> +register_nosave_region(unsigned long start_pfn, unsigned long end_pfn)
> +{
> +	struct nosave_region *region;
> +
> +	if (start_pfn >= end_pfn)
> +		return;
> +
> +	if (!list_empty(&nosave_regions)) {
> +		/* Try to extend the previous region (they should be sorted) */
> +		region = list_entry(nosave_regions.prev,
> +					struct nosave_region, list);
> +		if (region->end_pfn == start_pfn) {
> +			region->end_pfn = end_pfn;
> +			goto Report;
> +		}
> +	}
> +	/* This allocation cannot fail */
> +	region = alloc_bootmem_low(sizeof(struct nosave_region));
> +	region->start_pfn = start_pfn;
> +	region->end_pfn = end_pfn;
> +	list_add_tail(&region->list, &nosave_regions);
> + Report:
> +	printk("swsusp: Registered nosave memory region: %016lx - %016lx\n",
> +		start_pfn << PAGE_SHIFT, end_pfn << PAGE_SHIFT);
> +}


I wonder why you reimplemented this and put it in snapshot.c, rather than
use my version which was nicely in its own file, had appropriate locking,
etc.?

-- 
SUSE Labs, Novell Inc.
Send instant messages to your online friends http://au.messenger.yahoo.com 

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [RFC][PATCH 2/3] swsusp: Do not use page flags
  2007-03-13  4:47                   ` Nick Piggin
@ 2007-03-13  9:16                     ` Rafael J. Wysocki
  -1 siblings, 0 replies; 85+ messages in thread
From: Rafael J. Wysocki @ 2007-03-13  9:16 UTC (permalink / raw)
  To: Nick Piggin
  Cc: linux-mm, Peter Zijlstra, Pavel Machek, pm list, Johannes Berg,
	Christoph Lameter

On Tuesday, 13 March 2007 05:47, Nick Piggin wrote:
> Rafael J. Wysocki wrote:
> 
> >  }
> >  
> >  /**
> > + *	This structure represents a range of page frames the contents of which
> > + *	should not be saved during the suspend.
> > + */
> > +
> > +struct nosave_region {
> > +	struct list_head list;
> > +	unsigned long start_pfn;
> > +	unsigned long end_pfn;
> > +};
> > +
> > +static LIST_HEAD(nosave_regions);
> > +
> > +/**
> > + *	register_nosave_region - register a range of page frames the contents
> > + *	of which should not be saved during the suspend (to be used in the early
> > + *	initializatoion code)
> > + */
> > +
> > +void __init
> > +register_nosave_region(unsigned long start_pfn, unsigned long end_pfn)
> > +{
> > +	struct nosave_region *region;
> > +
> > +	if (start_pfn >= end_pfn)
> > +		return;
> > +
> > +	if (!list_empty(&nosave_regions)) {
> > +		/* Try to extend the previous region (they should be sorted) */
> > +		region = list_entry(nosave_regions.prev,
> > +					struct nosave_region, list);
> > +		if (region->end_pfn == start_pfn) {
> > +			region->end_pfn = end_pfn;
> > +			goto Report;
> > +		}
> > +	}
> > +	/* This allocation cannot fail */
> > +	region = alloc_bootmem_low(sizeof(struct nosave_region));
> > +	region->start_pfn = start_pfn;
> > +	region->end_pfn = end_pfn;
> > +	list_add_tail(&region->list, &nosave_regions);
> > + Report:
> > +	printk("swsusp: Registered nosave memory region: %016lx - %016lx\n",
> > +		start_pfn << PAGE_SHIFT, end_pfn << PAGE_SHIFT);
> > +}
> 
> 
> I wonder why you reimplemented this and put it in snapshot.c, rather than
> use my version which was nicely in its own file, had appropriate locking,
> etc.?

Well, the locking is not necessary and we only need a list for that.  Also,
mark_nosave_pages() refers to a function that's invisible outside snapshot.c
and I didn't think it was a good idea to separate mark_nosave_pages()
from register_nosave_region().

Greetings,
Rafael

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [RFC][PATCH 2/3] swsusp: Do not use page flags
@ 2007-03-13  9:16                     ` Rafael J. Wysocki
  0 siblings, 0 replies; 85+ messages in thread
From: Rafael J. Wysocki @ 2007-03-13  9:16 UTC (permalink / raw)
  To: Nick Piggin
  Cc: Pavel Machek, Christoph Lameter, linux-mm, pm list,
	Johannes Berg, Peter Zijlstra

On Tuesday, 13 March 2007 05:47, Nick Piggin wrote:
> Rafael J. Wysocki wrote:
> 
> >  }
> >  
> >  /**
> > + *	This structure represents a range of page frames the contents of which
> > + *	should not be saved during the suspend.
> > + */
> > +
> > +struct nosave_region {
> > +	struct list_head list;
> > +	unsigned long start_pfn;
> > +	unsigned long end_pfn;
> > +};
> > +
> > +static LIST_HEAD(nosave_regions);
> > +
> > +/**
> > + *	register_nosave_region - register a range of page frames the contents
> > + *	of which should not be saved during the suspend (to be used in the early
> > + *	initializatoion code)
> > + */
> > +
> > +void __init
> > +register_nosave_region(unsigned long start_pfn, unsigned long end_pfn)
> > +{
> > +	struct nosave_region *region;
> > +
> > +	if (start_pfn >= end_pfn)
> > +		return;
> > +
> > +	if (!list_empty(&nosave_regions)) {
> > +		/* Try to extend the previous region (they should be sorted) */
> > +		region = list_entry(nosave_regions.prev,
> > +					struct nosave_region, list);
> > +		if (region->end_pfn == start_pfn) {
> > +			region->end_pfn = end_pfn;
> > +			goto Report;
> > +		}
> > +	}
> > +	/* This allocation cannot fail */
> > +	region = alloc_bootmem_low(sizeof(struct nosave_region));
> > +	region->start_pfn = start_pfn;
> > +	region->end_pfn = end_pfn;
> > +	list_add_tail(&region->list, &nosave_regions);
> > + Report:
> > +	printk("swsusp: Registered nosave memory region: %016lx - %016lx\n",
> > +		start_pfn << PAGE_SHIFT, end_pfn << PAGE_SHIFT);
> > +}
> 
> 
> I wonder why you reimplemented this and put it in snapshot.c, rather than
> use my version which was nicely in its own file, had appropriate locking,
> etc.?

Well, the locking is not necessary and we only need a list for that.  Also,
mark_nosave_pages() refers to a function that's invisible outside snapshot.c
and I didn't think it was a good idea to separate mark_nosave_pages()
from register_nosave_region().

Greetings,
Rafael

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [RFC][PATCH 2/3] swsusp: Do not use page flags
  2007-03-13  9:16                     ` Rafael J. Wysocki
@ 2007-03-13  9:23                       ` Nick Piggin
  -1 siblings, 0 replies; 85+ messages in thread
From: Nick Piggin @ 2007-03-13  9:23 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: linux-mm, Peter Zijlstra, Pavel Machek, pm list, Johannes Berg,
	Christoph Lameter

Rafael J. Wysocki wrote:
> On Tuesday, 13 March 2007 05:47, Nick Piggin wrote:
> 
>>Rafael J. Wysocki wrote:
>>
>>
>>> }
>>> 
>>> /**
>>>+ *	This structure represents a range of page frames the contents of which
>>>+ *	should not be saved during the suspend.
>>>+ */
>>>+
>>>+struct nosave_region {
>>>+	struct list_head list;
>>>+	unsigned long start_pfn;
>>>+	unsigned long end_pfn;
>>>+};
>>>+
>>>+static LIST_HEAD(nosave_regions);
>>>+
>>>+/**
>>>+ *	register_nosave_region - register a range of page frames the contents
>>>+ *	of which should not be saved during the suspend (to be used in the early
>>>+ *	initializatoion code)
>>>+ */
>>>+
>>>+void __init
>>>+register_nosave_region(unsigned long start_pfn, unsigned long end_pfn)
>>>+{
>>>+	struct nosave_region *region;
>>>+
>>>+	if (start_pfn >= end_pfn)
>>>+		return;
>>>+
>>>+	if (!list_empty(&nosave_regions)) {
>>>+		/* Try to extend the previous region (they should be sorted) */
>>>+		region = list_entry(nosave_regions.prev,
>>>+					struct nosave_region, list);
>>>+		if (region->end_pfn == start_pfn) {
>>>+			region->end_pfn = end_pfn;
>>>+			goto Report;
>>>+		}
>>>+	}
>>>+	/* This allocation cannot fail */
>>>+	region = alloc_bootmem_low(sizeof(struct nosave_region));
>>>+	region->start_pfn = start_pfn;
>>>+	region->end_pfn = end_pfn;
>>>+	list_add_tail(&region->list, &nosave_regions);
>>>+ Report:
>>>+	printk("swsusp: Registered nosave memory region: %016lx - %016lx\n",
>>>+		start_pfn << PAGE_SHIFT, end_pfn << PAGE_SHIFT);
>>>+}
>>
>>
>>I wonder why you reimplemented this and put it in snapshot.c, rather than
>>use my version which was nicely in its own file, had appropriate locking,
>>etc.?
> 
> 
> Well, the locking is not necessary and we only need a list for that.  Also,

I wouldn't say that. You're creating an interface here that is going to be
used outside swsusp. Users of that interface may not need locking now, but
that could cause problems down the line.

Sure you don't _need_ an rbtree, but our implementation makes it so simple
that there isn't much downside.

> mark_nosave_pages() refers to a function that's invisible outside snapshot.c
> and I didn't think it was a good idea to separate mark_nosave_pages()
> from register_nosave_region().

But that's because you even use mark_nosave_pages in your implementation.
Mine uses the nosave regions directly.

-- 
SUSE Labs, Novell Inc.
Send instant messages to your online friends http://au.messenger.yahoo.com 

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [RFC][PATCH 2/3] swsusp: Do not use page flags
@ 2007-03-13  9:23                       ` Nick Piggin
  0 siblings, 0 replies; 85+ messages in thread
From: Nick Piggin @ 2007-03-13  9:23 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Pavel Machek, Christoph Lameter, linux-mm, pm list,
	Johannes Berg, Peter Zijlstra

Rafael J. Wysocki wrote:
> On Tuesday, 13 March 2007 05:47, Nick Piggin wrote:
> 
>>Rafael J. Wysocki wrote:
>>
>>
>>> }
>>> 
>>> /**
>>>+ *	This structure represents a range of page frames the contents of which
>>>+ *	should not be saved during the suspend.
>>>+ */
>>>+
>>>+struct nosave_region {
>>>+	struct list_head list;
>>>+	unsigned long start_pfn;
>>>+	unsigned long end_pfn;
>>>+};
>>>+
>>>+static LIST_HEAD(nosave_regions);
>>>+
>>>+/**
>>>+ *	register_nosave_region - register a range of page frames the contents
>>>+ *	of which should not be saved during the suspend (to be used in the early
>>>+ *	initializatoion code)
>>>+ */
>>>+
>>>+void __init
>>>+register_nosave_region(unsigned long start_pfn, unsigned long end_pfn)
>>>+{
>>>+	struct nosave_region *region;
>>>+
>>>+	if (start_pfn >= end_pfn)
>>>+		return;
>>>+
>>>+	if (!list_empty(&nosave_regions)) {
>>>+		/* Try to extend the previous region (they should be sorted) */
>>>+		region = list_entry(nosave_regions.prev,
>>>+					struct nosave_region, list);
>>>+		if (region->end_pfn == start_pfn) {
>>>+			region->end_pfn = end_pfn;
>>>+			goto Report;
>>>+		}
>>>+	}
>>>+	/* This allocation cannot fail */
>>>+	region = alloc_bootmem_low(sizeof(struct nosave_region));
>>>+	region->start_pfn = start_pfn;
>>>+	region->end_pfn = end_pfn;
>>>+	list_add_tail(&region->list, &nosave_regions);
>>>+ Report:
>>>+	printk("swsusp: Registered nosave memory region: %016lx - %016lx\n",
>>>+		start_pfn << PAGE_SHIFT, end_pfn << PAGE_SHIFT);
>>>+}
>>
>>
>>I wonder why you reimplemented this and put it in snapshot.c, rather than
>>use my version which was nicely in its own file, had appropriate locking,
>>etc.?
> 
> 
> Well, the locking is not necessary and we only need a list for that.  Also,

I wouldn't say that. You're creating an interface here that is going to be
used outside swsusp. Users of that interface may not need locking now, but
that could cause problems down the line.

Sure you don't _need_ an rbtree, but our implementation makes it so simple
that there isn't much downside.

> mark_nosave_pages() refers to a function that's invisible outside snapshot.c
> and I didn't think it was a good idea to separate mark_nosave_pages()
> from register_nosave_region().

But that's because you even use mark_nosave_pages in your implementation.
Mine uses the nosave regions directly.

-- 
SUSE Labs, Novell Inc.
Send instant messages to your online friends http://au.messenger.yahoo.com 

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [RFC][PATCH 2/3] swsusp: Do not use page flags
  2007-03-13  9:23                       ` Nick Piggin
@ 2007-03-13 10:17                         ` Rafael J. Wysocki
  -1 siblings, 0 replies; 85+ messages in thread
From: Rafael J. Wysocki @ 2007-03-13 10:17 UTC (permalink / raw)
  To: Nick Piggin
  Cc: linux-mm, Peter Zijlstra, Pavel Machek, pm list, Johannes Berg,
	Christoph Lameter

On Tuesday, 13 March 2007 10:23, Nick Piggin wrote:
> Rafael J. Wysocki wrote:
> > On Tuesday, 13 March 2007 05:47, Nick Piggin wrote:
> > 
> >>Rafael J. Wysocki wrote:
> >>
> >>
> >>> }
> >>> 
> >>> /**
> >>>+ *	This structure represents a range of page frames the contents of which
> >>>+ *	should not be saved during the suspend.
> >>>+ */
> >>>+
> >>>+struct nosave_region {
> >>>+	struct list_head list;
> >>>+	unsigned long start_pfn;
> >>>+	unsigned long end_pfn;
> >>>+};
> >>>+
> >>>+static LIST_HEAD(nosave_regions);
> >>>+
> >>>+/**
> >>>+ *	register_nosave_region - register a range of page frames the contents
> >>>+ *	of which should not be saved during the suspend (to be used in the early
> >>>+ *	initializatoion code)
> >>>+ */
> >>>+
> >>>+void __init
> >>>+register_nosave_region(unsigned long start_pfn, unsigned long end_pfn)
> >>>+{
> >>>+	struct nosave_region *region;
> >>>+
> >>>+	if (start_pfn >= end_pfn)
> >>>+		return;
> >>>+
> >>>+	if (!list_empty(&nosave_regions)) {
> >>>+		/* Try to extend the previous region (they should be sorted) */
> >>>+		region = list_entry(nosave_regions.prev,
> >>>+					struct nosave_region, list);
> >>>+		if (region->end_pfn == start_pfn) {
> >>>+			region->end_pfn = end_pfn;
> >>>+			goto Report;
> >>>+		}
> >>>+	}
> >>>+	/* This allocation cannot fail */
> >>>+	region = alloc_bootmem_low(sizeof(struct nosave_region));
> >>>+	region->start_pfn = start_pfn;
> >>>+	region->end_pfn = end_pfn;
> >>>+	list_add_tail(&region->list, &nosave_regions);
> >>>+ Report:
> >>>+	printk("swsusp: Registered nosave memory region: %016lx - %016lx\n",
> >>>+		start_pfn << PAGE_SHIFT, end_pfn << PAGE_SHIFT);
> >>>+}
> >>
> >>
> >>I wonder why you reimplemented this and put it in snapshot.c, rather than
> >>use my version which was nicely in its own file, had appropriate locking,
> >>etc.?
> > 
> > 
> > Well, the locking is not necessary and we only need a list for that.  Also,
> 
> I wouldn't say that. You're creating an interface here that is going to be
> used outside swsusp. Users of that interface may not need locking now, but
> that could cause problems down the line.

I think we can add the locking when it's necessary.  For now, IMHO, it could be
confusing to someone who doesn't know the locking is not needed.

> Sure you don't _need_ an rbtree, but our implementation makes it so simple
> that there isn't much downside.

Not much, but the code is more complicated.

> > mark_nosave_pages() refers to a function that's invisible outside snapshot.c
> > and I didn't think it was a good idea to separate mark_nosave_pages()
> > from register_nosave_region().
> 
> But that's because you even use mark_nosave_pages in your implementation.
> Mine uses the nosave regions directly.

Well, I think we need two bits per page anyway, to mark free pages and
pages allocated by swsusp, so using the nosave regions directly won't save us
much.

Still, for now the bits are not optimally used, but I'm going to change that.
I just wanted to avoid making too many changes at a time in this particular
patch series.

Greetings,
Rafael

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [RFC][PATCH 2/3] swsusp: Do not use page flags
@ 2007-03-13 10:17                         ` Rafael J. Wysocki
  0 siblings, 0 replies; 85+ messages in thread
From: Rafael J. Wysocki @ 2007-03-13 10:17 UTC (permalink / raw)
  To: Nick Piggin
  Cc: Pavel Machek, Christoph Lameter, linux-mm, pm list,
	Johannes Berg, Peter Zijlstra

On Tuesday, 13 March 2007 10:23, Nick Piggin wrote:
> Rafael J. Wysocki wrote:
> > On Tuesday, 13 March 2007 05:47, Nick Piggin wrote:
> > 
> >>Rafael J. Wysocki wrote:
> >>
> >>
> >>> }
> >>> 
> >>> /**
> >>>+ *	This structure represents a range of page frames the contents of which
> >>>+ *	should not be saved during the suspend.
> >>>+ */
> >>>+
> >>>+struct nosave_region {
> >>>+	struct list_head list;
> >>>+	unsigned long start_pfn;
> >>>+	unsigned long end_pfn;
> >>>+};
> >>>+
> >>>+static LIST_HEAD(nosave_regions);
> >>>+
> >>>+/**
> >>>+ *	register_nosave_region - register a range of page frames the contents
> >>>+ *	of which should not be saved during the suspend (to be used in the early
> >>>+ *	initializatoion code)
> >>>+ */
> >>>+
> >>>+void __init
> >>>+register_nosave_region(unsigned long start_pfn, unsigned long end_pfn)
> >>>+{
> >>>+	struct nosave_region *region;
> >>>+
> >>>+	if (start_pfn >= end_pfn)
> >>>+		return;
> >>>+
> >>>+	if (!list_empty(&nosave_regions)) {
> >>>+		/* Try to extend the previous region (they should be sorted) */
> >>>+		region = list_entry(nosave_regions.prev,
> >>>+					struct nosave_region, list);
> >>>+		if (region->end_pfn == start_pfn) {
> >>>+			region->end_pfn = end_pfn;
> >>>+			goto Report;
> >>>+		}
> >>>+	}
> >>>+	/* This allocation cannot fail */
> >>>+	region = alloc_bootmem_low(sizeof(struct nosave_region));
> >>>+	region->start_pfn = start_pfn;
> >>>+	region->end_pfn = end_pfn;
> >>>+	list_add_tail(&region->list, &nosave_regions);
> >>>+ Report:
> >>>+	printk("swsusp: Registered nosave memory region: %016lx - %016lx\n",
> >>>+		start_pfn << PAGE_SHIFT, end_pfn << PAGE_SHIFT);
> >>>+}
> >>
> >>
> >>I wonder why you reimplemented this and put it in snapshot.c, rather than
> >>use my version which was nicely in its own file, had appropriate locking,
> >>etc.?
> > 
> > 
> > Well, the locking is not necessary and we only need a list for that.  Also,
> 
> I wouldn't say that. You're creating an interface here that is going to be
> used outside swsusp. Users of that interface may not need locking now, but
> that could cause problems down the line.

I think we can add the locking when it's necessary.  For now, IMHO, it could be
confusing to someone who doesn't know the locking is not needed.

> Sure you don't _need_ an rbtree, but our implementation makes it so simple
> that there isn't much downside.

Not much, but the code is more complicated.

> > mark_nosave_pages() refers to a function that's invisible outside snapshot.c
> > and I didn't think it was a good idea to separate mark_nosave_pages()
> > from register_nosave_region().
> 
> But that's because you even use mark_nosave_pages in your implementation.
> Mine uses the nosave regions directly.

Well, I think we need two bits per page anyway, to mark free pages and
pages allocated by swsusp, so using the nosave regions directly won't save us
much.

Still, for now the bits are not optimally used, but I'm going to change that.
I just wanted to avoid making too many changes at a time in this particular
patch series.

Greetings,
Rafael

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [RFC][PATCH 2/3] swsusp: Do not use page flags
  2007-03-13 10:17                         ` Rafael J. Wysocki
@ 2007-03-13 10:31                           ` Nick Piggin
  -1 siblings, 0 replies; 85+ messages in thread
From: Nick Piggin @ 2007-03-13 10:31 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: linux-mm, Peter Zijlstra, Pavel Machek, pm list, Johannes Berg,
	Christoph Lameter

Rafael J. Wysocki wrote:
> On Tuesday, 13 March 2007 10:23, Nick Piggin wrote:
> 

>>I wouldn't say that. You're creating an interface here that is going to be
>>used outside swsusp. Users of that interface may not need locking now, but
>>that could cause problems down the line.
> 
> 
> I think we can add the locking when it's necessary.  For now, IMHO, it could be
> confusing to someone who doesn't know the locking is not needed.

I don't know why it would confuse them. We just define the API to
guarantee the correct locking, and that means the locking _is_ needed.

You don't have to care what the callers are doing. That's the beauty
of a sane API.


>>Sure you don't _need_ an rbtree, but our implementation makes it so simple
>>that there isn't much downside.
> 
> 
> Not much, but the code is more complicated.

But it's in its own file and has a contained API, so it is very easy
to review, test and verify.


>>>mark_nosave_pages() refers to a function that's invisible outside snapshot.c
>>>and I didn't think it was a good idea to separate mark_nosave_pages()
>>>from register_nosave_region().
>>
>>But that's because you even use mark_nosave_pages in your implementation.
>>Mine uses the nosave regions directly.
> 
> 
> Well, I think we need two bits per page anyway, to mark free pages and
> pages allocated by swsusp, so using the nosave regions directly won't save us
> much.

Well I think it is a cleaner though.

-- 
SUSE Labs, Novell Inc.
Send instant messages to your online friends http://au.messenger.yahoo.com 

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [RFC][PATCH 2/3] swsusp: Do not use page flags
@ 2007-03-13 10:31                           ` Nick Piggin
  0 siblings, 0 replies; 85+ messages in thread
From: Nick Piggin @ 2007-03-13 10:31 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Pavel Machek, Christoph Lameter, linux-mm, pm list,
	Johannes Berg, Peter Zijlstra

Rafael J. Wysocki wrote:
> On Tuesday, 13 March 2007 10:23, Nick Piggin wrote:
> 

>>I wouldn't say that. You're creating an interface here that is going to be
>>used outside swsusp. Users of that interface may not need locking now, but
>>that could cause problems down the line.
> 
> 
> I think we can add the locking when it's necessary.  For now, IMHO, it could be
> confusing to someone who doesn't know the locking is not needed.

I don't know why it would confuse them. We just define the API to
guarantee the correct locking, and that means the locking _is_ needed.

You don't have to care what the callers are doing. That's the beauty
of a sane API.


>>Sure you don't _need_ an rbtree, but our implementation makes it so simple
>>that there isn't much downside.
> 
> 
> Not much, but the code is more complicated.

But it's in its own file and has a contained API, so it is very easy
to review, test and verify.


>>>mark_nosave_pages() refers to a function that's invisible outside snapshot.c
>>>and I didn't think it was a good idea to separate mark_nosave_pages()
>>>from register_nosave_region().
>>
>>But that's because you even use mark_nosave_pages in your implementation.
>>Mine uses the nosave regions directly.
> 
> 
> Well, I think we need two bits per page anyway, to mark free pages and
> pages allocated by swsusp, so using the nosave regions directly won't save us
> much.

Well I think it is a cleaner though.

-- 
SUSE Labs, Novell Inc.
Send instant messages to your online friends http://au.messenger.yahoo.com 

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [RFC][PATCH 2/3] swsusp: Do not use page flags
  2007-03-13 10:31                           ` Nick Piggin
@ 2007-03-13 21:20                             ` Rafael J. Wysocki
  -1 siblings, 0 replies; 85+ messages in thread
From: Rafael J. Wysocki @ 2007-03-13 21:20 UTC (permalink / raw)
  To: Nick Piggin
  Cc: linux-mm, Peter Zijlstra, Pavel Machek, pm list, Johannes Berg,
	Christoph Lameter

On Tuesday, 13 March 2007 11:31, Nick Piggin wrote:
> Rafael J. Wysocki wrote:
> > On Tuesday, 13 March 2007 10:23, Nick Piggin wrote:
> > 
> 
> >>I wouldn't say that. You're creating an interface here that is going to be
> >>used outside swsusp. Users of that interface may not need locking now, but
> >>that could cause problems down the line.
> > 
> > 
> > I think we can add the locking when it's necessary.  For now, IMHO, it could be
> > confusing to someone who doesn't know the locking is not needed.
> 
> I don't know why it would confuse them. We just define the API to
> guarantee the correct locking, and that means the locking _is_ needed.

Even if there are no users that actually need the locking and probably never
will be?

For now, register_nosave_region() is to be called by architecture
initialization code _only_ and there's no reason whatsoever why any
architecture would need to call it concurrently from many places.

> You don't have to care what the callers are doing. That's the beauty
> of a sane API.

Well, I don't think adding unneded infrastructure is a good thing.

> >>Sure you don't _need_ an rbtree, but our implementation makes it so simple
> >>that there isn't much downside.
> > 
> > 
> > Not much, but the code is more complicated.
> 
> But it's in its own file and has a contained API, so it is very easy
> to review, test and verify.

I'm still thinking that register_nosave_region() in its current form is simpler
and easier to review than the code using rbtrees.  Still, of course this only
is my personal opinion. :-)

> >>>mark_nosave_pages() refers to a function that's invisible outside snapshot.c
> >>>and I didn't think it was a good idea to separate mark_nosave_pages()
> >>>from register_nosave_region().
> >>
> >>But that's because you even use mark_nosave_pages in your implementation.
> >>Mine uses the nosave regions directly.
> > 
> > 
> > Well, I think we need two bits per page anyway, to mark free pages and
> > pages allocated by swsusp, so using the nosave regions directly won't save us
> > much.
> 
> Well I think it is a cleaner though.

This is a matter of opinion, too ...

Greetings,
Rafael

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [RFC][PATCH 2/3] swsusp: Do not use page flags
@ 2007-03-13 21:20                             ` Rafael J. Wysocki
  0 siblings, 0 replies; 85+ messages in thread
From: Rafael J. Wysocki @ 2007-03-13 21:20 UTC (permalink / raw)
  To: Nick Piggin
  Cc: Pavel Machek, Christoph Lameter, linux-mm, pm list,
	Johannes Berg, Peter Zijlstra

On Tuesday, 13 March 2007 11:31, Nick Piggin wrote:
> Rafael J. Wysocki wrote:
> > On Tuesday, 13 March 2007 10:23, Nick Piggin wrote:
> > 
> 
> >>I wouldn't say that. You're creating an interface here that is going to be
> >>used outside swsusp. Users of that interface may not need locking now, but
> >>that could cause problems down the line.
> > 
> > 
> > I think we can add the locking when it's necessary.  For now, IMHO, it could be
> > confusing to someone who doesn't know the locking is not needed.
> 
> I don't know why it would confuse them. We just define the API to
> guarantee the correct locking, and that means the locking _is_ needed.

Even if there are no users that actually need the locking and probably never
will be?

For now, register_nosave_region() is to be called by architecture
initialization code _only_ and there's no reason whatsoever why any
architecture would need to call it concurrently from many places.

> You don't have to care what the callers are doing. That's the beauty
> of a sane API.

Well, I don't think adding unneded infrastructure is a good thing.

> >>Sure you don't _need_ an rbtree, but our implementation makes it so simple
> >>that there isn't much downside.
> > 
> > 
> > Not much, but the code is more complicated.
> 
> But it's in its own file and has a contained API, so it is very easy
> to review, test and verify.

I'm still thinking that register_nosave_region() in its current form is simpler
and easier to review than the code using rbtrees.  Still, of course this only
is my personal opinion. :-)

> >>>mark_nosave_pages() refers to a function that's invisible outside snapshot.c
> >>>and I didn't think it was a good idea to separate mark_nosave_pages()
> >>>from register_nosave_region().
> >>
> >>But that's because you even use mark_nosave_pages in your implementation.
> >>Mine uses the nosave regions directly.
> > 
> > 
> > Well, I think we need two bits per page anyway, to mark free pages and
> > pages allocated by swsusp, so using the nosave regions directly won't save us
> > much.
> 
> Well I think it is a cleaner though.

This is a matter of opinion, too ...

Greetings,
Rafael

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [RFC][PATCH 2/3] swsusp: Do not use page flags
  2007-03-13 21:20                             ` Rafael J. Wysocki
@ 2007-03-14  3:17                               ` Nick Piggin
  -1 siblings, 0 replies; 85+ messages in thread
From: Nick Piggin @ 2007-03-14  3:17 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: linux-mm, Peter Zijlstra, Pavel Machek, pm list, Johannes Berg,
	Christoph Lameter

Rafael J. Wysocki wrote:
> On Tuesday, 13 March 2007 11:31, Nick Piggin wrote:
> 
>>Rafael J. Wysocki wrote:
>>
>>>On Tuesday, 13 March 2007 10:23, Nick Piggin wrote:
>>>
>>
>>>>I wouldn't say that. You're creating an interface here that is going to be
>>>>used outside swsusp. Users of that interface may not need locking now, but
>>>>that could cause problems down the line.
>>>
>>>
>>>I think we can add the locking when it's necessary.  For now, IMHO, it could be
>>>confusing to someone who doesn't know the locking is not needed.
>>
>>I don't know why it would confuse them. We just define the API to
>>guarantee the correct locking, and that means the locking _is_ needed.
> 
> 
> Even if there are no users that actually need the locking and probably never
> will be?

Probably is the keyword.

Why would you *not* make this a sane API? Surely performance isn't the
reason? Nor complexity.

> For now, register_nosave_region() is to be called by architecture
> initialization code _only_ and there's no reason whatsoever why any
> architecture would need to call it concurrently from many places.
> 
> 
>>You don't have to care what the callers are doing. That's the beauty
>>of a sane API.
> 
> 
> Well, I don't think adding unneded infrastructure is a good thing.


But defining good APIs is a very good thing. And with my good API, the
lock is not unneeded.

>>>>But that's because you even use mark_nosave_pages in your implementation.
>>>>Mine uses the nosave regions directly.
>>>
>>>
>>>Well, I think we need two bits per page anyway, to mark free pages and
>>>pages allocated by swsusp, so using the nosave regions directly won't save us
>>>much.
>>
>>Well I think it is a cleaner though.
> 
> 
> This is a matter of opinion, too ...


Well, as I'm not volunteering to maintain swsusp, if your opinion is that
your way is cleaner, I can't argue ;) So long as it stops wasting those page
flags then I'm happy.

However the register_nosave API really should use locking, I think. There
is absolutely no downside, AFAIKS.

-- 
SUSE Labs, Novell Inc.
Send instant messages to your online friends http://au.messenger.yahoo.com 

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [RFC][PATCH 2/3] swsusp: Do not use page flags
@ 2007-03-14  3:17                               ` Nick Piggin
  0 siblings, 0 replies; 85+ messages in thread
From: Nick Piggin @ 2007-03-14  3:17 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Pavel Machek, Christoph Lameter, linux-mm, pm list,
	Johannes Berg, Peter Zijlstra

Rafael J. Wysocki wrote:
> On Tuesday, 13 March 2007 11:31, Nick Piggin wrote:
> 
>>Rafael J. Wysocki wrote:
>>
>>>On Tuesday, 13 March 2007 10:23, Nick Piggin wrote:
>>>
>>
>>>>I wouldn't say that. You're creating an interface here that is going to be
>>>>used outside swsusp. Users of that interface may not need locking now, but
>>>>that could cause problems down the line.
>>>
>>>
>>>I think we can add the locking when it's necessary.  For now, IMHO, it could be
>>>confusing to someone who doesn't know the locking is not needed.
>>
>>I don't know why it would confuse them. We just define the API to
>>guarantee the correct locking, and that means the locking _is_ needed.
> 
> 
> Even if there are no users that actually need the locking and probably never
> will be?

Probably is the keyword.

Why would you *not* make this a sane API? Surely performance isn't the
reason? Nor complexity.

> For now, register_nosave_region() is to be called by architecture
> initialization code _only_ and there's no reason whatsoever why any
> architecture would need to call it concurrently from many places.
> 
> 
>>You don't have to care what the callers are doing. That's the beauty
>>of a sane API.
> 
> 
> Well, I don't think adding unneded infrastructure is a good thing.


But defining good APIs is a very good thing. And with my good API, the
lock is not unneeded.

>>>>But that's because you even use mark_nosave_pages in your implementation.
>>>>Mine uses the nosave regions directly.
>>>
>>>
>>>Well, I think we need two bits per page anyway, to mark free pages and
>>>pages allocated by swsusp, so using the nosave regions directly won't save us
>>>much.
>>
>>Well I think it is a cleaner though.
> 
> 
> This is a matter of opinion, too ...


Well, as I'm not volunteering to maintain swsusp, if your opinion is that
your way is cleaner, I can't argue ;) So long as it stops wasting those page
flags then I'm happy.

However the register_nosave API really should use locking, I think. There
is absolutely no downside, AFAIKS.

-- 
SUSE Labs, Novell Inc.
Send instant messages to your online friends http://au.messenger.yahoo.com 

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [RFC][PATCH 2/3] swsusp: Do not use page flags
  2007-03-14  3:17                               ` Nick Piggin
@ 2007-03-14  8:30                                 ` Rafael J. Wysocki
  -1 siblings, 0 replies; 85+ messages in thread
From: Rafael J. Wysocki @ 2007-03-14  8:30 UTC (permalink / raw)
  To: Nick Piggin
  Cc: linux-mm, Peter Zijlstra, Pavel Machek, pm list, Johannes Berg,
	Christoph Lameter

On Wednesday, 14 March 2007 04:17, Nick Piggin wrote:
> Rafael J. Wysocki wrote:
> > On Tuesday, 13 March 2007 11:31, Nick Piggin wrote:
> > 
> >>Rafael J. Wysocki wrote:
> >>
> >>>On Tuesday, 13 March 2007 10:23, Nick Piggin wrote:
> >>>
> >>
> >>>>I wouldn't say that. You're creating an interface here that is going to be
> >>>>used outside swsusp. Users of that interface may not need locking now, but
> >>>>that could cause problems down the line.
> >>>
> >>>
> >>>I think we can add the locking when it's necessary.  For now, IMHO, it could be
> >>>confusing to someone who doesn't know the locking is not needed.
> >>
> >>I don't know why it would confuse them. We just define the API to
> >>guarantee the correct locking, and that means the locking _is_ needed.
> > 
> > 
> > Even if there are no users that actually need the locking and probably never
> > will be?
> 
> Probably is the keyword.
> 
> Why would you *not* make this a sane API? Surely performance isn't the
> reason? Nor complexity.
> 
> > For now, register_nosave_region() is to be called by architecture
> > initialization code _only_ and there's no reason whatsoever why any
> > architecture would need to call it concurrently from many places.
> > 
> > 
> >>You don't have to care what the callers are doing. That's the beauty
> >>of a sane API.
> > 
> > 
> > Well, I don't think adding unneded infrastructure is a good thing.
> 
> 
> But defining good APIs is a very good thing. And with my good API, the
> lock is not unneeded.
> 
> >>>>But that's because you even use mark_nosave_pages in your implementation.
> >>>>Mine uses the nosave regions directly.
> >>>
> >>>
> >>>Well, I think we need two bits per page anyway, to mark free pages and
> >>>pages allocated by swsusp, so using the nosave regions directly won't save us
> >>>much.
> >>
> >>Well I think it is a cleaner though.
> > 
> > 
> > This is a matter of opinion, too ...
> 
> 
> Well, as I'm not volunteering to maintain swsusp, if your opinion is that
> your way is cleaner, I can't argue ;) So long as it stops wasting those page
> flags then I'm happy.
> 
> However the register_nosave API really should use locking, I think. There
> is absolutely no downside, AFAIKS.

Hm, I can add the lock, but currently register_nosave_region() calls
alloc_bootmem_low() directly, which means it won't even work outside the
early initialization code which is single-threaded.  For this reason I think
that the locking can be added later, when register_nosave_region() is
extended so that it can be called from other contexts too.

Greetings,
Rafael

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [RFC][PATCH 2/3] swsusp: Do not use page flags
@ 2007-03-14  8:30                                 ` Rafael J. Wysocki
  0 siblings, 0 replies; 85+ messages in thread
From: Rafael J. Wysocki @ 2007-03-14  8:30 UTC (permalink / raw)
  To: Nick Piggin
  Cc: Pavel Machek, Christoph Lameter, linux-mm, pm list,
	Johannes Berg, Peter Zijlstra

On Wednesday, 14 March 2007 04:17, Nick Piggin wrote:
> Rafael J. Wysocki wrote:
> > On Tuesday, 13 March 2007 11:31, Nick Piggin wrote:
> > 
> >>Rafael J. Wysocki wrote:
> >>
> >>>On Tuesday, 13 March 2007 10:23, Nick Piggin wrote:
> >>>
> >>
> >>>>I wouldn't say that. You're creating an interface here that is going to be
> >>>>used outside swsusp. Users of that interface may not need locking now, but
> >>>>that could cause problems down the line.
> >>>
> >>>
> >>>I think we can add the locking when it's necessary.  For now, IMHO, it could be
> >>>confusing to someone who doesn't know the locking is not needed.
> >>
> >>I don't know why it would confuse them. We just define the API to
> >>guarantee the correct locking, and that means the locking _is_ needed.
> > 
> > 
> > Even if there are no users that actually need the locking and probably never
> > will be?
> 
> Probably is the keyword.
> 
> Why would you *not* make this a sane API? Surely performance isn't the
> reason? Nor complexity.
> 
> > For now, register_nosave_region() is to be called by architecture
> > initialization code _only_ and there's no reason whatsoever why any
> > architecture would need to call it concurrently from many places.
> > 
> > 
> >>You don't have to care what the callers are doing. That's the beauty
> >>of a sane API.
> > 
> > 
> > Well, I don't think adding unneded infrastructure is a good thing.
> 
> 
> But defining good APIs is a very good thing. And with my good API, the
> lock is not unneeded.
> 
> >>>>But that's because you even use mark_nosave_pages in your implementation.
> >>>>Mine uses the nosave regions directly.
> >>>
> >>>
> >>>Well, I think we need two bits per page anyway, to mark free pages and
> >>>pages allocated by swsusp, so using the nosave regions directly won't save us
> >>>much.
> >>
> >>Well I think it is a cleaner though.
> > 
> > 
> > This is a matter of opinion, too ...
> 
> 
> Well, as I'm not volunteering to maintain swsusp, if your opinion is that
> your way is cleaner, I can't argue ;) So long as it stops wasting those page
> flags then I'm happy.
> 
> However the register_nosave API really should use locking, I think. There
> is absolutely no downside, AFAIKS.

Hm, I can add the lock, but currently register_nosave_region() calls
alloc_bootmem_low() directly, which means it won't even work outside the
early initialization code which is single-threaded.  For this reason I think
that the locking can be added later, when register_nosave_region() is
extended so that it can be called from other contexts too.

Greetings,
Rafael

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 85+ messages in thread

end of thread, other threads:[~2007-03-14  8:30 UTC | newest]

Thread overview: 85+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-02-16 10:13 Remove page flags for software suspend Christoph Lameter
2007-02-16 10:56 ` Rafael J. Wysocki
2007-02-28 10:14   ` Pavel Machek
2007-02-28 15:25     ` Christoph Lameter
2007-02-28 17:13       ` Rafael J. Wysocki
2007-02-28 17:17         ` Christoph Lameter
2007-02-28 17:33           ` Rafael J. Wysocki
2007-02-28 17:35             ` Christoph Lameter
2007-02-28 17:51               ` Rafael J. Wysocki
2007-02-28 17:56                 ` Christoph Lameter
2007-02-28 21:11                   ` Pavel Machek
2007-03-01  2:35           ` Nick Piggin
2007-03-01 15:18         ` Nick Piggin
2007-03-01 15:33           ` Rafael J. Wysocki
2007-03-01 23:10             ` Rafael J. Wysocki
2007-03-04 13:50             ` [RFC][PATCH 0/3] swsusp: Do not use page flags (was: Re: Remove page flags for software suspend) Rafael J. Wysocki
2007-03-04 13:50               ` Rafael J. Wysocki
2007-03-04 14:07               ` [RFC][PATCH 1/3] swsusp: Do not use page flags directly Rafael J. Wysocki
2007-03-04 14:07                 ` Rafael J. Wysocki
2007-03-13  4:45                 ` Nick Piggin
2007-03-13  4:45                   ` Nick Piggin
2007-03-04 14:07               ` [RFC][PATCH 2/3] swsusp: Do not use page flags Rafael J. Wysocki
2007-03-04 14:07                 ` Rafael J. Wysocki
2007-03-13  4:47                 ` Nick Piggin
2007-03-13  4:47                   ` Nick Piggin
2007-03-13  9:16                   ` Rafael J. Wysocki
2007-03-13  9:16                     ` Rafael J. Wysocki
2007-03-13  9:23                     ` Nick Piggin
2007-03-13  9:23                       ` Nick Piggin
2007-03-13 10:17                       ` Rafael J. Wysocki
2007-03-13 10:17                         ` Rafael J. Wysocki
2007-03-13 10:31                         ` Nick Piggin
2007-03-13 10:31                           ` Nick Piggin
2007-03-13 21:20                           ` Rafael J. Wysocki
2007-03-13 21:20                             ` Rafael J. Wysocki
2007-03-14  3:17                             ` Nick Piggin
2007-03-14  3:17                               ` Nick Piggin
2007-03-14  8:30                               ` Rafael J. Wysocki
2007-03-14  8:30                                 ` Rafael J. Wysocki
2007-03-04 14:08               ` [RFC][PATCH 3/3] mm: Remove nosave and nosave_free " Rafael J. Wysocki
2007-03-04 14:08                 ` Rafael J. Wysocki
2007-03-08  1:00               ` [RFC][PATCH 0/3] swsusp: Do not use page flags (was: Re: Remove page flags for software suspend) Johannes Berg
2007-03-08  1:00                 ` Johannes Berg
2007-03-08 22:05                 ` Rafael J. Wysocki
2007-03-08 22:05                   ` Rafael J. Wysocki
2007-03-08 22:10                   ` Johannes Berg
2007-03-08 22:10                     ` Johannes Berg
2007-03-08 22:33                     ` Rafael J. Wysocki
2007-03-08 22:33                       ` Rafael J. Wysocki
2007-03-08 22:43                       ` Johannes Berg
2007-03-08 22:43                         ` Johannes Berg
2007-03-08 22:54                         ` Rafael J. Wysocki
2007-03-08 22:54                           ` Rafael J. Wysocki
2007-03-08 22:54                           ` Johannes Berg
2007-03-08 22:54                             ` Johannes Berg
2007-03-08 23:15                   ` Pavel Machek
2007-03-08 23:15                     ` Pavel Machek
2007-03-08 23:21                     ` Johannes Berg
2007-03-08 23:21                       ` Johannes Berg
2007-03-08 23:23                       ` Pavel Machek
2007-03-08 23:23                         ` Pavel Machek
2007-03-08 23:34                       ` Rafael J. Wysocki
2007-03-08 23:34                         ` Rafael J. Wysocki
2007-03-08 23:36                         ` Pavel Machek
2007-03-08 23:36                           ` Pavel Machek
2007-03-08 15:09               ` Johannes Berg
2007-03-08 15:09                 ` Johannes Berg
2007-03-08 22:10                 ` Rafael J. Wysocki
2007-03-08 22:10                   ` Rafael J. Wysocki
2007-03-08 22:12                   ` Johannes Berg
2007-03-08 22:12                     ` Johannes Berg
2007-03-08 15:53               ` Peter Zijlstra
2007-03-08 15:53                 ` Peter Zijlstra
2007-03-08 22:11                 ` Rafael J. Wysocki
2007-03-08 22:11                   ` Rafael J. Wysocki
2007-03-01 17:48           ` Remove page flags for software suspend Hugh Dickins
2007-03-13  3:36             ` Nick Piggin
2007-03-01 20:46           ` Rafael J. Wysocki
2007-03-02 10:17             ` Pavel Machek
2007-02-28 21:08       ` Pavel Machek
2007-02-28 21:16         ` Christoph Lameter
2007-02-28 21:22           ` Pavel Machek
2007-02-28 22:23             ` Rafael J. Wysocki
2007-03-01  2:31         ` Nick Piggin
2007-02-28 10:14   ` Pavel Machek

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.