* [PATCH -v2 1/2] mm, swap: Use kvzalloc to allocate some swap data structure
@ 2017-03-20  8:47 ` Huang, Ying
  0 siblings, 0 replies; 34+ messages in thread
From: Huang, Ying @ 2017-03-20  8:47 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Andi Kleen, Dave Hansen, Shaohua Li, Rik van Riel, Huang Ying,
	Tim Chen, Michal Hocko, Mel Gorman, Aaron Lu, Gerald Schaefer,
	Kirill A. Shutemov, Hugh Dickins, Ingo Molnar, Vegard Nossum,
	linux-mm, linux-kernel

From: Huang Ying <ying.huang@intel.com>

Currently, vzalloc() is used in the swap code to allocate various data
structures, such as the swap cache, the swap slots cache, cluster
info, etc., because their sizes may be too large on some systems for a
normal kzalloc() to succeed.  But kzalloc() has some advantages, for
example, less memory fragmentation and less TLB pressure.  So change
the data structure allocation in the swap code to use kvzalloc(),
which tries kzalloc() first and falls back to vzalloc() if kzalloc()
fails.

The allocation of swap_map[] in struct swap_info_struct is not
changed, because it is usually quite large and vmalloc_to_page() is
used on it, which makes it a little harder to convert.
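
For readers unfamiliar with the helper, below is a minimal sketch of
the try-kzalloc-then-fall-back-to-vzalloc behaviour described above.
kvzalloc_sketch() is a hypothetical name used only for illustration;
the real kvzalloc() is more careful about the gfp flags it passes to
the physically contiguous attempt.

	/*
	 * Illustrative sketch only, not the in-kernel implementation.
	 * Needs <linux/slab.h> and <linux/vmalloc.h>.
	 */
	static void *kvzalloc_sketch(size_t size, gfp_t flags)
	{
		void *p;

		/* Try a physically contiguous, zeroed allocation first. */
		p = kzalloc(size, flags | __GFP_NOWARN);
		if (p)
			return p;

		/* Fall back to virtually contiguous, zeroed memory. */
		return vzalloc(size);
	}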

Signed-off-by: Huang Ying <ying.huang@intel.com>
Acked-by: Tim Chen <tim.c.chen@intel.com>
---
 mm/swap_slots.c | 19 +++++++++++--------
 mm/swap_state.c |  2 +-
 mm/swapfile.c   | 10 ++++++----
 3 files changed, 18 insertions(+), 13 deletions(-)

diff --git a/mm/swap_slots.c b/mm/swap_slots.c
index 9b5bc86f96ad..7376d2ffb2db 100644
--- a/mm/swap_slots.c
+++ b/mm/swap_slots.c
@@ -31,6 +31,7 @@
 #include <linux/cpumask.h>
 #include <linux/vmalloc.h>
 #include <linux/mutex.h>
+#include <linux/mm.h>
 
 #ifdef CONFIG_SWAP
 
@@ -119,16 +120,18 @@ static int alloc_swap_slot_cache(unsigned int cpu)
 
 	/*
 	 * Do allocation outside swap_slots_cache_mutex
-	 * as vzalloc could trigger reclaim and get_swap_page,
+	 * as kvzalloc could trigger reclaim and get_swap_page,
 	 * which can lock swap_slots_cache_mutex.
 	 */
-	slots = vzalloc(sizeof(swp_entry_t) * SWAP_SLOTS_CACHE_SIZE);
+	slots = kvzalloc(sizeof(swp_entry_t) * SWAP_SLOTS_CACHE_SIZE,
+			 GFP_KERNEL);
 	if (!slots)
 		return -ENOMEM;
 
-	slots_ret = vzalloc(sizeof(swp_entry_t) * SWAP_SLOTS_CACHE_SIZE);
+	slots_ret = kvzalloc(sizeof(swp_entry_t) * SWAP_SLOTS_CACHE_SIZE,
+			     GFP_KERNEL);
 	if (!slots_ret) {
-		vfree(slots);
+		kvfree(slots);
 		return -ENOMEM;
 	}
 
@@ -152,9 +155,9 @@ static int alloc_swap_slot_cache(unsigned int cpu)
 out:
 	mutex_unlock(&swap_slots_cache_mutex);
 	if (slots)
-		vfree(slots);
+		kvfree(slots);
 	if (slots_ret)
-		vfree(slots_ret);
+		kvfree(slots_ret);
 	return 0;
 }
 
@@ -171,7 +174,7 @@ static void drain_slots_cache_cpu(unsigned int cpu, unsigned int type,
 		cache->cur = 0;
 		cache->nr = 0;
 		if (free_slots && cache->slots) {
-			vfree(cache->slots);
+			kvfree(cache->slots);
 			cache->slots = NULL;
 		}
 		mutex_unlock(&cache->alloc_lock);
@@ -186,7 +189,7 @@ static void drain_slots_cache_cpu(unsigned int cpu, unsigned int type,
 		}
 		spin_unlock_irq(&cache->free_lock);
 		if (slots)
-			vfree(slots);
+			kvfree(slots);
 	}
 }
 
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 7bfb9bd1ca21..539b8885e3d1 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -523,7 +523,7 @@ int init_swap_address_space(unsigned int type, unsigned long nr_pages)
 	unsigned int i, nr;
 
 	nr = DIV_ROUND_UP(nr_pages, SWAP_ADDRESS_SPACE_PAGES);
-	spaces = vzalloc(sizeof(struct address_space) * nr);
+	spaces = kvzalloc(sizeof(struct address_space) * nr, GFP_KERNEL);
 	if (!spaces)
 		return -ENOMEM;
 	for (i = 0; i < nr; i++) {
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 53b5881ee0d6..90054f3c2cdc 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -2272,8 +2272,8 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
 	free_percpu(p->percpu_cluster);
 	p->percpu_cluster = NULL;
 	vfree(swap_map);
-	vfree(cluster_info);
-	vfree(frontswap_map);
+	kvfree(cluster_info);
+	kvfree(frontswap_map);
 	/* Destroy swap account information */
 	swap_cgroup_swapoff(p->type);
 	exit_swap_address_space(p->type);
@@ -2796,7 +2796,8 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
 		p->cluster_next = 1 + (prandom_u32() % p->highest_bit);
 		nr_cluster = DIV_ROUND_UP(maxpages, SWAPFILE_CLUSTER);
 
-		cluster_info = vzalloc(nr_cluster * sizeof(*cluster_info));
+		cluster_info = kvzalloc(nr_cluster * sizeof(*cluster_info),
+					GFP_KERNEL);
 		if (!cluster_info) {
 			error = -ENOMEM;
 			goto bad_swap;
@@ -2829,7 +2830,8 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
 	}
 	/* frontswap enabled? set up bit-per-page map for frontswap */
 	if (IS_ENABLED(CONFIG_FRONTSWAP))
-		frontswap_map = vzalloc(BITS_TO_LONGS(maxpages) * sizeof(long));
+		frontswap_map = kvzalloc(BITS_TO_LONGS(maxpages) * sizeof(long),
+					 GFP_KERNEL);
 
 	if (p->bdev &&(swap_flags & SWAP_FLAG_DISCARD) && swap_discardable(p)) {
 		/*
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH -v2 2/2] mm, swap: Sort swap entries before free
  2017-03-20  8:47 ` Huang, Ying
@ 2017-03-20  8:47   ` Huang, Ying
  -1 siblings, 0 replies; 34+ messages in thread
From: Huang, Ying @ 2017-03-20  8:47 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Andi Kleen, Dave Hansen, Shaohua Li, Rik van Riel, Huang Ying,
	Tim Chen, Michal Hocko, Kirill A. Shutemov, Vegard Nossum,
	Ingo Molnar, linux-mm, linux-kernel

From: Huang Ying <ying.huang@intel.com>

To reduce the contention on swap_info_struct->lock when freeing swap
entries, the freed entries are first collected in a per-CPU buffer and
really freed later in a batch.  During the batch freeing, if the
consecutive swap entries in the per-CPU buffer belong to the same swap
device, swap_info_struct->lock needs to be acquired/released only
once, so the lock contention is reduced greatly.  But if there are
multiple swap devices, the lock may be released/acquired
unnecessarily, because swap entries that belong to the same swap
device may be non-consecutive in the per-CPU buffer.

To solve the issue, the per-CPU buffer is sorted by swap device before
the swap entries are freed.  Tests show that the time spent in
swapcache_free_entries() is reduced after the patch.
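
Why sorting helps can be seen from the following simplified sketch of
a batched free loop over the sorted buffer.  dev_lock()/dev_unlock()
and free_one_entry() are placeholders used only for illustration, not
kernel APIs.

	/* Illustrative sketch only: after sorting by swp_type(), entries
	 * of the same swap device are adjacent, so the per-device lock is
	 * taken once per run instead of potentially once per entry.
	 */
	static void batch_free_sorted(swp_entry_t *entries, int n)
	{
		int i = 0;

		while (i < n) {
			unsigned int type = swp_type(entries[i]);

			dev_lock(type);
			while (i < n && swp_type(entries[i]) == type)
				free_one_entry(entries[i++]);
			dev_unlock(type);
		}
	}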

The patch was tested by measuring the run time of
swapcache_free_entries() during the exit phase of applications that
use much swap space.  The results show that the average run time of
swapcache_free_entries() is reduced by about 20% after applying the
patch.

Signed-off-by: Huang Ying <ying.huang@intel.com>
Acked-by: Tim Chen <tim.c.chen@intel.com>
---
 mm/swapfile.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/mm/swapfile.c b/mm/swapfile.c
index 90054f3c2cdc..1628dd88da40 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -37,6 +37,7 @@
 #include <linux/swapfile.h>
 #include <linux/export.h>
 #include <linux/swap_slots.h>
+#include <linux/sort.h>
 
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
@@ -1065,6 +1066,13 @@ void swapcache_free(swp_entry_t entry)
 	}
 }
 
+static int swp_entry_cmp(const void *ent1, const void *ent2)
+{
+	const swp_entry_t *e1 = ent1, *e2 = ent2;
+
+	return (long)(swp_type(*e1) - swp_type(*e2));
+}
+
 void swapcache_free_entries(swp_entry_t *entries, int n)
 {
 	struct swap_info_struct *p, *prev;
@@ -1075,6 +1083,7 @@ void swapcache_free_entries(swp_entry_t *entries, int n)
 
 	prev = NULL;
 	p = NULL;
+	sort(entries, n, sizeof(entries[0]), swp_entry_cmp, NULL);
 	for (i = 0; i < n; ++i) {
 		p = swap_info_get_cont(entries[i], prev);
 		if (p)
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* Re: [PATCH -v2 1/2] mm, swap: Use kvzalloc to allocate some swap data structure
  2017-03-20  8:47 ` Huang, Ying
@ 2017-03-20 21:32   ` David Rientjes
  -1 siblings, 0 replies; 34+ messages in thread
From: David Rientjes @ 2017-03-20 21:32 UTC (permalink / raw)
  To: Huang, Ying
  Cc: Andrew Morton, Andi Kleen, Dave Hansen, Shaohua Li, Rik van Riel,
	Tim Chen, Michal Hocko, Mel Gorman, Aaron Lu, Gerald Schaefer,
	Kirill A. Shutemov, Hugh Dickins, Ingo Molnar, Vegard Nossum,
	linux-mm, linux-kernel

On Mon, 20 Mar 2017, Huang, Ying wrote:

> From: Huang Ying <ying.huang@intel.com>
> 
> Now vzalloc() is used in swap code to allocate various data
> structures, such as swap cache, swap slots cache, cluster info, etc.
> Because the size may be too large on some system, so that normal
> kzalloc() may fail.  But using kzalloc() has some advantages, for
> example, less memory fragmentation, less TLB pressure, etc.  So change
> the data structure allocation in swap code to use kvzalloc() which
> will try kzalloc() firstly, and fallback to vzalloc() if kzalloc()
> failed.
> 

As questioned in -v1 of this patch, what is the benefit of directly 
compacting and reclaiming memory for high-order pages by first preferring 
kmalloc() if this does not require contiguous memory?

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH -v2 1/2] mm, swap: Use kvzalloc to allocate some swap data structure
  2017-03-20 21:32   ` David Rientjes
@ 2017-03-24  2:41     ` Huang, Ying
  -1 siblings, 0 replies; 34+ messages in thread
From: Huang, Ying @ 2017-03-24  2:41 UTC (permalink / raw)
  To: David Rientjes
  Cc: Huang, Ying, Andrew Morton, Andi Kleen, Dave Hansen, Shaohua Li,
	Rik van Riel, Tim Chen, Michal Hocko, Mel Gorman, Aaron Lu,
	Gerald Schaefer, Kirill A. Shutemov, Hugh Dickins, Ingo Molnar,
	Vegard Nossum, linux-mm, linux-kernel

David Rientjes <rientjes@google.com> writes:

> On Mon, 20 Mar 2017, Huang, Ying wrote:
>
>> From: Huang Ying <ying.huang@intel.com>
>> 
>> Now vzalloc() is used in swap code to allocate various data
>> structures, such as swap cache, swap slots cache, cluster info, etc.
>> Because the size may be too large on some system, so that normal
>> kzalloc() may fail.  But using kzalloc() has some advantages, for
>> example, less memory fragmentation, less TLB pressure, etc.  So change
>> the data structure allocation in swap code to use kvzalloc() which
>> will try kzalloc() firstly, and fallback to vzalloc() if kzalloc()
>> failed.
>> 
>
> As questioned in -v1 of this patch, what is the benefit of directly 
> compacting and reclaiming memory for high-order pages by first preferring 
> kmalloc() if this does not require contiguous memory?

The memory allocation here is done only at swapon time, not at swap
out/in time.  The performance of swapon is not considered critical.
But if kmalloc() is used instead of vmalloc(), the swap out/in
performance could be improved (marginally).  More importantly, the
interference with other activity on the system could be reduced, for
example, less memory fragmentation and less TLB usage by the swap
subsystem.

Best Regards,
Huang, Ying

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH -v2 1/2] mm, swap: Use kvzalloc to allocate some swap data structure
  2017-03-24  2:41     ` Huang, Ying
@ 2017-03-24  4:27       ` John Hubbard
  -1 siblings, 0 replies; 34+ messages in thread
From: John Hubbard @ 2017-03-24  4:27 UTC (permalink / raw)
  To: Huang, Ying, David Rientjes
  Cc: Andrew Morton, Andi Kleen, Dave Hansen, Shaohua Li, Rik van Riel,
	Tim Chen, Michal Hocko, Mel Gorman, Aaron Lu, Gerald Schaefer,
	Kirill A. Shutemov, Hugh Dickins, Ingo Molnar, Vegard Nossum,
	linux-mm, linux-kernel

On 03/23/2017 07:41 PM, Huang, Ying wrote:
> David Rientjes <rientjes@google.com> writes:
>
>> On Mon, 20 Mar 2017, Huang, Ying wrote:
>>
>>> From: Huang Ying <ying.huang@intel.com>
>>>
>>> Now vzalloc() is used in swap code to allocate various data
>>> structures, such as swap cache, swap slots cache, cluster info, etc.
>>> Because the size may be too large on some system, so that normal
>>> kzalloc() may fail.  But using kzalloc() has some advantages, for
>>> example, less memory fragmentation, less TLB pressure, etc.  So change
>>> the data structure allocation in swap code to use kvzalloc() which
>>> will try kzalloc() firstly, and fallback to vzalloc() if kzalloc()
>>> failed.
>>>
>>
>> As questioned in -v1 of this patch, what is the benefit of directly
>> compacting and reclaiming memory for high-order pages by first preferring
>> kmalloc() if this does not require contiguous memory?
>
> The memory allocation here is only for swap on time, not for swap out/in
> time.  The performance of swap on is not considered critical.  But if
> the kmalloc() is used instead of the vmalloc(), the swap out/in
> performance could be improved (marginally).  More importantly, the
> interference for the other activity on the system could be reduced, For
> example, less memory fragmentation, less TLB usage of swap subsystem,
> etc.

Hi Ying,

I'm a little surprised to see vmalloc calls replaced with kmalloc-then-vmalloc calls, because that 
actually makes fragmentation worse (contrary to the above claim). That's because you will consume 
contiguous memory (even though you don't need it to be contiguous), whereas before, you would have 
been able to get by with page-at-a-time for vmalloc.

So, things like THP will find fewer contiguous chunks, as a result of patches such as this.

--
thanks,
john h

>
> Best Regards,
> Huang, Ying
>

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH -v2 1/2] mm, swap: Use kvzalloc to allocate some swap data structure
  2017-03-24  4:27       ` John Hubbard
@ 2017-03-24  4:52         ` Huang, Ying
  -1 siblings, 0 replies; 34+ messages in thread
From: Huang, Ying @ 2017-03-24  4:52 UTC (permalink / raw)
  To: John Hubbard
  Cc: Huang, Ying, David Rientjes, Andrew Morton, Andi Kleen,
	Dave Hansen, Shaohua Li, Rik van Riel, Tim Chen, Michal Hocko,
	Mel Gorman, Aaron Lu, Gerald Schaefer, Kirill A. Shutemov,
	Hugh Dickins, Ingo Molnar, Vegard Nossum, linux-mm, linux-kernel

John Hubbard <jhubbard@nvidia.com> writes:

> On 03/23/2017 07:41 PM, Huang, Ying wrote:
>> David Rientjes <rientjes@google.com> writes:
>>
>>> On Mon, 20 Mar 2017, Huang, Ying wrote:
>>>
>>>> From: Huang Ying <ying.huang@intel.com>
>>>>
>>>> Now vzalloc() is used in swap code to allocate various data
>>>> structures, such as swap cache, swap slots cache, cluster info, etc.
>>>> Because the size may be too large on some system, so that normal
>>>> kzalloc() may fail.  But using kzalloc() has some advantages, for
>>>> example, less memory fragmentation, less TLB pressure, etc.  So change
>>>> the data structure allocation in swap code to use kvzalloc() which
>>>> will try kzalloc() firstly, and fallback to vzalloc() if kzalloc()
>>>> failed.
>>>>
>>>
>>> As questioned in -v1 of this patch, what is the benefit of directly
>>> compacting and reclaiming memory for high-order pages by first preferring
>>> kmalloc() if this does not require contiguous memory?
>>
>> The memory allocation here is only for swap on time, not for swap out/in
>> time.  The performance of swap on is not considered critical.  But if
>> the kmalloc() is used instead of the vmalloc(), the swap out/in
>> performance could be improved (marginally).  More importantly, the
>> interference for the other activity on the system could be reduced, For
>> example, less memory fragmentation, less TLB usage of swap subsystem,
>> etc.
>
> Hi Ying,
>
> I'm a little surprised to see vmalloc calls replaced with
> kmalloc-then-vmalloc calls, because that actually makes fragmentation
> worse (contrary to the above claim). That's because you will consume
> contiguous memory (even though you don't need it to be contiguous),
> whereas before, you would have been able to get by with page-at-a-time
> for vmalloc.
>
> So, things like THP will find fewer contiguous chunks, as a result of patches such as this.

Hi, John,

I don't think so.  The pages allocated by vmalloc() cannot be moved
during defragmentation.  For example, if 512 dis-contiguous physical
pages are allocated via vmalloc(), in the worst case each page is
allocated from a distinct 2MB contiguous physical region.  That makes
512 * 2MB = 1GB of memory unusable for THP allocation, because these
pages cannot be defragmented until vfree().

Best Regards,
Huang, Ying

> --
> thanks,
> john h
>
>>
>> Best Regards,
>> Huang, Ying
>>

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH -v2 1/2] mm, swap: Use kvzalloc to allocate some swap data structure
  2017-03-24  4:52         ` Huang, Ying
@ 2017-03-24  6:48           ` John Hubbard
  -1 siblings, 0 replies; 34+ messages in thread
From: John Hubbard @ 2017-03-24  6:48 UTC (permalink / raw)
  To: Huang, Ying
  Cc: David Rientjes, Andrew Morton, Andi Kleen, Dave Hansen,
	Shaohua Li, Rik van Riel, Tim Chen, Michal Hocko, Mel Gorman,
	Aaron Lu, Gerald Schaefer, Kirill A. Shutemov, Hugh Dickins,
	Ingo Molnar, Vegard Nossum, linux-mm, linux-kernel

On 03/23/2017 09:52 PM, Huang, Ying wrote:
> John Hubbard <jhubbard@nvidia.com> writes:
>
>> On 03/23/2017 07:41 PM, Huang, Ying wrote:
>>> David Rientjes <rientjes@google.com> writes:
>>>
>>>> On Mon, 20 Mar 2017, Huang, Ying wrote:
>>>>
>>>>> From: Huang Ying <ying.huang@intel.com>
>>>>>
>>>>> Now vzalloc() is used in swap code to allocate various data
>>>>> structures, such as swap cache, swap slots cache, cluster info, etc.
>>>>> Because the size may be too large on some system, so that normal
>>>>> kzalloc() may fail.  But using kzalloc() has some advantages, for
>>>>> example, less memory fragmentation, less TLB pressure, etc.  So change
>>>>> the data structure allocation in swap code to use kvzalloc() which
>>>>> will try kzalloc() firstly, and fallback to vzalloc() if kzalloc()
>>>>> failed.
>>>>>
>>>>
>>>> As questioned in -v1 of this patch, what is the benefit of directly
>>>> compacting and reclaiming memory for high-order pages by first preferring
>>>> kmalloc() if this does not require contiguous memory?
>>>
>>> The memory allocation here is only for swap on time, not for swap out/in
>>> time.  The performance of swap on is not considered critical.  But if
>>> the kmalloc() is used instead of the vmalloc(), the swap out/in
>>> performance could be improved (marginally).  More importantly, the
>>> interference for the other activity on the system could be reduced, For
>>> example, less memory fragmentation, less TLB usage of swap subsystem,
>>> etc.
>>
>> Hi Ying,
>>
>> I'm a little surprised to see vmalloc calls replaced with
>> kmalloc-then-vmalloc calls, because that actually makes fragmentation
>> worse (contrary to the above claim). That's because you will consume
>> contiguous memory (even though you don't need it to be contiguous),
>> whereas before, you would have been able to get by with page-at-a-time
>> for vmalloc.
>>
>> So, things like THP will find fewer contiguous chunks, as a result of patches such as this.
>
> Hi, John,
>
> I don't think so.  The pages allocated by vmalloc() cannot be moved
> during de-fragment.  For example, if 512 dis-continuous physical pages
> are allocated via vmalloc(), at worst, one page will be allocate from
> one distinct 2MB continous physical pages.  This makes 512 * 2MB = 1GB
> memory cannot be used for THP allocation.  Because these pages cannot be
> defragmented until vfree().

kmalloc requires a resource that vmalloc does not: contiguous pages.
Therefore, given the same mix of pages (some groups of contiguous
pages, and a scattering of isolated single-page or
too-small-to-satisfy-the-entire-allocation groups of pages) and the
same underlying page allocator, kmalloc *must* consume the more
valuable contiguous pages. However, vmalloc *may* consume those same
pages.

So, if you run kmalloc a bunch of times with higher-order requests,
you *will* run out of contiguous pages (until more are freed up). If
you run vmalloc with the same initial conditions and the same
requests, you may not necessarily use up those contiguous pages.

It's true that there are benefits to doing a kmalloc-then-vmalloc, of
course: if the pages are available, it's faster and uses fewer
resources. Yes. I just don't think "less fragmentation" should be
listed as a benefit, because you can definitely cause *more*
fragmentation if you use up contiguous blocks unnecessarily.

--
thanks,
john h

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH -v2 1/2] mm, swap: Use kvzalloc to allocate some swap data structure
  2017-03-24  6:48           ` John Hubbard
@ 2017-03-24  7:16             ` Huang, Ying
  -1 siblings, 0 replies; 34+ messages in thread
From: Huang, Ying @ 2017-03-24  7:16 UTC (permalink / raw)
  To: John Hubbard
  Cc: Huang, Ying, David Rientjes, Andrew Morton, Andi Kleen,
	Dave Hansen, Shaohua Li, Rik van Riel, Tim Chen, Michal Hocko,
	Mel Gorman, Aaron Lu, Gerald Schaefer, Kirill A. Shutemov,
	Hugh Dickins, Ingo Molnar, Vegard Nossum, linux-mm, linux-kernel

John Hubbard <jhubbard@nvidia.com> writes:

> On 03/23/2017 09:52 PM, Huang, Ying wrote:
>> John Hubbard <jhubbard@nvidia.com> writes:
>>
>>> On 03/23/2017 07:41 PM, Huang, Ying wrote:
>>>> David Rientjes <rientjes@google.com> writes:
>>>>
>>>>> On Mon, 20 Mar 2017, Huang, Ying wrote:
>>>>>
>>>>>> From: Huang Ying <ying.huang@intel.com>
>>>>>>
>>>>>> Now vzalloc() is used in swap code to allocate various data
>>>>>> structures, such as swap cache, swap slots cache, cluster info, etc.
>>>>>> Because the size may be too large on some system, so that normal
>>>>>> kzalloc() may fail.  But using kzalloc() has some advantages, for
>>>>>> example, less memory fragmentation, less TLB pressure, etc.  So change
>>>>>> the data structure allocation in swap code to use kvzalloc() which
>>>>>> will try kzalloc() firstly, and fallback to vzalloc() if kzalloc()
>>>>>> failed.
>>>>>>
>>>>>
>>>>> As questioned in -v1 of this patch, what is the benefit of directly
>>>>> compacting and reclaiming memory for high-order pages by first preferring
>>>>> kmalloc() if this does not require contiguous memory?
>>>>
>>>> The memory allocation here is only for swap on time, not for swap out/in
>>>> time.  The performance of swap on is not considered critical.  But if
>>>> the kmalloc() is used instead of the vmalloc(), the swap out/in
>>>> performance could be improved (marginally).  More importantly, the
>>>> interference for the other activity on the system could be reduced, For
>>>> example, less memory fragmentation, less TLB usage of swap subsystem,
>>>> etc.
>>>
>>> Hi Ying,
>>>
>>> I'm a little surprised to see vmalloc calls replaced with
>>> kmalloc-then-vmalloc calls, because that actually makes fragmentation
>>> worse (contrary to the above claim). That's because you will consume
>>> contiguous memory (even though you don't need it to be contiguous),
>>> whereas before, you would have been able to get by with page-at-a-time
>>> for vmalloc.
>>>
>>> So, things like THP will find fewer contiguous chunks, as a result of patches such as this.
>>
>> Hi, John,
>>
>> I don't think so.  The pages allocated by vmalloc() cannot be moved
>> during de-fragment.  For example, if 512 dis-continuous physical pages
>> are allocated via vmalloc(), at worst, one page will be allocate from
>> one distinct 2MB continous physical pages.  This makes 512 * 2MB = 1GB
>> memory cannot be used for THP allocation.  Because these pages cannot be
>> defragmented until vfree().
>
> kmalloc requires a resource that vmalloc does not: contiguous
> pages. Therefore, given the same mix of pages (some groups of
> contiguous pages, and a scattering of isolated single-page, or
> too-small-to-satisfy-entire-alloc groups of pages, and the same
> underlying page allocator, kmalloc *must* consume the more valuable
> contiguous pages. However, vmalloc *may* consume those same pages.
>
> So, if you run kmalloc a bunch of times, with higher-order requests,
> you *will* run out of contiguous pages (until more are freed up). If
> you run vmalloc with the same initial conditions and the same
> requests, you may not necessary use up those contiguous pages.
>
> It's true that there are benefits to doing a kmalloc-then-vmalloc, of
> course: if the pages are available, it's faster and uses less
> resources. Yes. I just don't think "less fragmentation" should be
> listed as a benefit, because you can definitely cause *more*
> fragmentation if you use up contiguous blocks unnecessarily.

Yes, I agree that in some cases kmalloc() will use more contiguous
blocks, for example when non-movable pages are scattered all over
memory.  But I still think that in common cases, if defragmentation is
enabled and non-movable page allocation is restricted to some memory
area where possible, kmalloc() is better than vmalloc() with respect
to fragmentation.

Best Regards,
Huang, Ying

> --
> thanks,
> john h

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH -v2 1/2] mm, swap: Use kvzalloc to allocate some swap data structure
  2017-03-24  7:16             ` Huang, Ying
@ 2017-03-24  7:33               ` John Hubbard
  -1 siblings, 0 replies; 34+ messages in thread
From: John Hubbard @ 2017-03-24  7:33 UTC (permalink / raw)
  To: Huang, Ying
  Cc: David Rientjes, Andrew Morton, Andi Kleen, Dave Hansen,
	Shaohua Li, Rik van Riel, Tim Chen, Michal Hocko, Mel Gorman,
	Aaron Lu, Gerald Schaefer, Kirill A. Shutemov, Hugh Dickins,
	Ingo Molnar, Vegard Nossum, linux-mm, linux-kernel

[...]
>>>> Hi Ying,
>>>>
>>>> I'm a little surprised to see vmalloc calls replaced with
>>>> kmalloc-then-vmalloc calls, because that actually makes fragmentation
>>>> worse (contrary to the above claim). That's because you will consume
>>>> contiguous memory (even though you don't need it to be contiguous),
>>>> whereas before, you would have been able to get by with page-at-a-time
>>>> for vmalloc.
>>>>
>>>> So, things like THP will find fewer contiguous chunks, as a result of patches such as this.
>>>
>>> Hi, John,
>>>
>>> I don't think so.  The pages allocated by vmalloc() cannot be moved
>>> during de-fragment.  For example, if 512 dis-continuous physical pages
>>> are allocated via vmalloc(), at worst, one page will be allocate from
>>> one distinct 2MB continous physical pages.  This makes 512 * 2MB = 1GB
>>> memory cannot be used for THP allocation.  Because these pages cannot be
>>> defragmented until vfree().
>>
>> kmalloc requires a resource that vmalloc does not: contiguous
>> pages. Therefore, given the same mix of pages (some groups of
>> contiguous pages, and a scattering of isolated single-page, or
>> too-small-to-satisfy-entire-alloc groups of pages, and the same
>> underlying page allocator, kmalloc *must* consume the more valuable
>> contiguous pages. However, vmalloc *may* consume those same pages.
>>
>> So, if you run kmalloc a bunch of times, with higher-order requests,
>> you *will* run out of contiguous pages (until more are freed up). If
>> you run vmalloc with the same initial conditions and the same
>> requests, you may not necessary use up those contiguous pages.
>>
>> It's true that there are benefits to doing a kmalloc-then-vmalloc, of
>> course: if the pages are available, it's faster and uses less
>> resources. Yes. I just don't think "less fragmentation" should be
>> listed as a benefit, because you can definitely cause *more*
>> fragmentation if you use up contiguous blocks unnecessarily.
>
> Yes, I agree that for some cases, kmalloc() will use more contiguous
> blocks, for example, non-movable pages are scattered all over the
> memory.  But I still think in common cases, if defragement is enabled,
> and non-movable pages allocation is restricted to some memory area if
> possible, kmalloc() is better than vmalloc() as for fragmentation.


There might be some additional information you are using to come up
with that conclusion which is not obvious to me. Any thoughts there?
These calls use the same underlying page allocator (and I thought that
both were subject to the same constraints on defragmentation, as a
result of that). So I am not seeing any way that kmalloc could
possibly be a less-fragmenting call than vmalloc.

--
thanks,
john h

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH -v2 1/2] mm, swap: Use kvzalloc to allocate some swap data structure
  2017-03-24  7:33               ` John Hubbard
@ 2017-03-24 13:56                 ` Dave Hansen
  -1 siblings, 0 replies; 34+ messages in thread
From: Dave Hansen @ 2017-03-24 13:56 UTC (permalink / raw)
  To: John Hubbard, Huang, Ying
  Cc: David Rientjes, Andrew Morton, Andi Kleen, Shaohua Li,
	Rik van Riel, Tim Chen, Michal Hocko, Mel Gorman, Aaron Lu,
	Gerald Schaefer, Kirill A. Shutemov, Hugh Dickins, Ingo Molnar,
	Vegard Nossum, linux-mm, linux-kernel

On 03/24/2017 12:33 AM, John Hubbard wrote:
> There might be some additional information you are using to come up with
> that conclusion, that is not obvious to me. Any thoughts there? These
> calls use the same underlying page allocator (and I thought that both
> were subject to the same constraints on defragmentation, as a result of
> that). So I am not seeing any way that kmalloc could possibly be a
> less-fragmenting call than vmalloc.

You guys are having quite a discussion over a very small point.

But, Ying is right.

Let's say we have a two-page data structure.  vmalloc() takes two
effectively random order-0 pages, probably from two different 2M pages
and pins them.  That "kills" two 2M pages.

kmalloc(), allocating two *contiguous* pages, is very unlikely to cross
a 2M boundary (it theoretically could).  That means it will only "kill"
the possibility of a single 2M page.  More 2M pages == less fragmentation.
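
To make that concrete, sketched in illustrative kernel-style C (not code
from the patch; with 4KB pages, 2 * PAGE_SIZE is a two-page buffer):

        void *buf;

        /* Two independent order-0 pages, contiguous only in the virtual
         * mapping; in the worst case they come from two different 2MB
         * physical blocks, blocking two potential huge pages until vfree().
         */
        buf = vmalloc(2 * PAGE_SIZE);

        /* A physically contiguous two-page allocation, normally carved from
         * a single naturally aligned buddy chunk, so at most one 2MB
         * physical block is affected.
         */
        buf = kmalloc(2 * PAGE_SIZE, GFP_KERNEL);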

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH -v2 1/2] mm, swap: Use kvzalloc to allocate some swap data structure
  2017-03-24 13:56                 ` Dave Hansen
@ 2017-03-24 16:52                   ` Tim Chen
  -1 siblings, 0 replies; 34+ messages in thread
From: Tim Chen @ 2017-03-24 16:52 UTC (permalink / raw)
  To: Dave Hansen, John Hubbard, Huang, Ying
  Cc: David Rientjes, Andrew Morton, Andi Kleen, Shaohua Li,
	Rik van Riel, Michal Hocko, Mel Gorman, Aaron Lu,
	Gerald Schaefer, Kirill A. Shutemov, Hugh Dickins, Ingo Molnar,
	Vegard Nossum, linux-mm, linux-kernel

On Fri, 2017-03-24 at 06:56 -0700, Dave Hansen wrote:
> On 03/24/2017 12:33 AM, John Hubbard wrote:
> > 
> > There might be some additional information you are using to come up with
> > that conclusion, that is not obvious to me. Any thoughts there? These
> > calls use the same underlying page allocator (and I thought that both
> > were subject to the same constraints on defragmentation, as a result of
> > that). So I am not seeing any way that kmalloc could possibly be a
> > less-fragmenting call than vmalloc.
> You guys are having quite a discussion over a very small point.
> 
> But, Ying is right.
> 
> Let's say we have a two-page data structure.  vmalloc() takes two
> effectively random order-0 pages, probably from two different 2M pages
> and pins them.  That "kills" two 2M pages.
> 
> kmalloc(), allocating two *contiguous* pages, is very unlikely to cross
> a 2M boundary (it theoretically could).  That means it will only "kill"
> the possibility of a single 2M page.  More 2M pages == less fragmentation.

vmalloc eventually calls __vmalloc_area_node(), which allocates the
pages one at a time.  There's no attempt there to make the pages
contiguous, if I am reading the code correctly.  So that will increase
memory fragmentation, as we will be piecing together pages from all over
the place.
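
Roughly, the loop in question looks like this (heavily simplified sketch,
not the actual mm/vmalloc.c code):

        for (i = 0; i < area->nr_pages; i++) {
                /* each backing page is an independent order-0 allocation */
                struct page *page = alloc_page(gfp_mask);

                if (!page)
                        goto fail;
                area->pages[i] = page;
        }
        /* the scattered pages only become contiguous in the *virtual*
         * address space, when they are mapped into the vmalloc area */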

Tim  

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH -v2 1/2] mm, swap: Use kvzalloc to allocate some swap data structure
  2017-03-24 16:52                   ` Tim Chen
@ 2017-03-24 18:15                     ` John Hubbard
  -1 siblings, 0 replies; 34+ messages in thread
From: John Hubbard @ 2017-03-24 18:15 UTC (permalink / raw)
  To: Tim Chen, Dave Hansen, Huang, Ying
  Cc: David Rientjes, Andrew Morton, Andi Kleen, Shaohua Li,
	Rik van Riel, Michal Hocko, Mel Gorman, Aaron Lu,
	Gerald Schaefer, Kirill A. Shutemov, Hugh Dickins, Ingo Molnar,
	Vegard Nossum, linux-mm, linux-kernel

On 03/24/2017 09:52 AM, Tim Chen wrote:
> On Fri, 2017-03-24 at 06:56 -0700, Dave Hansen wrote:
>> On 03/24/2017 12:33 AM, John Hubbard wrote:
>>>
>>> There might be some additional information you are using to come up with
>>> that conclusion, that is not obvious to me. Any thoughts there? These
>>> calls use the same underlying page allocator (and I thought that both
>>> were subject to the same constraints on defragmentation, as a result of
>>> that). So I am not seeing any way that kmalloc could possibly be a
>>> less-fragmenting call than vmalloc.
>> You guys are having quite a discussion over a very small point.
>>
>> But, Ying is right.
>>
>> Let's say we have a two-page data structure.  vmalloc() takes two
>> effectively random order-0 pages, probably from two different 2M pages
>> and pins them.  That "kills" two 2M pages.
>>
>> kmalloc(), allocating two *contiguous* pages, is very unlikely to cross
>> a 2M boundary (it theoretically could).  That means it will only "kill"
>> the possibility of a single 2M page.  More 2M pages == less fragmentation.
>
> In vmalloc, it eventually calls __vmalloc_area_node that allocates the
> page one at a time.  There's no attempt there to make the pages contiguous
> if I am reading the code correctly.  So that will increase the memory
> fragmentation as we will be piecing together pages from all over the places.
>
> Tim

OK. Thanks, everyone, for spelling it out for me before I started on larger projects with an
incorrect way of looking at the fragmentation behavior. :)

--
thanks,
john h

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH -v2 1/2] mm, swap: Use kvzalloc to allocate some swap data structure
  2017-03-24 13:56                 ` Dave Hansen
@ 2017-03-30 16:31                   ` Michal Hocko
  -1 siblings, 0 replies; 34+ messages in thread
From: Michal Hocko @ 2017-03-30 16:31 UTC (permalink / raw)
  To: Dave Hansen
  Cc: John Hubbard, Huang, Ying, David Rientjes, Andrew Morton,
	Andi Kleen, Shaohua Li, Rik van Riel, Tim Chen, Mel Gorman,
	Aaron Lu, Gerald Schaefer, Kirill A. Shutemov, Hugh Dickins,
	Ingo Molnar, Vegard Nossum, linux-mm, linux-kernel

On Fri 24-03-17 06:56:10, Dave Hansen wrote:
> On 03/24/2017 12:33 AM, John Hubbard wrote:
> > There might be some additional information you are using to come up with
> > that conclusion, that is not obvious to me. Any thoughts there? These
> > calls use the same underlying page allocator (and I thought that both
> > were subject to the same constraints on defragmentation, as a result of
> > that). So I am not seeing any way that kmalloc could possibly be a
> > less-fragmenting call than vmalloc.
> 
> You guys are having quite a discussion over a very small point.
> 
> But, Ying is right.
> 
> Let's say we have a two-page data structure.  vmalloc() takes two
> effectively random order-0 pages, probably from two different 2M pages
> and pins them.  That "kills" two 2M pages.
> 
> kmalloc(), allocating two *contiguous* pages, is very unlikely to cross
> a 2M boundary (it theoretically could).  That means it will only "kill"
> the possibility of a single 2M page.  More 2M pages == less fragmentation.

Yes, I agree with this. And the patch is a no-brainer. kvmalloc makes
sure not to try too hard on the kmalloc side, so I really didn't get the
objection about direct compaction and reclaim which initially started
this discussion. Besides that, the swapon path usually happens early
during boot, where we should have those larger blocks available.
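
For reference, the kvmalloc policy being referred to is roughly this
(simplified sketch, not the exact kernel implementation; the real flag
handling has more cases):

        static void *kvmalloc_sketch(size_t size, gfp_t flags)
        {
                gfp_t kmalloc_flags = flags;
                void *ret;

                /* for higher-order sizes, forbid heavy reclaim/compaction */
                if (size > PAGE_SIZE)
                        kmalloc_flags |= __GFP_NORETRY | __GFP_NOWARN;

                ret = kmalloc(size, kmalloc_flags);
                if (ret || size <= PAGE_SIZE)
                        return ret;

                /* only then fall back to page-at-a-time vmalloc */
                return __vmalloc(size, flags, PAGE_KERNEL);
        }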

-- 
Michal Hocko
SUSE Labs

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH -v2 1/2] mm, swap: Use kvzalloc to allocate some swap data structure
  2017-03-30 16:31                   ` Michal Hocko
@ 2017-04-01  4:47                     ` Huang, Ying
  -1 siblings, 0 replies; 34+ messages in thread
From: Huang, Ying @ 2017-04-01  4:47 UTC (permalink / raw)
  To: Michal Hocko
  Cc: Dave Hansen, John Hubbard, Huang, Ying, David Rientjes,
	Andrew Morton, Andi Kleen, Shaohua Li, Rik van Riel, Tim Chen,
	Mel Gorman, Aaron Lu, Gerald Schaefer, Kirill A. Shutemov,
	Hugh Dickins, Ingo Molnar, Vegard Nossum, linux-mm, linux-kernel

Hi, Michal,

Michal Hocko <mhocko@kernel.org> writes:

> On Fri 24-03-17 06:56:10, Dave Hansen wrote:
>> On 03/24/2017 12:33 AM, John Hubbard wrote:
>> > There might be some additional information you are using to come up with
>> > that conclusion, that is not obvious to me. Any thoughts there? These
>> > calls use the same underlying page allocator (and I thought that both
>> > were subject to the same constraints on defragmentation, as a result of
>> > that). So I am not seeing any way that kmalloc could possibly be a
>> > less-fragmenting call than vmalloc.
>> 
>> You guys are having quite a discussion over a very small point.
>> 
>> But, Ying is right.
>> 
>> Let's say we have a two-page data structure.  vmalloc() takes two
>> effectively random order-0 pages, probably from two different 2M pages
>> and pins them.  That "kills" two 2M pages.
>> 
>> kmalloc(), allocating two *contiguous* pages, is very unlikely to cross
>> a 2M boundary (it theoretically could).  That means it will only "kill"
>> the possibility of a single 2M page.  More 2M pages == less fragmentation.
>
> Yes I agree with this. And the patch is no brainer. kvmalloc makes sure
> to not try too hard on the kmalloc side so I really didn't get the
> objection about direct compaction and reclaim which initially started
> this discussion. Besides that the swapon path usually happens early
> during the boot where we should have those larger blocks available.

Could I add your Acked-by for this patch?

Best Regards,
Huang, Ying

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH -v2 1/2] mm, swap: Use kvzalloc to allocate some swap data structure
  2017-04-01  4:47                     ` Huang, Ying
@ 2017-04-03  8:15                       ` Michal Hocko
  -1 siblings, 0 replies; 34+ messages in thread
From: Michal Hocko @ 2017-04-03  8:15 UTC (permalink / raw)
  To: Huang, Ying
  Cc: Dave Hansen, John Hubbard, David Rientjes, Andrew Morton,
	Andi Kleen, Shaohua Li, Rik van Riel, Tim Chen, Mel Gorman,
	Aaron Lu, Gerald Schaefer, Kirill A. Shutemov, Hugh Dickins,
	Ingo Molnar, Vegard Nossum, linux-mm, linux-kernel

On Sat 01-04-17 12:47:56, Huang, Ying wrote:
> Hi, Michal,
> 
> Michal Hocko <mhocko@kernel.org> writes:
> 
> > On Fri 24-03-17 06:56:10, Dave Hansen wrote:
> >> On 03/24/2017 12:33 AM, John Hubbard wrote:
> >> > There might be some additional information you are using to come up with
> >> > that conclusion, that is not obvious to me. Any thoughts there? These
> >> > calls use the same underlying page allocator (and I thought that both
> >> > were subject to the same constraints on defragmentation, as a result of
> >> > that). So I am not seeing any way that kmalloc could possibly be a
> >> > less-fragmenting call than vmalloc.
> >> 
> >> You guys are having quite a discussion over a very small point.
> >> 
> >> But, Ying is right.
> >> 
> >> Let's say we have a two-page data structure.  vmalloc() takes two
> >> effectively random order-0 pages, probably from two different 2M pages
> >> and pins them.  That "kills" two 2M pages.
> >> 
> >> kmalloc(), allocating two *contiguous* pages, is very unlikely to cross
> >> a 2M boundary (it theoretically could).  That means it will only "kill"
> >> the possibility of a single 2M page.  More 2M pages == less fragmentation.
> >
> > Yes I agree with this. And the patch is no brainer. kvmalloc makes sure
> > to not try too hard on the kmalloc side so I really didn't get the
> > objection about direct compaction and reclaim which initially started
> > this discussion. Besides that the swapon path usually happens early
> > during the boot where we should have those larger blocks available.
> 
> Could I add your Acked-by for this patch?

Yes, but please add the reasoning pointed out by Dave. As the patch
doesn't give any numbers, and it would be fairly hard to add some
without artificial workloads, we should at least document our current
thinking so that we can revisit it later.

Thanks!

Acked-by: Michal Hocko <mhocko@suse.com>

-- 
Michal Hocko
SUSE Labs

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH -v2 1/2] mm, swap: Use kvzalloc to allocate some swap data structure
  2017-04-03  8:15                       ` Michal Hocko
@ 2017-04-05  0:49                         ` Huang, Ying
  -1 siblings, 0 replies; 34+ messages in thread
From: Huang, Ying @ 2017-04-05  0:49 UTC (permalink / raw)
  To: Michal Hocko
  Cc: Huang, Ying, Dave Hansen, John Hubbard, David Rientjes,
	Andrew Morton, Andi Kleen, Shaohua Li, Rik van Riel, Tim Chen,
	Mel Gorman, Aaron Lu, Gerald Schaefer, Kirill A. Shutemov,
	Hugh Dickins, Ingo Molnar, Vegard Nossum, linux-mm, linux-kernel

Michal Hocko <mhocko@kernel.org> writes:

> On Sat 01-04-17 12:47:56, Huang, Ying wrote:
>> Hi, Michal,
>> 
>> Michal Hocko <mhocko@kernel.org> writes:
>> 
>> > On Fri 24-03-17 06:56:10, Dave Hansen wrote:
>> >> On 03/24/2017 12:33 AM, John Hubbard wrote:
>> >> > There might be some additional information you are using to come up with
>> >> > that conclusion, that is not obvious to me. Any thoughts there? These
>> >> > calls use the same underlying page allocator (and I thought that both
>> >> > were subject to the same constraints on defragmentation, as a result of
>> >> > that). So I am not seeing any way that kmalloc could possibly be a
>> >> > less-fragmenting call than vmalloc.
>> >> 
>> >> You guys are having quite a discussion over a very small point.
>> >> 
>> >> But, Ying is right.
>> >> 
>> >> Let's say we have a two-page data structure.  vmalloc() takes two
>> >> effectively random order-0 pages, probably from two different 2M pages
>> >> and pins them.  That "kills" two 2M pages.
>> >> 
>> >> kmalloc(), allocating two *contiguous* pages, is very unlikely to cross
>> >> a 2M boundary (it theoretically could).  That means it will only "kill"
>> >> the possibility of a single 2M page.  More 2M pages == less fragmentation.
>> >
>> > Yes I agree with this. And the patch is no brainer. kvmalloc makes sure
>> > to not try too hard on the kmalloc side so I really didn't get the
>> > objection about direct compaction and reclaim which initially started
>> > this discussion. Besides that the swapon path usually happens early
>> > during the boot where we should have those larger blocks available.
>> 
>> Could I add your Acked-by for this patch?
>
> Yes but please add the reasoning pointed out by Dave. As the patch
> doesn't give any numbers and it would be fairly hard to add some without
> artificial workloads we should at least document our current thinking
> so that we can revisit it later.
>
> Thanks!
>
> Acked-by: Michal Hocko <mhocko@suse.com>

Thanks, will add the reasoning.

Best Regards,
Huang, Ying

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH -v2 1/2] mm, swap: Use kvzalloc to allocate some swap data structure
  2017-03-24 13:56                 ` Dave Hansen
@ 2017-04-05 13:43                   ` Vlastimil Babka
  -1 siblings, 0 replies; 34+ messages in thread
From: Vlastimil Babka @ 2017-04-05 13:43 UTC (permalink / raw)
  To: Dave Hansen, John Hubbard, Huang, Ying
  Cc: David Rientjes, Andrew Morton, Andi Kleen, Shaohua Li,
	Rik van Riel, Tim Chen, Michal Hocko, Mel Gorman, Aaron Lu,
	Gerald Schaefer, Kirill A. Shutemov, Hugh Dickins, Ingo Molnar,
	Vegard Nossum, linux-mm, linux-kernel

On 03/24/2017 02:56 PM, Dave Hansen wrote:
> On 03/24/2017 12:33 AM, John Hubbard wrote:
>> There might be some additional information you are using to come up with
>> that conclusion, that is not obvious to me. Any thoughts there? These
>> calls use the same underlying page allocator (and I thought that both
>> were subject to the same constraints on defragmentation, as a result of
>> that). So I am not seeing any way that kmalloc could possibly be a
>> less-fragmenting call than vmalloc.
> 
> You guys are having quite a discussion over a very small point.

Sorry, I know I'm too late for this discussion, just wanted to clarify a
bit.

> But, Ying is right.
> 
> Let's say we have a two-page data structure.  vmalloc() takes two
> effectively random order-0 pages, probably from two different 2M pages
> and pins them.  That "kills" two 2M pages.
> 
> kmalloc(), allocating two *contiguous* pages, is very unlikely to cross
> a 2M boundary (it theoretically could).

If by "theoretically" you mean we switch kmalloc() from a buddy
allocator to something else, then yes. Otherwise, in the buddy
allocator, it cannot cross the 2M boundary by design.
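
(With 4KB pages, a 2MB block is 512 pages, and an order-1 -- two page --
buddy block always starts at an even page frame number.  Straddling a
512-page boundary would require starting at pfn 511 modulo 512, which is
odd, so the natural alignment rules it out; the same argument holds for
any order up to 9.)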

> That means it will only "kill"
> the possibility of a single 2M page.  More 2M pages == less fragmentation.

IMHO John is right that kmalloc() will reduce the number of high-order
pages *in the short term*. But in the long term, vmalloc() will hurt us
more due to the scattering of unmovable pages as you describe. As this
is AFAIU a long-term allocation, kmalloc() should be preferred.

Vlastimil

^ permalink raw reply	[flat|nested] 34+ messages in thread

end of thread, other threads:[~2017-04-05 13:44 UTC | newest]

Thread overview: 34+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-03-20  8:47 [PATCH -v2 1/2] mm, swap: Use kvzalloc to allocate some swap data structure Huang, Ying
2017-03-20  8:47 ` Huang, Ying
2017-03-20  8:47 ` [PATCH -v2 2/2] mm, swap: Sort swap entries before free Huang, Ying
2017-03-20  8:47   ` Huang, Ying
2017-03-20 21:32 ` [PATCH -v2 1/2] mm, swap: Use kvzalloc to allocate some swap data structure David Rientjes
2017-03-20 21:32   ` David Rientjes
2017-03-24  2:41   ` Huang, Ying
2017-03-24  2:41     ` Huang, Ying
2017-03-24  4:27     ` John Hubbard
2017-03-24  4:27       ` John Hubbard
2017-03-24  4:52       ` Huang, Ying
2017-03-24  4:52         ` Huang, Ying
2017-03-24  6:48         ` John Hubbard
2017-03-24  6:48           ` John Hubbard
2017-03-24  7:16           ` Huang, Ying
2017-03-24  7:16             ` Huang, Ying
2017-03-24  7:33             ` John Hubbard
2017-03-24  7:33               ` John Hubbard
2017-03-24 13:56               ` Dave Hansen
2017-03-24 13:56                 ` Dave Hansen
2017-03-24 16:52                 ` Tim Chen
2017-03-24 16:52                   ` Tim Chen
2017-03-24 18:15                   ` John Hubbard
2017-03-24 18:15                     ` John Hubbard
2017-03-30 16:31                 ` Michal Hocko
2017-03-30 16:31                   ` Michal Hocko
2017-04-01  4:47                   ` Huang, Ying
2017-04-01  4:47                     ` Huang, Ying
2017-04-03  8:15                     ` Michal Hocko
2017-04-03  8:15                       ` Michal Hocko
2017-04-05  0:49                       ` Huang, Ying
2017-04-05  0:49                         ` Huang, Ying
2017-04-05 13:43                 ` Vlastimil Babka
2017-04-05 13:43                   ` Vlastimil Babka
