All of lore.kernel.org
 help / color / mirror / Atom feed
* [LTP] [PATCH v2] move_pages12: Make sure hugepages are available
@ 2017-05-16 10:07 Cyril Hrubis
  2017-05-16 12:28 ` Jan Stancek
  0 siblings, 1 reply; 14+ messages in thread
From: Cyril Hrubis @ 2017-05-16 10:07 UTC (permalink / raw)
  To: ltp

[THIS PATCH ACTUALLY APPLIES ON THE TOP OF THE LATEST GIT, NO FUNCTIONAL
CHANGES WERE DONE]

This commit makes sure that enough huge pages are available on each node prior
to the test.

One problem we had is that there have to be at least four huge pages available
in the per-node pools even though we only allocate two. One of the
possibilities is that when we are moving pages back and forth between the nodes
there may be some overlap when a huge page is allocated on a node while the two
huge pages there, that are about to be moved, are still present or at least
accounted for. Hence we have to make sure that at least four huge pages are
available prior to the test.

The second problem is that huge page pools are limited by several files in the
virtual filesystem. There is a global knob for controlling the huge page pool
size in /proc, and there are per-node knobs in /sys. The value written to the
global knob is distributed evenly between the per-node knobs, hence on a two-node
machine writing 8 to the global knob is sufficient to make sure there are enough
huge pages for the test. But that does not work if the machine has three or
more nodes. Hence this patch tries to adjust the per-node pools on the nodes
selected for the test, and only if that is not possible do we adjust the global
knob and then make sure that the expected number of huge pages can be allocated
on each node.

Signed-off-by: Cyril Hrubis <chrubis@suse.cz>
---
 .../kernel/syscalls/move_pages/move_pages12.c      | 100 +++++++++++++++++++--
 1 file changed, 93 insertions(+), 7 deletions(-)

diff --git a/testcases/kernel/syscalls/move_pages/move_pages12.c b/testcases/kernel/syscalls/move_pages/move_pages12.c
index f93311f..6a1a186 100644
--- a/testcases/kernel/syscalls/move_pages/move_pages12.c
+++ b/testcases/kernel/syscalls/move_pages/move_pages12.c
@@ -35,6 +35,7 @@
 #include <errno.h>
 #include <unistd.h>
 #include <string.h>
+#include <stdio.h>
 #include <sys/types.h>
 #include <sys/wait.h>
 
@@ -52,7 +53,11 @@
 #define TEST_NODES	2
 
 static int pgsz, hpsz;
-static long orig_hugepages;
+static long orig_hugepages = -1;
+static char path_hugepages_node1[PATH_MAX];
+static char path_hugepages_node2[PATH_MAX];
+static long orig_hugepages_node1 = -1;
+static long orig_hugepages_node2 = -1;
 static unsigned int node1, node2;
 static void *addr;
 
@@ -128,6 +133,45 @@ static void do_test(void)
 	}
 }
 
+static void alloc_free_huge_on_node(unsigned int node, size_t size)
+{
+	char *mem;
+	long ret;
+	struct bitmask *bm;
+
+	tst_res(TINFO, "Allocating and freeing %zu hugepages on node %u",
+		size / hpsz, node);
+
+	mem = mmap(NULL, size, PROT_READ | PROT_WRITE,
+		   MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
+	if (mem == MAP_FAILED) {
+		if (errno == ENOMEM)
+			tst_brk(TCONF, "Cannot allocate huge pages");
+
+		tst_brk(TBROK | TERRNO, "mmap(..., MAP_HUGETLB, ...) failed");
+	}
+
+	bm = numa_bitmask_alloc(numa_max_possible_node() + 1);
+	if (!bm)
+		tst_brk(TBROK | TERRNO, "numa_bitmask_alloc() failed");
+
+	numa_bitmask_setbit(bm, node);
+
+	ret = mbind(mem, size, MPOL_BIND, bm->maskp, bm->size + 1, 0);
+	if (ret) {
+		if (errno == ENOMEM)
+			tst_brk(TCONF, "Cannot mbind huge pages");
+
+		tst_brk(TBROK | TERRNO, "mbind() failed");
+	}
+
+	numa_bitmask_free(bm);
+
+	memset(mem, 0, size);
+
+	SAFE_MUNMAP(mem, size);
+}
+
 static void setup(void)
 {
 	int memfree, ret;
@@ -137,6 +181,10 @@ static void setup(void)
 	if (access(PATH_HUGEPAGES, F_OK))
 		tst_brk(TCONF, "Huge page not supported");
 
+	ret = get_allowed_nodes(NH_MEMS, TEST_NODES, &node1, &node2);
+	if (ret < 0)
+		tst_brk(TBROK | TERRNO, "get_allowed_nodes: %d", ret);
+
 	pgsz = (int)get_page_size();
 	SAFE_FILE_LINES_SCANF(PATH_MEMINFO, "Hugepagesize: %d", &hpsz);
 
@@ -148,18 +196,56 @@ static void setup(void)
 
 	hpsz *= 1024;
 
-	SAFE_FILE_SCANF(PATH_NR_HUGEPAGES, "%ld", &orig_hugepages);
-	SAFE_FILE_PRINTF(PATH_NR_HUGEPAGES, "%ld", orig_hugepages + 4);
+	snprintf(path_hugepages_node1, sizeof(path_hugepages_node1),
+		 "/sys/devices/system/node/node%u/hugepages/hugepages-2048kB/nr_hugepages",
+		 node1);
+
+	snprintf(path_hugepages_node2, sizeof(path_hugepages_node2),
+		 "/sys/devices/system/node/node%u/hugepages/hugepages-2048kB/nr_hugepages",
+		 node2);
+
+	if (!access(path_hugepages_node1, F_OK)) {
+		SAFE_FILE_SCANF(path_hugepages_node1,
+				"%ld", &orig_hugepages_node1);
+		tst_res(TINFO, "Increasing hugepages pool on node %u to %ld",
+			node1, orig_hugepages_node1 + 4);
+		SAFE_FILE_PRINTF(path_hugepages_node1,
+				 "%ld", orig_hugepages_node1 + 4);
+	}
 
-	ret = get_allowed_nodes(NH_MEMS, TEST_NODES, &node1, &node2);
-	if (ret < 0)
-		tst_brk(TBROK | TERRNO, "get_allowed_nodes: %d", ret);
+	if (!access(path_hugepages_node2, F_OK)) {
+		SAFE_FILE_SCANF(path_hugepages_node2,
+				"%ld", &orig_hugepages_node2);
+		tst_res(TINFO, "Increasing hugepages pool on node %u to %ld",
+			node2, orig_hugepages_node2 + 4);
+		SAFE_FILE_PRINTF(path_hugepages_node2,
+				 "%ld", orig_hugepages_node2 + 4);
+	}
+
+	if (orig_hugepages_node1 == -1 || orig_hugepages_node2 == -1) {
+		SAFE_FILE_SCANF(PATH_NR_HUGEPAGES, "%ld", &orig_hugepages);
+		tst_res(TINFO, "Increasing global hugepages pool to %ld",
+			orig_hugepages + 8);
+		SAFE_FILE_PRINTF(PATH_NR_HUGEPAGES, "%ld", orig_hugepages + 8);
+		alloc_free_huge_on_node(node1, 4 * hpsz);
+		alloc_free_huge_on_node(node2, 4 * hpsz);
+	}
 }
 
 static void cleanup(void)
 {
-	if (!access(PATH_HUGEPAGES, F_OK))
+	if (orig_hugepages != -1)
 		SAFE_FILE_PRINTF(PATH_NR_HUGEPAGES, "%ld", orig_hugepages);
+
+	if (orig_hugepages_node1 != -1) {
+		SAFE_FILE_PRINTF(path_hugepages_node1,
+				 "%ld", orig_hugepages_node1);
+	}
+
+	if (orig_hugepages_node2 != -1) {
+		SAFE_FILE_PRINTF(path_hugepages_node2,
+				 "%ld", orig_hugepages_node2);
+	}
 }
 
 static struct tst_test test = {
-- 
2.7.3


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [LTP] [PATCH v2] move_pages12: Make sure hugepages are available
  2017-05-16 10:07 [LTP] [PATCH v2] move_pages12: Make sure hugepages are available Cyril Hrubis
@ 2017-05-16 12:28 ` Jan Stancek
  2017-05-16 13:32   ` Cyril Hrubis
  0 siblings, 1 reply; 14+ messages in thread
From: Jan Stancek @ 2017-05-16 12:28 UTC (permalink / raw)
  To: ltp



----- Original Message -----
> +
>  static void setup(void)
>  {
>  	int memfree, ret;
> @@ -137,6 +181,10 @@ static void setup(void)
>  	if (access(PATH_HUGEPAGES, F_OK))
>  		tst_brk(TCONF, "Huge page not supported");
>  
> +	ret = get_allowed_nodes(NH_MEMS, TEST_NODES, &node1, &node2);
> +	if (ret < 0)
> +		tst_brk(TBROK | TERRNO, "get_allowed_nodes: %d", ret);
> +
>  	pgsz = (int)get_page_size();
>  	SAFE_FILE_LINES_SCANF(PATH_MEMINFO, "Hugepagesize: %d", &hpsz);
>  
> @@ -148,18 +196,56 @@ static void setup(void)
>  
>  	hpsz *= 1024;
>  
> -	SAFE_FILE_SCANF(PATH_NR_HUGEPAGES, "%ld", &orig_hugepages);
> -	SAFE_FILE_PRINTF(PATH_NR_HUGEPAGES, "%ld", orig_hugepages + 4);
> +	snprintf(path_hugepages_node1, sizeof(path_hugepages_node1),
> +
> 		 "/sys/devices/system/node/node%u/hugepages/hugepages-2048kB/nr_hugepages",
> +		 node1);
> +
> +	snprintf(path_hugepages_node2, sizeof(path_hugepages_node2),
> +
> 		 "/sys/devices/system/node/node%u/hugepages/hugepages-2048kB/nr_hugepages",
> +		 node2);

Hi,

"hugepages-2048kB" in path above will work only on systems with 2M huge pages.

> +
> +	if (!access(path_hugepages_node1, F_OK)) {
> +		SAFE_FILE_SCANF(path_hugepages_node1,
> +				"%ld", &orig_hugepages_node1);
> +		tst_res(TINFO, "Increasing hugepages pool on node %u to %ld",
> +			node1, orig_hugepages_node1 + 4);
> +		SAFE_FILE_PRINTF(path_hugepages_node1,
> +				 "%ld", orig_hugepages_node1 + 4);

There doesn't seem to be any error if you ask for more:

# echo 20000 > /sys/devices/system/node/node0/hugepages/hugepages-2048kB/nr_hugepages
# cat /sys/devices/system/node/node0/hugepages/hugepages-2048kB/nr_hugepages
11650

So, maybe we can just read it back and if it doesn't match what we requested,
we can TCONF.

Rest looks good to me.
I've tested so far only on x86, no issues there.

Regards,
Jan

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [LTP] [PATCH v2] move_pages12: Make sure hugepages are available
  2017-05-16 12:28 ` Jan Stancek
@ 2017-05-16 13:32   ` Cyril Hrubis
  2017-05-16 14:05     ` Jan Stancek
  0 siblings, 1 reply; 14+ messages in thread
From: Cyril Hrubis @ 2017-05-16 13:32 UTC (permalink / raw)
  To: ltp

Hi!
> "hugepages-2048kB" in path above will work only on systems with 2M huge pages.

Do you have a ppc64 numa machine with more than two nodes at hand? Since
that is the only one where the current code may fail. Both x86_64 and
aarch64 seems to have 2MB huge pages.

I would just go with this patch now, and possibly fix more complicated
corner cases after the release, since this patch is the last problem
that holds the release from my side.

Anything else that should be taken care of before the release?

> > +
> > +	if (!access(path_hugepages_node1, F_OK)) {
> > +		SAFE_FILE_SCANF(path_hugepages_node1,
> > +				"%ld", &orig_hugepages_node1);
> > +		tst_res(TINFO, "Increasing hugepages pool on node %u to %ld",
> > +			node1, orig_hugepages_node1 + 4);
> > +		SAFE_FILE_PRINTF(path_hugepages_node1,
> > +				 "%ld", orig_hugepages_node1 + 4);
> 
> There doesn't seem to be any error if you ask for more:
> 
> # echo 20000 > /sys/devices/system/node/node0/hugepages/hugepages-2048kB/nr_hugepages
> # cat /sys/devices/system/node/node0/hugepages/hugepages-2048kB/nr_hugepages
> 11650
> 
> So, maybe we can just read it back and if it doesn't match what we requested,
> we can TCONF.

Or we may try to allocate 4 huge pages on both nodes even in a case that
we set the per-node limits that should catch the problem as well. Is
that OK with you?

-- 
Cyril Hrubis
chrubis@suse.cz

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [LTP] [PATCH v2] move_pages12: Make sure hugepages are available
  2017-05-16 13:32   ` Cyril Hrubis
@ 2017-05-16 14:05     ` Jan Stancek
  2017-05-16 14:15       ` Cyril Hrubis
  2017-05-16 14:15       ` Jan Stancek
  0 siblings, 2 replies; 14+ messages in thread
From: Jan Stancek @ 2017-05-16 14:05 UTC (permalink / raw)
  To: ltp



----- Original Message -----
> Hi!
> > "hugepages-2048kB" in path above will work only on systems with 2M huge
> > pages.
> 
> Do you have a ppc64 numa machine with more than two nodes at hand? Since

Yes, I have access to couple with 4 numa nodes.

> that is the only one where the current code may fail. Both x86_64 and
> aarch64 seems to have 2MB huge pages.

Default huge page for aarch64 is 512M.

# cat /proc/meminfo | grep Hugepagesize
Hugepagesize:     524288 kB

# uname -r
4.11.0-2.el7.aarch64

I think in 4.11 you can't even switch with default_hugepagesz=2M at the moment:
 6ae979ab39a3 "Revert "Revert "arm64: hugetlb: partial revert of 66b3923a1a0f"""

> 
> I would just go with this patch now, and possibly fix more complicated
> corner cases after the release, since this patch is the last problem
> that holds the release from my side.

Can't we squeeze it in? All we need is to use "hpsz" we already have:

  snprintf(path_hugepages_node1, sizeof(path_hugepages_node1),
          "/sys/devices/system/node/node%u/hugepages/hugepages-%dkB/nr_hugepages",
          node1, hpsz);

> 
> Anything else that should be taken care of before the release?

No, this should be last pending patch.

> 
> > > +
> > > +	if (!access(path_hugepages_node1, F_OK)) {
> > > +		SAFE_FILE_SCANF(path_hugepages_node1,
> > > +				"%ld", &orig_hugepages_node1);
> > > +		tst_res(TINFO, "Increasing hugepages pool on node %u to %ld",
> > > +			node1, orig_hugepages_node1 + 4);
> > > +		SAFE_FILE_PRINTF(path_hugepages_node1,
> > > +				 "%ld", orig_hugepages_node1 + 4);
> > 
> > There doesn't seem to be any error if you ask for more:
> > 
> > # echo 20000 >
> > /sys/devices/system/node/node0/hugepages/hugepages-2048kB/nr_hugepages
> > # cat
> > /sys/devices/system/node/node0/hugepages/hugepages-2048kB/nr_hugepages
> > 11650
> > 
> > So, maybe we can just read it back and if it doesn't match what we
> > requested,
> > we can TCONF.
> 
> Or we may try to allocate 4 huge pages on both nodes even in a case that
> we set the per-node limits that should catch the problem as well. Is
> that OK with you?

Yes, that should work too.

Regards,
Jan

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [LTP] [PATCH v2] move_pages12: Make sure hugepages are available
  2017-05-16 14:05     ` Jan Stancek
@ 2017-05-16 14:15       ` Cyril Hrubis
  2017-05-16 14:15       ` Jan Stancek
  1 sibling, 0 replies; 14+ messages in thread
From: Cyril Hrubis @ 2017-05-16 14:15 UTC (permalink / raw)
  To: ltp

Hi!
> > Do you have a ppc64 numa machine with more than two nodes at hand? Since
> 
> Yes, I have access to couple with 4 numa nodes.
> 
> > that is the only one where the current code may fail. Both x86_64 and
> > aarch64 seems to have 2MB huge pages.
> 
> Default huge page for aarch64 is 512M.
> 
> # cat /proc/meminfo | grep Hugepagesize
> Hugepagesize:     524288 kB
> 
> # uname -r
> 4.11.0-2.el7.aarch64
> 
> I think in 4.11 you can't even switch with default_hugepagesz=2M at the moment:
>  6ae979ab39a3 "Revert "Revert "arm64: hugetlb: partial revert of 66b3923a1a0f"""

Hmm, my SLES12 SP2 aarch64 with kernel 4.4 has 2MB so it's not even
consistent among architectures.

> > 
> > I would just go with this patch now, and possibly fix more complicated
> > corner cases after the release, since this patch is the last problem
> > that holds the release from my side.
> 
> Can't we squeeze it in? All we need is to use "hpsz" we already have:
> 
>   snprintf(path_hugepages_node1, sizeof(path_hugepages_node1),
>           "/sys/devices/system/node/node%u/hugepages/hugepages-%dkB/nr_hugepages",
>           node1, hpsz);

Okay, let's go with that one. Presumably, if there is not enough RAM the
size of the pool will be truncated silently here and we will produce a
TCONF once we try to allocate these pages in case the default hugepage
size is too big.

-- 
Cyril Hrubis
chrubis@suse.cz

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [LTP] [PATCH v2] move_pages12: Make sure hugepages are available
  2017-05-16 14:05     ` Jan Stancek
  2017-05-16 14:15       ` Cyril Hrubis
@ 2017-05-16 14:15       ` Jan Stancek
  2017-05-16 14:29         ` Cyril Hrubis
  1 sibling, 1 reply; 14+ messages in thread
From: Jan Stancek @ 2017-05-16 14:15 UTC (permalink / raw)
  To: ltp



----- Original Message -----
> 
> 
> ----- Original Message -----
> > Hi!
> > > "hugepages-2048kB" in path above will work only on systems with 2M huge
> > > pages.
> > 
> > Do you have a ppc64 numa machine with more than two nodes at hand? Since
> 
> Yes, I have access to couple with 4 numa nodes.
> 
> > that is the only one where the current code may fail. Both x86_64 and
> > aarch64 seems to have 2MB huge pages.
> 
> Default huge page for aarch64 is 512M.
> 
> # cat /proc/meminfo | grep Hugepagesize
> Hugepagesize:     524288 kB
> 
> # uname -r
> 4.11.0-2.el7.aarch64
> 
> I think in 4.11 you can't even switch with default_hugepagesz=2M at the
> moment:
>  6ae979ab39a3 "Revert "Revert "arm64: hugetlb: partial revert of
>  66b3923a1a0f"""
> 
> > 
> > I would just go with this patch now, and possibly fix more complicated
> > corner cases after the release, since this patch is the last problem
> > that holds the release from my side.
> 
> Can't we squeeze it in? All we need is to use "hpsz" we already have:
> 
>   snprintf(path_hugepages_node1, sizeof(path_hugepages_node1),
>           "/sys/devices/system/node/node%u/hugepages/hugepages-%dkB/nr_hugepages",
>           node1, hpsz);

Correction: "hpsz / 1024", we multiply it in setup() by 1024.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [LTP] [PATCH v2] move_pages12: Make sure hugepages are available
  2017-05-16 14:15       ` Jan Stancek
@ 2017-05-16 14:29         ` Cyril Hrubis
  2017-05-17  8:21           ` Jan Stancek
  2017-05-29 13:12           ` Jan Stancek
  0 siblings, 2 replies; 14+ messages in thread
From: Cyril Hrubis @ 2017-05-16 14:29 UTC (permalink / raw)
  To: ltp

Hi!
> Correction: "hpsz / 1024", we multiply it in setup() by 1024.

I've moved the multiplication down in the setup so that we end up with the
correct value without the division. And I've also added your Signed-off-by
to the patch, since you did a non-trivial amount of work on it and finally
pushed it. Thanks for your help.

Now I will tag the git, upload tarballs, etc...

-- 
Cyril Hrubis
chrubis@suse.cz

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [LTP] [PATCH v2] move_pages12: Make sure hugepages are available
  2017-05-16 14:29         ` Cyril Hrubis
@ 2017-05-17  8:21           ` Jan Stancek
  2017-05-29 13:12           ` Jan Stancek
  1 sibling, 0 replies; 14+ messages in thread
From: Jan Stancek @ 2017-05-17  8:21 UTC (permalink / raw)
  To: ltp


----- Original Message -----
> Hi!
> > Correction: "hpsz / 1024", we multiply it in setup() by 1024.
> 
> I've moved the multiplication down in the setup so that we end up with
> correct value without the divison. And I've also added you Signed-off-by
> to the patch, since you did non-trivial amount of work on it and finally
> pushed it. Thanks for you help.

I left the last version running overnight. No issues on x86 and ppc64le
with 4 nodes. arm with 512M hugepages had a small issue with an int overflow;
I pushed a patch.

Regards,
Jan

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [LTP] [PATCH v2] move_pages12: Make sure hugepages are available
  2017-05-16 14:29         ` Cyril Hrubis
  2017-05-17  8:21           ` Jan Stancek
@ 2017-05-29 13:12           ` Jan Stancek
  2017-05-29 13:45             ` Jan Stancek
  2017-05-30 11:50             ` Cyril Hrubis
  1 sibling, 2 replies; 14+ messages in thread
From: Jan Stancek @ 2017-05-29 13:12 UTC (permalink / raw)
  To: ltp



----- Original Message -----
> Hi!
> > Correction: "hpsz / 1024", we multiply it in setup() by 1024.
> 
> I've moved the multiplication down in the setup so that we end up with
> correct value without the divison. And I've also added you Signed-off-by
> to the patch, since you did non-trivial amount of work on it and finally
> pushed it. Thanks for you help.

Hi,

I'm sporadically running into SIGBUS in this testcase, not sure if it's 
because of low memory or something else. Do you see it too?

I wonder if we should replace memset with MAP_POPULATE.

(gdb) bt
#0  0x00003fffb16ac620 in .__memset_power8 () from /lib64/libc.so.6
#1  0x0000000010003344 in memset (__len=67108864, __ch=0, __dest=0x3efffc000000) at /usr/include/bits/string3.h:84
#2  alloc_free_huge_on_node (node=<optimized out>, size=67108864) at move_pages12.c:170
#3  0x0000000010003648 in setup () at move_pages12.c:235
#4  0x0000000010006ad4 in do_test_setup () at tst_test.c:705
#5  testrun () at tst_test.c:778
#6  tst_run_tcases (argc=<optimized out>, argv=0x3fffd1c7e488, self=<optimized out>) at tst_test.c:884
#7  0x0000000010002f58 in main (argc=<optimized out>, argv=<optimized out>) at ../../../../include/tst_test.h:189

[pid 48425] 08:45:57.151242 write(2, "move_pages12.c:143: \33[1;34mINFO:"..., 82move_pages12.c:143: INFO: Allocating and freeing 4 hug
epages on node 2
) = 82
[pid 48425] 08:45:57.151287 mmap(NULL, 67108864, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_HUGETLB, -1, 0) = 0x3efffc000000
[pid 48425] 08:45:57.151442 mbind(0x3efffc000000, 67108864, MPOL_BIND, [0x0000000000000004, 000000000000000000, 000000000000000000, 00
0000000000000000], 257, 0) = 0
[pid 48425] 08:45:57.167377 munmap(0x3efffc000000, 67108864) = 0
[pid 48425] 08:45:57.167486 write(2, "move_pages12.c:143: \33[1;34mINFO:"..., 82move_pages12.c:143: INFO: Allocating and freeing 4 hug
epages on node 3
) = 82
[pid 48425] 08:45:57.167554 mmap(NULL, 67108864, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_HUGETLB, -1, 0) = 0x3efffc000000
[pid 48425] 08:45:57.167648 mbind(0x3efffc000000, 67108864, MPOL_BIND, [0x0000000000000008, 000000000000000000, 000000000000000000, 00
0000000000000000], 257, 0) = 0
[pid 48425] 08:45:57.172293 --- SIGBUS {si_signo=SIGBUS, si_code=BUS_ADRERR, si_addr=0x3efffe000000} ---

available: 3 nodes (0,2-3)
node 0 cpus: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
node 0 size: 0 MB
node 0 free: 0 MB
node 2 cpus:
node 2 size: 16384 MB
node 2 free: 12969 MB
node 3 cpus:
node 3 size: 2048 MB
node 3 free: 58 MB
node distances:
node   0   2   3
  0:  10  40  10
  2:  40  10  40
  3:  10  40  10

Regards,
Jan

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [LTP] [PATCH v2] move_pages12: Make sure hugepages are available
  2017-05-29 13:12           ` Jan Stancek
@ 2017-05-29 13:45             ` Jan Stancek
  2017-05-30 11:50             ` Cyril Hrubis
  1 sibling, 0 replies; 14+ messages in thread
From: Jan Stancek @ 2017-05-29 13:45 UTC (permalink / raw)
  To: ltp

On 05/29/2017 03:12 PM, Jan Stancek wrote:
> 
> 
> ----- Original Message -----
>> Hi!
>>> Correction: "hpsz / 1024", we multiply it in setup() by 1024.
>>
>> I've moved the multiplication down in the setup so that we end up with
>> correct value without the divison. And I've also added you Signed-off-by
>> to the patch, since you did non-trivial amount of work on it and finally
>> pushed it. Thanks for you help.
> 
> Hi,
> 
> I'm sporadically running into SIGBUS in this testcase, not sure if it's 
> because of low memory or something else. Do you see it too?
> 
> I wonder if we should replace memset with MAP_POPULATE.

What would you think about something like this?

diff --git a/testcases/kernel/syscalls/move_pages/move_pages12.c b/testcases/kernel/syscalls/move_pages/move_pages12.c
index e1d956dba67e..d22b3c917370 100644
--- a/testcases/kernel/syscalls/move_pages/move_pages12.c
+++ b/testcases/kernel/syscalls/move_pages/move_pages12.c
@@ -143,7 +143,8 @@ static void alloc_free_huge_on_node(unsigned int node, size_t size)
 		size / hpsz, node);

 	mem = mmap(NULL, size, PROT_READ | PROT_WRITE,
-		   MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
+		   MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_POPULATE,
+		   -1, 0);
 	if (mem == MAP_FAILED) {
 		if (errno == ENOMEM)
 			tst_brk(TCONF, "Cannot allocate huge pages");
@@ -157,7 +158,7 @@ static void alloc_free_huge_on_node(unsigned int node, size_t size)

 	numa_bitmask_setbit(bm, node);

-	ret = mbind(mem, size, MPOL_BIND, bm->maskp, bm->size + 1, 0);
+	ret = mbind(mem, size, MPOL_MF_MOVE, bm->maskp, bm->size + 1, 0);
 	if (ret) {
 		if (errno == ENOMEM)
 			tst_brk(TCONF, "Cannot mbind huge pages");
@@ -167,8 +168,6 @@ static void alloc_free_huge_on_node(unsigned int node, size_t size)

 	numa_bitmask_free(bm);

-	memset(mem, 0, size);
-
 	SAFE_MUNMAP(mem, size);
 }

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [LTP] [PATCH v2] move_pages12: Make sure hugepages are available
  2017-05-29 13:12           ` Jan Stancek
  2017-05-29 13:45             ` Jan Stancek
@ 2017-05-30 11:50             ` Cyril Hrubis
  2017-05-30 13:11               ` Jan Stancek
  1 sibling, 1 reply; 14+ messages in thread
From: Cyril Hrubis @ 2017-05-30 11:50 UTC (permalink / raw)
  To: ltp

Hi!
> I'm sporadically running into SIGBUS in this testcase, not sure if it's 
> because of low memory or something else. Do you see it too?

None so far, but I haven't been running the test on anything else than
machines with just two numa nodes so far.

> I wonder if we should replace memset with MAP_POPULATE.

Isn't MAP_POPULATE best effort only?

I guess that we can then call mincore() to check if MAP_POPULATE really
populated the pages and possibly try dropping system caches and retry
again then produce TCONF if we happen to fail again.

> (gdb) bt
> #0  0x00003fffb16ac620 in .__memset_power8 () from /lib64/libc.so.6
> #1  0x0000000010003344 in memset (__len=67108864, __ch=0, __dest=0x3efffc000000) at /usr/include/bits/string3.h:84
> #2  alloc_free_huge_on_node (node=<optimized out>, size=67108864) at move_pages12.c:170
> #3  0x0000000010003648 in setup () at move_pages12.c:235
> #4  0x0000000010006ad4 in do_test_setup () at tst_test.c:705
> #5  testrun () at tst_test.c:778
> #6  tst_run_tcases (argc=<optimized out>, argv=0x3fffd1c7e488, self=<optimized out>) at tst_test.c:884
> #7  0x0000000010002f58 in main (argc=<optimized out>, argv=<optimized out>) at ../../../../include/tst_test.h:189
> 
> [pid 48425] 08:45:57.151242 write(2, "move_pages12.c:143: \33[1;34mINFO:"..., 82move_pages12.c:143: INFO: Allocating and freeing 4 hug
> epages on node 2
> ) = 82
> [pid 48425] 08:45:57.151287 mmap(NULL, 67108864, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_HUGETLB, -1, 0) = 0x3efffc000000
> [pid 48425] 08:45:57.151442 mbind(0x3efffc000000, 67108864, MPOL_BIND, [0x0000000000000004, 000000000000000000, 000000000000000000, 00
> 0000000000000000], 257, 0) = 0
> [pid 48425] 08:45:57.167377 munmap(0x3efffc000000, 67108864) = 0
> [pid 48425] 08:45:57.167486 write(2, "move_pages12.c:143: \33[1;34mINFO:"..., 82move_pages12.c:143: INFO: Allocating and freeing 4 hug
> epages on node 3
> ) = 82
> [pid 48425] 08:45:57.167554 mmap(NULL, 67108864, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_HUGETLB, -1, 0) = 0x3efffc000000
> [pid 48425] 08:45:57.167648 mbind(0x3efffc000000, 67108864, MPOL_BIND, [0x0000000000000008, 000000000000000000, 000000000000000000, 00
> 0000000000000000], 257, 0) = 0
> [pid 48425] 08:45:57.172293 --- SIGBUS {si_signo=SIGBUS, si_code=BUS_ADRERR, si_addr=0x3efffe000000} ---

Looks like we happened to get the signal when we tried to fault the third page;
at least if the si_addr is correct it points into the middle of the
mapping. So I guess that there are not enough contiguous blocks to back
the mapping.

-- 
Cyril Hrubis
chrubis@suse.cz

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [LTP] [PATCH v2] move_pages12: Make sure hugepages are available
  2017-05-30 11:50             ` Cyril Hrubis
@ 2017-05-30 13:11               ` Jan Stancek
  2017-05-30 13:48                 ` Cyril Hrubis
  0 siblings, 1 reply; 14+ messages in thread
From: Jan Stancek @ 2017-05-30 13:11 UTC (permalink / raw)
  To: ltp



----- Original Message -----
> Hi!
> > I'm sporadically running into SIGBUS in this testcase, not sure if it's
> > because of low memory or something else. Do you see it too?
> 
> None so far, but I haven't been running the test on anything else than
> machines with just two numa nodes so far.
> 
> > I wonder if we should replace memset with MAP_POPULATE.
> 
> Isn't MAP_POPULATE best effort only?

It's a readahead for file mappings, not sure about anonymous.

As alternative commit [1] gave me idea to try mlock, and that seems
to work too. If a node doesn't have enough memory I get ENOMEM.

diff --git a/testcases/kernel/syscalls/move_pages/move_pages12.c b/testcases/kernel/syscalls/move_pages/move_pages12.c
index e1d956dba67e..4c7d5c2c01b0 100644
--- a/testcases/kernel/syscalls/move_pages/move_pages12.c
+++ b/testcases/kernel/syscalls/move_pages/move_pages12.c
@@ -165,9 +165,15 @@ static void alloc_free_huge_on_node(unsigned int node, size_t size)
                tst_brk(TBROK | TERRNO, "mbind() failed");
        }
 
-       numa_bitmask_free(bm);
+       TEST(mlock(mem, size));
+       if (TEST_RETURN) {
+               SAFE_MUNMAP(mem, size);
+               if (TEST_ERRNO == ENOMEM || TEST_ERRNO == EAGAIN)
+                       tst_brk(TCONF, "Cannot lock huge pages");
+               tst_brk(TBROK | TTERRNO, "mlock failed");
+       }
 
-       memset(mem, 0, size);
+       numa_bitmask_free(bm);
 
        SAFE_MUNMAP(mem, size);
 }


[1] 04f2cbe35699 "hugetlb: guarantee that COW faults for a process that called mmap(MAP_PRIVATE) on hugetlbfs will succeed"

> 
> I guess that we can then call mincore() to check if MAP_POPULATE really
> populated the pages and possibly try dropping system caches and retry
> again then produce TCONF if we happen to fail again.
> 
> > (gdb) bt
> > #0  0x00003fffb16ac620 in .__memset_power8 () from /lib64/libc.so.6
> > #1  0x0000000010003344 in memset (__len=67108864, __ch=0,
> > __dest=0x3efffc000000) at /usr/include/bits/string3.h:84
> > #2  alloc_free_huge_on_node (node=<optimized out>, size=67108864) at
> > move_pages12.c:170
> > #3  0x0000000010003648 in setup () at move_pages12.c:235
> > #4  0x0000000010006ad4 in do_test_setup () at tst_test.c:705
> > #5  testrun () at tst_test.c:778
> > #6  tst_run_tcases (argc=<optimized out>, argv=0x3fffd1c7e488,
> > self=<optimized out>) at tst_test.c:884
> > #7  0x0000000010002f58 in main (argc=<optimized out>, argv=<optimized out>)
> > at ../../../../include/tst_test.h:189
> > 
> > [pid 48425] 08:45:57.151242 write(2, "move_pages12.c:143:
> > \33[1;34mINFO:"..., 82move_pages12.c:143: INFO: Allocating and freeing 4
> > hug
> > epages on node 2
> > ) = 82
> > [pid 48425] 08:45:57.151287 mmap(NULL, 67108864, PROT_READ|PROT_WRITE,
> > MAP_PRIVATE|MAP_ANONYMOUS|MAP_HUGETLB, -1, 0) = 0x3efffc000000
> > [pid 48425] 08:45:57.151442 mbind(0x3efffc000000, 67108864, MPOL_BIND,
> > [0x0000000000000004, 000000000000000000, 000000000000000000, 00
> > 0000000000000000], 257, 0) = 0
> > [pid 48425] 08:45:57.167377 munmap(0x3efffc000000, 67108864) = 0
> > [pid 48425] 08:45:57.167486 write(2, "move_pages12.c:143:
> > \33[1;34mINFO:"..., 82move_pages12.c:143: INFO: Allocating and freeing 4
> > hug
> > epages on node 3
> > ) = 82
> > [pid 48425] 08:45:57.167554 mmap(NULL, 67108864, PROT_READ|PROT_WRITE,
> > MAP_PRIVATE|MAP_ANONYMOUS|MAP_HUGETLB, -1, 0) = 0x3efffc000000
> > [pid 48425] 08:45:57.167648 mbind(0x3efffc000000, 67108864, MPOL_BIND,
> > [0x0000000000000008, 000000000000000000, 000000000000000000, 00
> > 0000000000000000], 257, 0) = 0
> > [pid 48425] 08:45:57.172293 --- SIGBUS {si_signo=SIGBUS,
> > si_code=BUS_ADRERR, si_addr=0x3efffe000000} ---
> 
> Looks like we happen to got the signal when we try to fault third page,
> at least if the si_addr is correct it points in the middle of the
> mapping. So I guess that there is not enough continuous blocks to back
> the mapping.
> 
> --
> Cyril Hrubis
> chrubis@suse.cz
> 

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [LTP] [PATCH v2] move_pages12: Make sure hugepages are available
  2017-05-30 13:11               ` Jan Stancek
@ 2017-05-30 13:48                 ` Cyril Hrubis
  2017-05-31  7:46                   ` Jan Stancek
  0 siblings, 1 reply; 14+ messages in thread
From: Cyril Hrubis @ 2017-05-30 13:48 UTC (permalink / raw)
  To: ltp

Hi!
> > > I'm sporadically running into SIGBUS in this testcase, not sure if it's
> > > because of low memory or something else. Do you see it too?
> > 
> > None so far, but I haven't been running the test on anything else than
> > machines with just two numa nodes so far.
> > 
> > > I wonder if we should replace memset with MAP_POPULATE.
> > 
> > Isn't MAP_POPULATE best effort only?
> 
> It's a readahead for file mappings, not sure about anonymous.

See:

http://www.serverphorums.com/read.php?12,1204538,1204539#msg-1204539

As far as I remember the discussion MAP_POPULATE does not work in low
memory condition and mmap() returns without any error.

> As alternative commit [1] gave me idea to try mlock, and that seems
> to work too. If a node doesn't have enough memory I get ENOMEM.
> 
> diff --git a/testcases/kernel/syscalls/move_pages/move_pages12.c b/testcases/kernel/syscalls/move_pages/move_pages12.c
> index e1d956dba67e..4c7d5c2c01b0 100644
> --- a/testcases/kernel/syscalls/move_pages/move_pages12.c
> +++ b/testcases/kernel/syscalls/move_pages/move_pages12.c
> @@ -165,9 +165,15 @@ static void alloc_free_huge_on_node(unsigned int node, size_t size)
>                 tst_brk(TBROK | TERRNO, "mbind() failed");
>         }
>  
> -       numa_bitmask_free(bm);
> +       TEST(mlock(mem, size));
> +       if (TEST_RETURN) {
> +               SAFE_MUNMAP(mem, size);
> +               if (TEST_ERRNO == ENOMEM || TEST_ERRNO == EAGAIN)
> +                       tst_brk(TCONF, "Cannot lock huge pages");
> +               tst_brk(TBROK | TTERRNO, "mlock failed");
> +       }
>  
> -       memset(mem, 0, size);
> +       numa_bitmask_free(bm);
>  
>         SAFE_MUNMAP(mem, size);
>  }

LGTM.

-- 
Cyril Hrubis
chrubis@suse.cz

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [LTP] [PATCH v2] move_pages12: Make sure hugepages are available
  2017-05-30 13:48                 ` Cyril Hrubis
@ 2017-05-31  7:46                   ` Jan Stancek
  0 siblings, 0 replies; 14+ messages in thread
From: Jan Stancek @ 2017-05-31  7:46 UTC (permalink / raw)
  To: ltp


----- Original Message -----
> See:
> 
> http://www.serverphorums.com/read.php?12,1204538,1204539#msg-1204539

Thanks for link.

> LGTM.

Pushed with your ack.

Regards,
Jan

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2017-05-31  7:46 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-05-16 10:07 [LTP] [PATCH v2] move_pages12: Make sure hugepages are available Cyril Hrubis
2017-05-16 12:28 ` Jan Stancek
2017-05-16 13:32   ` Cyril Hrubis
2017-05-16 14:05     ` Jan Stancek
2017-05-16 14:15       ` Cyril Hrubis
2017-05-16 14:15       ` Jan Stancek
2017-05-16 14:29         ` Cyril Hrubis
2017-05-17  8:21           ` Jan Stancek
2017-05-29 13:12           ` Jan Stancek
2017-05-29 13:45             ` Jan Stancek
2017-05-30 11:50             ` Cyril Hrubis
2017-05-30 13:11               ` Jan Stancek
2017-05-30 13:48                 ` Cyril Hrubis
2017-05-31  7:46                   ` Jan Stancek

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.