* [LTP] [RFC] [PATCH] move_pages12: Allocate and free hugepages prior to the test
@ 2017-05-09 14:04 Cyril Hrubis
  2017-05-10  8:56 ` Jan Stancek
From: Cyril Hrubis @ 2017-05-09 14:04 UTC
  To: ltp

This commit adds code to the test setup that mmap()s, mbind()s and
faults in hugepages on both testing nodes prior to the test run, in
order to make sure that there is enough contiguous space for the
move_pages() syscall to move huge pages back and forth.

Signed-off-by: Cyril Hrubis <chrubis@suse.cz>
---
 .../kernel/syscalls/move_pages/move_pages12.c      | 42 ++++++++++++++++++++++
 1 file changed, 42 insertions(+)

diff --git a/testcases/kernel/syscalls/move_pages/move_pages12.c b/testcases/kernel/syscalls/move_pages/move_pages12.c
index de00346..765ea7e 100644
--- a/testcases/kernel/syscalls/move_pages/move_pages12.c
+++ b/testcases/kernel/syscalls/move_pages/move_pages12.c
@@ -128,6 +128,45 @@ static void do_test(void)
 	}
 }
 
+static void alloc_free_huge_on_node(unsigned int node, size_t size)
+{
+	char *mem;
+	long ret;
+	struct bitmask *bm;
+
+	tst_res(TINFO, "Allocating and freeing %zu hugepages on node %u",
+		size / hpsz, node);
+
+	mem = mmap(NULL, size, PROT_READ | PROT_WRITE,
+		   MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
+	if (mem == MAP_FAILED) {
+		if (errno == ENOMEM)
+			tst_brk(TCONF, "Cannot allocate huge pages");
+
+		tst_brk(TBROK | TERRNO, "mmap(..., MAP_HUGETLB, ...) failed");
+	}
+
+	bm = numa_bitmask_alloc(numa_max_possible_node() + 1);
+	if (!bm)
+		tst_brk(TBROK | TERRNO, "numa_bitmask_alloc() failed");
+
+	numa_bitmask_setbit(bm, node);
+
+	ret = mbind(mem, size, MPOL_BIND, bm->maskp, bm->size + 1, 0);
+	if (ret) {
+		if (errno == ENOMEM)
+			tst_brk(TCONF, "Cannot mbind huge pages");
+
+		tst_brk(TBROK | TERRNO, "mbind() failed");
+	}
+
+	numa_bitmask_free(bm);
+
+	memset(mem, 0, size);
+
+	SAFE_MUNMAP(mem, size);
+}
+
 static void setup(void)
 {
 	int memfree, ret;
@@ -154,6 +193,9 @@ static void setup(void)
 	ret = get_allowed_nodes(NH_MEMS, TEST_NODES, &node1, &node2);
 	if (ret < 0)
 		tst_brk(TBROK | TERRNO, "get_allowed_nodes: %d", ret);
+
+	alloc_free_huge_on_node(node1, TEST_PAGES * hpsz);
+	alloc_free_huge_on_node(node2, TEST_PAGES * hpsz);
 }
 
 static void cleanup(void)
-- 
2.7.3



* [LTP] [RFC] [PATCH] move_pages12: Allocate and free hugepages prior to the test
  2017-05-09 14:04 [LTP] [RFC] [PATCH] move_pages12: Allocate and free hugepages prior to the test Cyril Hrubis
@ 2017-05-10  8:56 ` Jan Stancek
  2017-05-10 12:21   ` Cyril Hrubis
  2017-05-10 13:49   ` Cyril Hrubis
From: Jan Stancek @ 2017-05-10  8:56 UTC
  To: ltp



----- Original Message -----
> This commit adds code to the test setup that mmap()s, mbind()s and
> faults in hugepages on both testing nodes prior to the test run, in
> order to make sure that there is enough contiguous space for the
> move_pages() syscall to move huge pages back and forth.

I'm still getting sporadic failures with a 4.11 kernel. It's a freshly
booted system, so I would expect fragmentation to be low:

# numactl -H; ./move_pages12 
available: 2 nodes (0-1)
node 0 cpus: 0 2 4 6 8 10 12 14 16 18 20 22
node 0 size: 31963 MB
node 0 free: 31600 MB
node 1 cpus: 1 3 5 7 9 11 13 15 17 19 21 23
node 1 size: 32251 MB
node 1 free: 31915 MB
node distances:
node   0   1 
  0:  10  20 
  1:  20  10

tst_test.c:847: INFO: Timeout per run is 0h 05m 00s
move_pages12.c:184: INFO: Free RAM 65040204 kB
move_pages12.c:139: INFO: Allocating and freeing 2 hugepages on node 0
move_pages12.c:139: INFO: Allocating and freeing 2 hugepages on node 1
nodes: 0 1
move_pages12.c:87: FAIL: move_pages failed: ENOMEM

Summary:
passed   0
failed   1
skipped  0
warnings 0

Regards,
Jan


* [LTP] [RFC] [PATCH] move_pages12: Allocate and free hugepages prior to the test
  2017-05-10  8:56 ` Jan Stancek
@ 2017-05-10 12:21   ` Cyril Hrubis
  2017-05-10 13:01     ` Jan Stancek
  2017-05-10 13:49   ` Cyril Hrubis
From: Cyril Hrubis @ 2017-05-10 12:21 UTC
  To: ltp

Hi!
> I'm still getting sporadic failures with a 4.11 kernel. It's a freshly
> booted system, so I would expect fragmentation to be low:
> 
> # numactl -H; ./move_pages12 
> available: 2 nodes (0-1)
> node 0 cpus: 0 2 4 6 8 10 12 14 16 18 20 22
> node 0 size: 31963 MB
> node 0 free: 31600 MB
> node 1 cpus: 1 3 5 7 9 11 13 15 17 19 21 23
> node 1 size: 32251 MB
> node 1 free: 31915 MB
> node distances:
> node   0   1 
>   0:  10  20 
>   1:  20  10
> 
> tst_test.c:847: INFO: Timeout per run is 0h 05m 00s
> move_pages12.c:184: INFO: Free RAM 65040204 kB
> move_pages12.c:139: INFO: Allocating and freeing 2 hugepages on node 0
> move_pages12.c:139: INFO: Allocating and freeing 2 hugepages on node 1
> nodes: 0 1
> move_pages12.c:87: FAIL: move_pages failed: ENOMEM

Hmm, reproduced here as well; one out of 100 runs failed for me too.

I've looked at the code in mm/migrate.c but so far I could not spot a
place where it could fail with ENOMEM (apart from the very unlikely
failure of the single page allocation used to store the parameters
passed to the syscall).

If I comment out the memset() in the test main loop the failure does
not seem to happen. So my guess is that the failure happens when we try
to isolate a page that is in the process of teardown, that the ENOMEM
comes from somewhere in the memory management internals, and that it is
OK to ignore this failure. But that is just a wild guess.

-- 
Cyril Hrubis
chrubis@suse.cz


* [LTP] [RFC] [PATCH] move_pages12: Allocate and free hugepages prior to the test
  2017-05-10 12:21   ` Cyril Hrubis
@ 2017-05-10 13:01     ` Jan Stancek
From: Jan Stancek @ 2017-05-10 13:01 UTC
  To: ltp


----- Original Message -----
> Hi!
> > I'm still getting sporadic failures with a 4.11 kernel. It's a freshly
> > booted system, so I would expect fragmentation to be low:
> > 
> > # numactl -H; ./move_pages12
> > available: 2 nodes (0-1)
> > node 0 cpus: 0 2 4 6 8 10 12 14 16 18 20 22
> > node 0 size: 31963 MB
> > node 0 free: 31600 MB
> > node 1 cpus: 1 3 5 7 9 11 13 15 17 19 21 23
> > node 1 size: 32251 MB
> > node 1 free: 31915 MB
> > node distances:
> > node   0   1
> >   0:  10  20
> >   1:  20  10
> > 
> > tst_test.c:847: INFO: Timeout per run is 0h 05m 00s
> > move_pages12.c:184: INFO: Free RAM 65040204 kB
> > move_pages12.c:139: INFO: Allocating and freeing 2 hugepages on node 0
> > move_pages12.c:139: INFO: Allocating and freeing 2 hugepages on node 1
> > nodes: 0 1
> > move_pages12.c:87: FAIL: move_pages failed: ENOMEM
> 
> Hmm, reproduced here as well; one out of 100 runs failed for me too.
> 
> I've looked at the code in mm/migrate.c but so far I could not spot a
> place where it could fail with ENOMEM (apart from the very unlikely
> failure of the single page allocation used to store the parameters
> passed to the syscall).
> 
> If I comment out the memset() in the test main loop the failure does
> not seem to happen. So my guess is that the failure happens when we try
> to isolate a page that is in the process of teardown, that the ENOMEM
> comes from somewhere in the memory management internals, and that it is
> OK to ignore this failure. But that is just a wild guess.

Here's end of strace log from my system:
  https://paste.fedoraproject.org/paste/gg8Lbjg8LI-YM5S6dS8Aol5M1UNdIGYhyRLivL9gydE=/raw

It also suggests that failure happened sometime during memset() call.

> 
> --
> Cyril Hrubis
> chrubis@suse.cz
> 


* [LTP] [RFC] [PATCH] move_pages12: Allocate and free hugepages prior to the test
  2017-05-10  8:56 ` Jan Stancek
  2017-05-10 12:21   ` Cyril Hrubis
@ 2017-05-10 13:49   ` Cyril Hrubis
  2017-05-10 14:14     ` Jan Stancek
From: Cyril Hrubis @ 2017-05-10 13:49 UTC
  To: ltp

Hi!
I've got a hint from our kernel devs that the problem may be that the
per-node hugepage pool limits are set too low, and increasing these
seems to fix the issue for me. Apparently /proc/sys/vm/nr_hugepages is
a global limit, while the per-node limits are in sysfs.

Try increasing:

/sys/devices/system/node/node*/hugepages/hugepages-2048kB/nr_hugepages

Also, if I write 0 to nr_hugepages there while the test is running,
move_pages() fails with ENOMEM reproducibly.

I will prepare a patch that will increase these limits in the test setup
temporarily.
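
Roughly along these lines, with a hypothetical set_node_hugepages()
helper (untested sketch; assumes the default 2048kB pool and trims
error reporting):

	#include <stdio.h>

	/* Write a new hugepage pool size for one node.
	 * Returns 0 on success, -1 on failure. */
	static int set_node_hugepages(unsigned int node, long count)
	{
		char path[256];
		FILE *f;

		snprintf(path, sizeof(path),
			 "/sys/devices/system/node/node%u/hugepages"
			 "/hugepages-2048kB/nr_hugepages", node);

		f = fopen(path, "w");
		if (!f)
			return -1;

		fprintf(f, "%ld", count);

		/* the write is flushed at fclose(), which reports failure */
		return fclose(f) ? -1 : 0;
	}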

-- 
Cyril Hrubis
chrubis@suse.cz


* [LTP] [RFC] [PATCH] move_pages12: Allocate and free hugepages prior to the test
  2017-05-10 13:49   ` Cyril Hrubis
@ 2017-05-10 14:14     ` Jan Stancek
  2017-05-10 15:08       ` Cyril Hrubis
From: Jan Stancek @ 2017-05-10 14:14 UTC
  To: ltp


----- Original Message -----
> Hi!
> I've got a hint from our kernel devs that the problem may be that the
> per-node hugepage pool limits are set too low, and increasing these
> seems to fix the issue for me. Apparently /proc/sys/vm/nr_hugepages is
> a global limit, while the per-node limits are in sysfs.
> 
> Try increasing:
> 
> /sys/devices/system/node/node*/hugepages/hugepages-2048kB/nr_hugepages

I'm not sure how that explains why it fails mid-test and not
immediately after start. It reminds me of the sporadic hugetlbfs
testsuite failures in the "counters" testcase.

diff --git a/testcases/kernel/syscalls/move_pages/move_pages12.c b/testcases/kernel/syscalls/move_pages/move_pages12.c
index 443b0c6..fe8384f 100644
--- a/testcases/kernel/syscalls/move_pages/move_pages12.c
+++ b/testcases/kernel/syscalls/move_pages/move_pages12.c
@@ -84,6 +84,12 @@ static void do_child(void)
                        pages, nodes, status, MPOL_MF_MOVE_ALL));
                if (TEST_RETURN) {
                        tst_res(TFAIL | TTERRNO, "move_pages failed");
+                       system("cat /sys/devices/system/node/node*/hugepages/hugepages-2048kB/nr_hugepages");
+                       system("cat /sys/devices/system/node/node*/hugepages/hugepages-2048kB/free_hugepages");
                        break;
                }
        }

I have 2 huge pages on each node when it fails:

tst_test.c:847: INFO: Timeout per run is 0h 05m 00s
move_pages12.c:190: INFO: Free RAM 45745800 kB
move_pages12.c:86: FAIL: move_pages failed: ENOMEM
moving to node: 0
2
2
0
2

I'm trying now with 40 instead of 4 huge pages.

Regards,
Jan


> 
> Also, if I write 0 to nr_hugepages there while the test is running,
> move_pages() fails with ENOMEM reproducibly.
> 
> I will prepare a patch that will increase these limits in the test setup
> temporarily.
> 
> --
> Cyril Hrubis
> chrubis@suse.cz
> 


* [LTP] [RFC] [PATCH] move_pages12: Allocate and free hugepages prior to the test
  2017-05-10 14:14     ` Jan Stancek
@ 2017-05-10 15:08       ` Cyril Hrubis
  2017-05-11  6:40         ` Jan Stancek
From: Cyril Hrubis @ 2017-05-10 15:08 UTC
  To: ltp

Hi!
> > I've got a hint from our kernel devs that the problem may be that the
> > per-node hugepage pool limits are set too low, and increasing these
> > seems to fix the issue for me. Apparently /proc/sys/vm/nr_hugepages is
> > a global limit, while the per-node limits are in sysfs.
> > 
> > Try increasing:
> > 
> > /sys/devices/system/node/node*/hugepages/hugepages-2048kB/nr_hugepages
> 
> I'm not sure how that explains why it fails mid-test and not
> immediately after start. It reminds me of the sporadic hugetlbfs
> testsuite failures in the "counters" testcase.

Probably some kind of lazy update / deferred freeing that still
accounts for freshly removed pages.

> diff --git a/testcases/kernel/syscalls/move_pages/move_pages12.c b/testcases/kernel/syscalls/move_pages/move_pages12.c
> index 443b0c6..fe8384f 100644
> --- a/testcases/kernel/syscalls/move_pages/move_pages12.c
> +++ b/testcases/kernel/syscalls/move_pages/move_pages12.c
> @@ -84,6 +84,12 @@ static void do_child(void)
>                         pages, nodes, status, MPOL_MF_MOVE_ALL));
>                 if (TEST_RETURN) {
>                         tst_res(TFAIL | TTERRNO, "move_pages failed");
> +                       system("cat /sys/devices/system/node/node*/hugepages/hugepages-2048kB/nr_hugepages");
> +                       system("cat /sys/devices/system/node/node*/hugepages/hugepages-2048kB/free_hugepages");
>                         break;
>                 }
>         }

Well, that is a few forks away from the failure; if the race window is
small enough we will never see the real value, but maybe doing open()
and read() directly would show us different values.
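
E.g. a hypothetical dump_free_hugepages() along these lines (untested
sketch; the 2048kB pool directory from above is hardcoded):

	#include <stdio.h>
	#include <fcntl.h>
	#include <unistd.h>

	/* Print free_hugepages for a node right at the failure site,
	 * without the fork()+exec() delay of system(). */
	static void dump_free_hugepages(unsigned int node)
	{
		char path[256], buf[32];
		ssize_t len;
		int fd;

		snprintf(path, sizeof(path),
			 "/sys/devices/system/node/node%u/hugepages"
			 "/hugepages-2048kB/free_hugepages", node);

		fd = open(path, O_RDONLY);
		if (fd < 0)
			return;

		len = read(fd, buf, sizeof(buf) - 1);
		if (len > 0) {
			buf[len] = '\0';
			fprintf(stderr, "node%u free_hugepages: %s", node, buf);
		}

		close(fd);
	}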

-- 
Cyril Hrubis
chrubis@suse.cz


* [LTP] [RFC] [PATCH] move_pages12: Allocate and free hugepages prior to the test
  2017-05-10 15:08       ` Cyril Hrubis
@ 2017-05-11  6:40         ` Jan Stancek
  2017-05-11 12:26           ` Cyril Hrubis
From: Jan Stancek @ 2017-05-11  6:40 UTC
  To: ltp



----- Original Message -----
> Hi!
> > > I've got a hint from our kernel devs that the problem may be that the
> > > per-node hugepage pool limits are set too low, and increasing these
> > > seems to fix the issue for me. Apparently /proc/sys/vm/nr_hugepages is
> > > a global limit, while the per-node limits are in sysfs.
> > > 
> > > Try increasing:
> > > 
> > > /sys/devices/system/node/node*/hugepages/hugepages-2048kB/nr_hugepages
> > 
> > I'm not sure how that explains why it fails mid-test and not
> > immediately after start. It reminds me of the sporadic hugetlbfs
> > testsuite failures in the "counters" testcase.
> 
> Probably some kind of lazy update / deferred freeing that still
> accounts for freshly removed pages.

That was my impression as well.

> 
> > diff --git a/testcases/kernel/syscalls/move_pages/move_pages12.c b/testcases/kernel/syscalls/move_pages/move_pages12.c
> > index 443b0c6..fe8384f 100644
> > --- a/testcases/kernel/syscalls/move_pages/move_pages12.c
> > +++ b/testcases/kernel/syscalls/move_pages/move_pages12.c
> > @@ -84,6 +84,12 @@ static void do_child(void)
> >                         pages, nodes, status, MPOL_MF_MOVE_ALL));
> >                 if (TEST_RETURN) {
> >                         tst_res(TFAIL | TTERRNO, "move_pages failed");
> > +                       system("cat /sys/devices/system/node/node*/hugepages/hugepages-2048kB/nr_hugepages");
> > +                       system("cat /sys/devices/system/node/node*/hugepages/hugepages-2048kB/free_hugepages");
> >                         break;
> >                 }
> >         }
> 
> Well, that is a few forks away from the failure; if the race window is
> small enough we will never see the real value, but maybe doing open()
> and read() directly would show us different values.

For free/reserved, sure. But is the number of reserved huge pages on
each node going to change over time?

---

I was running with 20+20 huge pages overnight and it hasn't failed a
single time. So I'm thinking we allocate 3+3 or 4+4 to avoid any
issues related to lazy/deferred updates.

Regards,
Jan


* [LTP] [RFC] [PATCH] move_pages12: Allocate and free hugepages prior to the test
  2017-05-11  6:40         ` Jan Stancek
@ 2017-05-11 12:26           ` Cyril Hrubis
  2017-05-11 12:50             ` Jan Stancek
From: Cyril Hrubis @ 2017-05-11 12:26 UTC
  To: ltp

Hi!
> > Well, that is a few forks away from the failure; if the race window is
> > small enough we will never see the real value, but maybe doing open()
> > and read() directly would show us different values.
> 
> For free/reserved, sure. But is the number of reserved huge pages on
> each node going to change over time?

Of course I was speaking about the number of currently free huge pages.
The pool limit will not change unless something from userspace writes to
the sysfs file...

> ---
> 
> I was running with 20+20 huge pages overnight and it hasn't failed a
> single time. So I'm thinking we allocate 3+3 or 4+4 to avoid any
> issues related to lazy/deferred updates.

But we have to lift the per-node limits as well, right?

So what about lifting the per-node limit to something like 20 and then
trying to allocate 4 hugepages on each node prior to the test?

-- 
Cyril Hrubis
chrubis@suse.cz


* [LTP] [RFC] [PATCH] move_pages12: Allocate and free hugepages prior to the test
  2017-05-11 12:26           ` Cyril Hrubis
@ 2017-05-11 12:50             ` Jan Stancek
  2017-05-16  9:30               ` Cyril Hrubis
From: Jan Stancek @ 2017-05-11 12:50 UTC
  To: ltp



----- Original Message -----
> Hi!
> > > Well, that is a few forks away from the failure; if the race window is
> > > small enough we will never see the real value, but maybe doing open()
> > > and read() directly would show us different values.
> > 
> > For free/reserved, sure. But is the number of reserved huge pages on
> > each node going to change over time?
> 
> Of course I was speaking about the number of currently free huge pages.
> The pool limit will not change unless something from userspace writes to
> the sysfs file...
> 
> > ---
> > 
> > I was running with 20+20 huge pages overnight and it hasn't failed a
> > single time. So I'm thinking we allocate 3+3 or 4+4 to avoid any
> > issues related to lazy/deferred updates.
> 
> But we have to lift the per-node limits as well, right?

Sorry, what I meant by 'allocate' was configuring the per-node limits.
I was using your patch as-is, with 2 huge pages allocated/touched
on each node. 

> 
> So what about lifting the per-node limit to something like 20 and then
> trying to allocate 4 hugepages on each node prior to the test?

A per-node limit of 8 and allocating 4 hugepages on each? What worries
me are architectures where the default huge page is very large
(e.g. 512M on aarch64).
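
For reference, the test already reads the huge page size at setup time
from /proc/meminfo; a minimal plain-C sketch of that lookup (with a
hypothetical default_hugepage_kb() helper), so any limits we pick could
be scaled by size rather than by a fixed page count:

	#include <stdio.h>

	/* Return the default huge page size in kB from /proc/meminfo,
	 * e.g. 2048 on x86_64, or -1 if the field is not found. */
	static long default_hugepage_kb(void)
	{
		char line[128];
		long kb = -1;
		FILE *f = fopen("/proc/meminfo", "r");

		if (!f)
			return -1;

		while (fgets(line, sizeof(line), f)) {
			if (sscanf(line, "Hugepagesize: %ld kB", &kb) == 1)
				break;
		}

		fclose(f);
		return kb;
	}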

Regards,
Jan


* [LTP] [RFC] [PATCH] move_pages12: Allocate and free hugepages prior to the test
  2017-05-11 12:50             ` Jan Stancek
@ 2017-05-16  9:30               ` Cyril Hrubis
From: Cyril Hrubis @ 2017-05-16  9:30 UTC
  To: ltp

Hi!
> A per-node limit of 8 and allocating 4 hugepages on each? What worries
> me are architectures where the default huge page is very large
> (e.g. 512M on aarch64).

I finally got access to my testing machines, and here is what I found:

The kernel tries to distribute the huge page pool evenly between the
nodes, hence for a machine with two nodes reserving 8 instead of 4 huge
pages fixes the problem completely, i.e. the following patch:


diff --git a/testcases/kernel/syscalls/move_pages/move_pages12.c b/testcases/kernel/syscalls/move_pages/move_pages12.c
index de0034626..0305055a9 100644
--- a/testcases/kernel/syscalls/move_pages/move_pages12.c
+++ b/testcases/kernel/syscalls/move_pages/move_pages12.c
@@ -149,7 +149,7 @@ static void setup(void)
        hpsz *= 1024;
 
        SAFE_FILE_SCANF(PATH_NR_HUGEPAGES, "%ld", &orig_hugepages);
-       SAFE_FILE_PRINTF(PATH_NR_HUGEPAGES, "%ld", orig_hugepages + 4);
+       SAFE_FILE_PRINTF(PATH_NR_HUGEPAGES, "%ld", orig_hugepages + 8);
 
        ret = get_allowed_nodes(NH_MEMS, TEST_NODES, &node1, &node2);
        if (ret < 0)

Reserving eight more huge pages in the global pool reserves 4 more on
each node: the kernel adjusts
/sys/devices/system/node/node*/hugepages/hugepages-2048kB/nr_hugepages
by 4 and everything works fine (reserving just two is prone to some
kind of race, which is what we discussed before).

Now this would not work on a machine with more than two nodes, hence
we may as well try to update the per-node pools directly.

So the following patch does:

1. Tries to raise the per-node hugepage pools by 4 huge pages
2. If 1. fails, raises the global pool by 8 and allocates
   4 huge pages on each node

Which seems to be the best approach to me to make sure that there are
enough huge pages available for the test.

From 995787fa48c24ed4507d6aa605162ff16c81f4b6 Mon Sep 17 00:00:00 2001
From: Cyril Hrubis <chrubis@suse.cz>
Date: Tue, 9 May 2017 15:43:47 +0200
Subject: [PATCH] move_pages12: Make sure hugepages are available

This commit makes sure that enough huge pages are available on each node prior
to the test.

One problem we had is that there have to be at least four huge pages
available in the per-node pools even though we only allocate two. One
possibility is that, while we are moving pages back and forth between
the nodes, there may be some overlap where a huge page is allocated on
a node while the two huge pages that are about to be moved are still
there, or at least still accounted for. Hence we have to make sure that
at least four huge pages are available prior to the test.

The second problem is that the huge page pools are limited by several
files in the virtual filesystem. There is a global knob for controlling
the huge page pool size in /proc, and there are per-node knobs in /sys.
The value written to the global knob is distributed evenly between the
per-node knobs, hence on a two-node machine writing 8 to the global
knob is sufficient to make sure there are enough huge pages for the
test. But that does not work if the machine has three or more nodes.
Hence this patch tries to adjust the per-node pools on the nodes
selected for the test, and only if that is not possible do we adjust
the global knob and then make sure that the expected number of huge
pages can be allocated on each node.

Signed-off-by: Cyril Hrubis <chrubis@suse.cz>
---
 .../kernel/syscalls/move_pages/move_pages12.c      | 101 +++++++++++++++++++--
 1 file changed, 94 insertions(+), 7 deletions(-)

diff --git a/testcases/kernel/syscalls/move_pages/move_pages12.c b/testcases/kernel/syscalls/move_pages/move_pages12.c
index de00346..6a1a186 100644
--- a/testcases/kernel/syscalls/move_pages/move_pages12.c
+++ b/testcases/kernel/syscalls/move_pages/move_pages12.c
@@ -35,6 +35,7 @@
 #include <errno.h>
 #include <unistd.h>
 #include <string.h>
+#include <stdio.h>
 #include <sys/types.h>
 #include <sys/wait.h>
 
@@ -52,7 +53,11 @@
 #define TEST_NODES	2
 
 static int pgsz, hpsz;
-static long orig_hugepages;
+static long orig_hugepages = -1;
+static char path_hugepages_node1[PATH_MAX];
+static char path_hugepages_node2[PATH_MAX];
+static long orig_hugepages_node1 = -1;
+static long orig_hugepages_node2 = -1;
 static unsigned int node1, node2;
 static void *addr;
 
@@ -128,6 +133,45 @@ static void do_test(void)
 	}
 }
 
+static void alloc_free_huge_on_node(unsigned int node, size_t size)
+{
+	char *mem;
+	long ret;
+	struct bitmask *bm;
+
+	tst_res(TINFO, "Allocating and freeing %zu hugepages on node %u",
+		size / hpsz, node);
+
+	mem = mmap(NULL, size, PROT_READ | PROT_WRITE,
+		   MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
+	if (mem == MAP_FAILED) {
+		if (errno == ENOMEM)
+			tst_brk(TCONF, "Cannot allocate huge pages");
+
+		tst_brk(TBROK | TERRNO, "mmap(..., MAP_HUGETLB, ...) failed");
+	}
+
+	bm = numa_bitmask_alloc(numa_max_possible_node() + 1);
+	if (!bm)
+		tst_brk(TBROK | TERRNO, "numa_bitmask_alloc() failed");
+
+	numa_bitmask_setbit(bm, node);
+
+	ret = mbind(mem, size, MPOL_BIND, bm->maskp, bm->size + 1, 0);
+	if (ret) {
+		if (errno == ENOMEM)
+			tst_brk(TCONF, "Cannot mbind huge pages");
+
+		tst_brk(TBROK | TERRNO, "mbind() failed");
+	}
+
+	numa_bitmask_free(bm);
+
+	memset(mem, 0, size);
+
+	SAFE_MUNMAP(mem, size);
+}
+
 static void setup(void)
 {
 	int memfree, ret;
@@ -137,6 +181,10 @@ static void setup(void)
 	if (access(PATH_HUGEPAGES, F_OK))
 		tst_brk(TCONF, "Huge page not supported");
 
+	ret = get_allowed_nodes(NH_MEMS, TEST_NODES, &node1, &node2);
+	if (ret < 0)
+		tst_brk(TBROK | TERRNO, "get_allowed_nodes: %d", ret);
+
 	pgsz = (int)get_page_size();
 	SAFE_FILE_LINES_SCANF(PATH_MEMINFO, "Hugepagesize: %d", &hpsz);
 
@@ -148,17 +196,56 @@ static void setup(void)
 
 	hpsz *= 1024;
 
-	SAFE_FILE_SCANF(PATH_NR_HUGEPAGES, "%ld", &orig_hugepages);
-	SAFE_FILE_PRINTF(PATH_NR_HUGEPAGES, "%ld", orig_hugepages + 4);
+	snprintf(path_hugepages_node1, sizeof(path_hugepages_node1),
+		 "/sys/devices/system/node/node%u/hugepages/hugepages-2048kB/nr_hugepages",
+		 node1);
+
+	snprintf(path_hugepages_node2, sizeof(path_hugepages_node2),
+		 "/sys/devices/system/node/node%u/hugepages/hugepages-2048kB/nr_hugepages",
+		 node2);
+
+	if (!access(path_hugepages_node1, F_OK)) {
+		SAFE_FILE_SCANF(path_hugepages_node1,
+				"%ld", &orig_hugepages_node1);
+		tst_res(TINFO, "Increasing hugepages pool on node %u to %ld",
+			node1, orig_hugepages_node1 + 4);
+		SAFE_FILE_PRINTF(path_hugepages_node1,
+				 "%ld", orig_hugepages_node1 + 4);
+	}
 
-	ret = get_allowed_nodes(NH_MEMS, TEST_NODES, &node1, &node2);
-	if (ret < 0)
-		tst_brk(TBROK | TERRNO, "get_allowed_nodes: %d", ret);
+	if (!access(path_hugepages_node2, F_OK)) {
+		SAFE_FILE_SCANF(path_hugepages_node2,
+				"%ld", &orig_hugepages_node2);
+		tst_res(TINFO, "Increasing hugepages pool on node %u to %ld",
+			node2, orig_hugepages_node2 + 4);
+		SAFE_FILE_PRINTF(path_hugepages_node2,
+				 "%ld", orig_hugepages_node2 + 4);
+	}
+
+	if (orig_hugepages_node1 == -1 || orig_hugepages_node2 == -1) {
+		SAFE_FILE_SCANF(PATH_NR_HUGEPAGES, "%ld", &orig_hugepages);
+		tst_res(TINFO, "Increasing global hugepages pool to %ld",
+			orig_hugepages + 8);
+		SAFE_FILE_PRINTF(PATH_NR_HUGEPAGES, "%ld", orig_hugepages + 8);
+		alloc_free_huge_on_node(node1, 4 * hpsz);
+		alloc_free_huge_on_node(node2, 4 * hpsz);
+	}
 }
 
 static void cleanup(void)
 {
-	SAFE_FILE_PRINTF(PATH_NR_HUGEPAGES, "%ld", orig_hugepages);
+	if (orig_hugepages != -1)
+		SAFE_FILE_PRINTF(PATH_NR_HUGEPAGES, "%ld", orig_hugepages);
+
+	if (orig_hugepages_node1 != -1) {
+		SAFE_FILE_PRINTF(path_hugepages_node1,
+				 "%ld", orig_hugepages_node1);
+	}
+
+	if (orig_hugepages_node2 != -1) {
+		SAFE_FILE_PRINTF(path_hugepages_node2,
+				 "%ld", orig_hugepages_node2);
+	}
 }
 
 static struct tst_test test = {
-- 
2.7.3

-- 
Cyril Hrubis
chrubis@suse.cz

