All of lore.kernel.org
 help / color / mirror / Atom feed
* [LTP] [PATCH V3] madvice: new case for madvise(WILLNEED)
@ 2016-04-01  5:24 Li Wang
  2016-04-01  5:30 ` Li Wang
  2016-04-04 17:30 ` Cyril Hrubis
  0 siblings, 2 replies; 4+ messages in thread
From: Li Wang @ 2016-04-01  5:24 UTC (permalink / raw)
  To: ltp

A page fault occurs even though the madvise(WILLNEED) system call has been
called to prefetch the page. This issue is reproduced by running a program
which sequentially accesses shared memory and calls madvise(WILLNEED)
on the next page on a page fault.

Fixed by commit:
	55231e5c898 mm: madvise: fix MADV_WILLNEED on shmem swapouts

Signed-off-by: Li Wang <liwang@redhat.com>
---
 runtest/syscalls                              |   1 +
 testcases/kernel/syscalls/.gitignore          |   1 +
 testcases/kernel/syscalls/madvise/madvise06.c | 157 ++++++++++++++++++++++++++
 3 files changed, 159 insertions(+)
 create mode 100644 testcases/kernel/syscalls/madvise/madvise06.c

diff --git a/runtest/syscalls b/runtest/syscalls
index b41c927..732c2ca 100644
--- a/runtest/syscalls
+++ b/runtest/syscalls
@@ -743,6 +743,7 @@ madvise02 madvise02
 madvise03 madvise03
 madvise04 madvise04
 madvise05 madvise05
+madvise06 madvise06
 
 newuname01 newuname01
 
diff --git a/testcases/kernel/syscalls/.gitignore b/testcases/kernel/syscalls/.gitignore
index 0540928..ffa5db1 100644
--- a/testcases/kernel/syscalls/.gitignore
+++ b/testcases/kernel/syscalls/.gitignore
@@ -504,6 +504,7 @@
 /madvise/madvise03
 /madvise/madvise04
 /madvise/madvise05
+/madvise/madvise06
 /mallopt/mallopt01
 /mbind/mbind01
 /memcmp/memcmp01
diff --git a/testcases/kernel/syscalls/madvise/madvise06.c b/testcases/kernel/syscalls/madvise/madvise06.c
new file mode 100644
index 0000000..44a40ba
--- /dev/null
+++ b/testcases/kernel/syscalls/madvise/madvise06.c
@@ -0,0 +1,157 @@
+/*
+ * Copyright (c) 2016 Red Hat, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * DESCRIPTION
+ *
+ *   Page fault occurs in spite that madvise(WILLNEED) system call is called
+ *   to prefetch the page. This issue is reproduced by running a program
+ *   which sequentially accesses to a shared memory and calls madvise(WILLNEED)
+ *   to the next page on a page fault.
+ *
+ *   This bug is present in all RHEL7 versions. It looks like this was fixed in
+ *   mainline kernel > v3.15 by the following patch:
+ *
+ *   commit 55231e5c898c5c03c14194001e349f40f59bd300
+ *   Author: Johannes Weiner <hannes@cmpxchg.org>
+ *   Date:   Thu May 22 11:54:17 2014 -0700
+ *
+ *       mm: madvise: fix MADV_WILLNEED on shmem swapouts
+ */
+
+#include <stdio.h>
+#include <errno.h>
+#include <sys/sysinfo.h>
+
+#include "test.h"
+#include "safe_macros.h"
+
+char *TCID = "madvise06";
+int TST_TOTAL = 1;
+
+#ifdef __x86_64__
+
+#define GB_SZ  (1024*1024*1024)
+#define PG_SZ  (4*1024)
+
+static long dst_max;
+
+static void setup(void);
+static int  get_page_fault_num(void);
+static void test_advice_willneed(void);
+
+int main(int argc, char *argv[])
+{
+	int lc;
+
+	tst_parse_opts(argc, argv, NULL, NULL);
+
+	setup();
+
+	for (lc = 0; TEST_LOOPING(lc); lc++)
+		test_advice_willneed();
+
+	tst_exit();
+}
+
+static void setup(void)
+{
+	struct sysinfo sys_buf;
+
+	sysinfo(&sys_buf);
+
+	if (sys_buf.totalram < 2L * GB_SZ)
+		tst_brkm(TCONF, NULL, "Test requires more than 2GB of RAM");
+	if (sys_buf.totalram > 100L * GB_SZ)
+		tst_brkm(TCONF, NULL, "System RAM is too large, skip test");
+
+	dst_max = sys_buf.totalram / GB_SZ;
+	tst_resm(TINFO, "dst_max = %ld", dst_max);
+
+	tst_sig(NOFORK, DEF_HANDLER, NULL);
+
+	TEST_PAUSE;
+}
+
+static int get_page_fault_num(void)
+{
+	int pg;
+
+	SAFE_FILE_SCANF(NULL, "/proc/self/stat",
+			"%*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %d",
+			&pg);
+
+	return pg;
+}
+
+static void test_advice_willneed(void)
+{
+	int i;
+	char *src;
+	char *dst[100];
+	int page_fault_num_1;
+	int page_fault_num_2;
+
+	/* allocate source memory (1GB only) */
+	src = SAFE_MMAP(NULL, NULL, 1 * GB_SZ, PROT_READ | PROT_WRITE,
+			MAP_SHARED | MAP_ANONYMOUS,
+			-1, 0);
+
+	/* allocate destination memory (array) */
+	for (i = 0; i < dst_max; ++i)
+		dst[i] = SAFE_MMAP(NULL, NULL, 1 * GB_SZ,
+				PROT_READ | PROT_WRITE,
+				MAP_SHARED | MAP_ANONYMOUS,
+				-1, 0);
+
+	/* memmove source to each destination memories (for SWAP-OUT) */
+	for (i = 0; i < dst_max; ++i)
+		memmove(dst[i], src, 1 * GB_SZ);
+
+	tst_resm(TINFO, "PageFault(no madvice): %d", get_page_fault_num());
+
+	/* Do madvice() to dst[0] */
+	TEST(madvise(dst[0], PG_SZ, MADV_WILLNEED));
+	if (TEST_RETURN == -1)
+		tst_brkm(TBROK | TERRNO, NULL, "madvise failed");
+
+	page_fault_num_1 = get_page_fault_num();
+	tst_resm(TINFO, "PageFault(madvice / no mem access): %d",
+			page_fault_num_1);
+
+	*dst[0] = 'a';
+	page_fault_num_2 = get_page_fault_num();
+	tst_resm(TINFO, "PageFault(madvice / mem access): %d",
+			page_fault_num_2);
+
+	if (page_fault_num_1 != page_fault_num_2)
+		tst_resm(TFAIL, "Bug has been reproduced");
+	else
+		tst_resm(TPASS, "Regression test pass");
+
+	SAFE_MUNMAP(NULL, src, 1 * GB_SZ);
+	for (i = 0; i < dst_max; ++i)
+		SAFE_MUNMAP(NULL, dst[i], 1 * GB_SZ);
+}
+
+
+#else
+int main(void)
+{
+	tst_brkm(TCONF, NULL, "Only test on x86_64.");
+}
+#endif
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [LTP] [PATCH V3] madvice: new case for madvise(WILLNEED)
  2016-04-01  5:24 [LTP] [PATCH V3] madvice: new case for madvise(WILLNEED) Li Wang
@ 2016-04-01  5:30 ` Li Wang
  2016-04-04 17:30 ` Cyril Hrubis
  1 sibling, 0 replies; 4+ messages in thread
From: Li Wang @ 2016-04-01  5:30 UTC (permalink / raw)
  To: ltp

Sorry, I forgot to append the change notes to patch V3.

    V2 --> V3
    * remove the unused <sys/shm.h>
    * reset TST_TOTAL = 1
    * rename variable dst_num to dst_max
    * make use of SAFE_MMAP(), SAFE_MUNMAP()
    * no sleep() in the program
    * fix the lines over 80 chars issue
    * delete the kernel choice lines


-- 
Regards,
Li Wang
Email: liwang@redhat.com
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.linux.it/pipermail/ltp/attachments/20160401/b91c3aa5/attachment.html>

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [LTP] [PATCH V3] madvice: new case for madvise(WILLNEED)
  2016-04-01  5:24 [LTP] [PATCH V3] madvice: new case for madvise(WILLNEED) Li Wang
  2016-04-01  5:30 ` Li Wang
@ 2016-04-04 17:30 ` Cyril Hrubis
  2016-04-05  4:34   ` Li Wang
  1 sibling, 1 reply; 4+ messages in thread
From: Cyril Hrubis @ 2016-04-04 17:30 UTC (permalink / raw)
  To: ltp

Hi!
> +#ifdef __x86_64__

Last question, why do we limit the test to x86_64?

Since apart from hardcoded page size, there does not seem to be anything
x86_64 specific at all.

Otherwise the test looks fine.

-- 
Cyril Hrubis
chrubis@suse.cz

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [LTP] [PATCH V3] madvice: new case for madvise(WILLNEED)
  2016-04-04 17:30 ` Cyril Hrubis
@ 2016-04-05  4:34   ` Li Wang
  0 siblings, 0 replies; 4+ messages in thread
From: Li Wang @ 2016-04-05  4:34 UTC (permalink / raw)
  To: ltp

Hello Cyril,

On Tue, Apr 5, 2016 at 1:30 AM, Cyril Hrubis <chrubis@suse.cz> wrote:

> Hi!
> > +#ifdef __x86_64__
>
> Last question, why do we limit the test to x86_64?
>
> Since apart from hardcoded page size, there does not seem to be anything
> x86_64 specific at all.
>

Thanks for reminding me. I limited the test program to x86_64 because the bug
was detected on that platform; I did not notice the portability issue.

If the following changes test well (i.e. the bug is reproduced) on other
platforms, I will post V4.

v3 ---> v4
------------
$ git diff
diff --git a/testcases/kernel/syscalls/madvise/madvise06.c
b/testcases/kernel/syscalls/madvise/madvise06.c
index 44a40ba..41e51f9 100644
--- a/testcases/kernel/syscalls/madvise/madvise06.c
+++ b/testcases/kernel/syscalls/madvise/madvise06.c
@@ -43,10 +43,7 @@
 char *TCID = "madvise06";
 int TST_TOTAL = 1;

-#ifdef __x86_64__
-
 #define GB_SZ  (1024*1024*1024)
-#define PG_SZ  (4*1024)

 static long dst_max;

@@ -100,7 +97,7 @@ static int get_page_fault_num(void)

 static void test_advice_willneed(void)
 {
-       int i;
+       int i, pg_sz;
        char *src;
        char *dst[100];
        int page_fault_num_1;
@@ -124,8 +121,9 @@ static void test_advice_willneed(void)

        tst_resm(TINFO, "PageFault(no madvice): %d", get_page_fault_num());

+       pg_sz = getpagesize();
        /* Do madvice() to dst[0] */
-       TEST(madvise(dst[0], PG_SZ, MADV_WILLNEED));
+       TEST(madvise(dst[0], pg_sz, MADV_WILLNEED));
        if (TEST_RETURN == -1)
                tst_brkm(TBROK | TERRNO, NULL, "madvise failed");

@@ -147,11 +145,3 @@ static void test_advice_willneed(void)
        for (i = 0; i < dst_max; ++i)
                SAFE_MUNMAP(NULL, dst[i], 1 * GB_SZ);
 }
-
-
-#else
-int main(void)
-{
-       tst_brkm(TCONF, NULL, "Only test on x86_64.");
-}
-#endif



>
> Otherwise the test looks fine.
>
> --
> Cyril Hrubis
> chrubis@suse.cz
>



-- 
Regards,
Li Wang
Email: liwang@redhat.com
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.linux.it/pipermail/ltp/attachments/20160405/149c46b8/attachment.html>

^ permalink raw reply related	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2016-04-05  4:34 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-04-01  5:24 [LTP] [PATCH V3] madvice: new case for madvise(WILLNEED) Li Wang
2016-04-01  5:30 ` Li Wang
2016-04-04 17:30 ` Cyril Hrubis
2016-04-05  4:34   ` Li Wang

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.