All of lore.kernel.org
 help / color / mirror / Atom feed
From: Zi Yan <zi.yan@sent.com>
To: Matthew Wilcox <willy@infradead.org>, linux-mm@kvack.org
Cc: Roman Gushchin <roman.gushchin@linux.dev>,
	Shuah Khan <shuah@kernel.org>, Yang Shi <shy828301@gmail.com>,
	Miaohe Lin <linmiaohe@huawei.com>,
	Hugh Dickins <hughd@google.com>,
	"Kirill A . Shutemov" <kirill.shutemov@linux.intel.com>,
	linux-kernel@vger.kernel.org, cgroups@vger.kernel.org,
	linux-kselftest@vger.kernel.org, Zi Yan <ziy@nvidia.com>
Subject: [RFC PATCH 5/5] mm: huge_memory: enable debugfs to split huge pages to any order.
Date: Mon, 21 Mar 2022 10:21:28 -0400	[thread overview]
Message-ID: <20220321142128.2471199-6-zi.yan@sent.com> (raw)
In-Reply-To: <20220321142128.2471199-1-zi.yan@sent.com>

From: Zi Yan <ziy@nvidia.com>

It is used to test split_huge_page_to_list_to_order for pagecache THPs.
Also add test cases for split_huge_page_to_list_to_order via both
debugfs, truncating a file, and punching holes in a file.

Signed-off-by: Zi Yan <ziy@nvidia.com>
---
 mm/huge_memory.c                              |  26 ++-
 .../selftests/vm/split_huge_page_test.c       | 219 +++++++++++++++---
 2 files changed, 201 insertions(+), 44 deletions(-)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 76db0092a1e2..7645bb12fcbc 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2856,7 +2856,7 @@ static inline bool vma_not_suitable_for_thp_split(struct vm_area_struct *vma)
 }
 
 static int split_huge_pages_pid(int pid, unsigned long vaddr_start,
-				unsigned long vaddr_end)
+				unsigned long vaddr_end, unsigned int new_order)
 {
 	int ret = 0;
 	struct task_struct *task;
@@ -2926,7 +2926,7 @@ static int split_huge_pages_pid(int pid, unsigned long vaddr_start,
 		if (!trylock_page(page))
 			goto next;
 
-		if (!split_huge_page(page))
+		if (!split_huge_page_to_list_to_order(page, NULL, new_order))
 			split++;
 
 		unlock_page(page);
@@ -2944,7 +2944,7 @@ static int split_huge_pages_pid(int pid, unsigned long vaddr_start,
 }
 
 static int split_huge_pages_in_file(const char *file_path, pgoff_t off_start,
-				pgoff_t off_end)
+				pgoff_t off_end, unsigned int new_order)
 {
 	struct filename *file;
 	struct file *candidate;
@@ -2984,7 +2984,7 @@ static int split_huge_pages_in_file(const char *file_path, pgoff_t off_start,
 		if (!trylock_page(fpage))
 			goto next;
 
-		if (!split_huge_page(fpage))
+		if (!split_huge_page_to_list_to_order(fpage, NULL, new_order))
 			split++;
 
 		unlock_page(fpage);
@@ -3009,10 +3009,14 @@ static ssize_t split_huge_pages_write(struct file *file, const char __user *buf,
 {
 	static DEFINE_MUTEX(split_debug_mutex);
 	ssize_t ret;
-	/* hold pid, start_vaddr, end_vaddr or file_path, off_start, off_end */
+	/*
+	 * hold pid, start_vaddr, end_vaddr, new_order or
+	 * file_path, off_start, off_end, new_order
+	 */
 	char input_buf[MAX_INPUT_BUF_SZ];
 	int pid;
 	unsigned long vaddr_start, vaddr_end;
+	unsigned int new_order = 0;
 
 	ret = mutex_lock_interruptible(&split_debug_mutex);
 	if (ret)
@@ -3041,29 +3045,29 @@ static ssize_t split_huge_pages_write(struct file *file, const char __user *buf,
 			goto out;
 		}
 
-		ret = sscanf(buf, "0x%lx,0x%lx", &off_start, &off_end);
-		if (ret != 2) {
+		ret = sscanf(buf, "0x%lx,0x%lx,%d", &off_start, &off_end, &new_order);
+		if (ret != 2 && ret != 3) {
 			ret = -EINVAL;
 			goto out;
 		}
-		ret = split_huge_pages_in_file(file_path, off_start, off_end);
+		ret = split_huge_pages_in_file(file_path, off_start, off_end, new_order);
 		if (!ret)
 			ret = input_len;
 
 		goto out;
 	}
 
-	ret = sscanf(input_buf, "%d,0x%lx,0x%lx", &pid, &vaddr_start, &vaddr_end);
+	ret = sscanf(input_buf, "%d,0x%lx,0x%lx,%d", &pid, &vaddr_start, &vaddr_end, &new_order);
 	if (ret == 1 && pid == 1) {
 		split_huge_pages_all();
 		ret = strlen(input_buf);
 		goto out;
-	} else if (ret != 3) {
+	} else if (ret != 3 && ret != 4) {
 		ret = -EINVAL;
 		goto out;
 	}
 
-	ret = split_huge_pages_pid(pid, vaddr_start, vaddr_end);
+	ret = split_huge_pages_pid(pid, vaddr_start, vaddr_end, new_order);
 	if (!ret)
 		ret = strlen(input_buf);
 out:
diff --git a/tools/testing/selftests/vm/split_huge_page_test.c b/tools/testing/selftests/vm/split_huge_page_test.c
index 52497b7b9f1d..af01e7dca9c8 100644
--- a/tools/testing/selftests/vm/split_huge_page_test.c
+++ b/tools/testing/selftests/vm/split_huge_page_test.c
@@ -16,6 +16,7 @@
 #include <sys/mount.h>
 #include <malloc.h>
 #include <stdbool.h>
+#include <time.h>
 
 uint64_t pagesize;
 unsigned int pageshift;
@@ -24,10 +25,11 @@ uint64_t pmd_pagesize;
 #define PMD_SIZE_PATH "/sys/kernel/mm/transparent_hugepage/hpage_pmd_size"
 #define SPLIT_DEBUGFS "/sys/kernel/debug/split_huge_pages"
 #define SMAP_PATH "/proc/self/smaps"
+#define THP_FS_PATH "/mnt/thp_fs"
 #define INPUT_MAX 80
 
-#define PID_FMT "%d,0x%lx,0x%lx"
-#define PATH_FMT "%s,0x%lx,0x%lx"
+#define PID_FMT "%d,0x%lx,0x%lx,%d"
+#define PATH_FMT "%s,0x%lx,0x%lx,%d"
 
 #define PFN_MASK     ((1UL<<55)-1)
 #define KPF_THP      (1UL<<22)
@@ -75,23 +77,6 @@ static uint64_t read_pmd_pagesize(void)
 	return strtoul(buf, NULL, 10);
 }
 
-static int write_file(const char *path, const char *buf, size_t buflen)
-{
-	int fd;
-	ssize_t numwritten;
-
-	fd = open(path, O_WRONLY);
-	if (fd == -1)
-		return 0;
-
-	numwritten = write(fd, buf, buflen - 1);
-	close(fd);
-	if (numwritten < 1)
-		return 0;
-
-	return (unsigned int) numwritten;
-}
-
 static void write_debugfs(const char *fmt, ...)
 {
 	char input[INPUT_MAX];
@@ -106,11 +91,6 @@ static void write_debugfs(const char *fmt, ...)
 		printf("%s: Debugfs input is too long\n", __func__);
 		exit(EXIT_FAILURE);
 	}
-
-	if (!write_file(SPLIT_DEBUGFS, input, ret + 1)) {
-		perror(SPLIT_DEBUGFS);
-		exit(EXIT_FAILURE);
-	}
 }
 
 #define MAX_LINE_LENGTH 500
@@ -124,7 +104,7 @@ static bool check_for_pattern(FILE *fp, const char *pattern, char *buf)
 	return false;
 }
 
-static uint64_t check_huge(void *addr)
+static uint64_t check_huge(void *addr, const char *prefix)
 {
 	uint64_t thp = 0;
 	int ret;
@@ -149,13 +129,13 @@ static uint64_t check_huge(void *addr)
 		goto err_out;
 
 	/*
-	 * Fetch the AnonHugePages: in the same block and check the number of
+	 * Fetch the @prefix in the same block and check the number of
 	 * hugepages.
 	 */
-	if (!check_for_pattern(fp, "AnonHugePages:", buffer))
+	if (!check_for_pattern(fp, prefix, buffer))
 		goto err_out;
 
-	if (sscanf(buffer, "AnonHugePages:%10ld kB", &thp) != 1) {
+	if (sscanf(&buffer[strlen(prefix)], "%10ld kB", &thp) != 1) {
 		printf("Reading smap error\n");
 		exit(EXIT_FAILURE);
 	}
@@ -184,7 +164,7 @@ void split_pmd_thp(void)
 	for (i = 0; i < len; i++)
 		one_page[i] = (char)i;
 
-	thp_size = check_huge(one_page);
+	thp_size = check_huge(one_page, "AnonHugePages:");
 	if (!thp_size) {
 		printf("No THP is allocated\n");
 		exit(EXIT_FAILURE);
@@ -192,7 +172,7 @@ void split_pmd_thp(void)
 
 	/* split all THPs */
 	write_debugfs(PID_FMT, getpid(), (uint64_t)one_page,
-		(uint64_t)one_page + len);
+		(uint64_t)one_page + len, 0);
 
 	for (i = 0; i < len; i++)
 		if (one_page[i] != (char)i) {
@@ -201,7 +181,7 @@ void split_pmd_thp(void)
 		}
 
 
-	thp_size = check_huge(one_page);
+	thp_size = check_huge(one_page, "AnonHugePages:");
 	if (thp_size) {
 		printf("Still %ld kB AnonHugePages not split\n", thp_size);
 		exit(EXIT_FAILURE);
@@ -249,7 +229,7 @@ void split_pte_mapped_thp(void)
 	for (i = 0; i < len; i++)
 		one_page[i] = (char)i;
 
-	thp_size = check_huge(one_page);
+	thp_size = check_huge(one_page, "AnonHugePages:");
 	if (!thp_size) {
 		printf("No THP is allocated\n");
 		exit(EXIT_FAILURE);
@@ -284,7 +264,7 @@ void split_pte_mapped_thp(void)
 
 	/* split all remapped THPs */
 	write_debugfs(PID_FMT, getpid(), (uint64_t)pte_mapped,
-		      (uint64_t)pte_mapped + pagesize * 4);
+		      (uint64_t)pte_mapped + pagesize * 4, 0);
 
 	/* smap does not show THPs after mremap, use kpageflags instead */
 	thp_size = 0;
@@ -371,20 +351,193 @@ void split_file_backed_thp(void)
 	printf("file-backed THP split test done, please check dmesg for more information\n");
 }
 
+void create_pagecache_thp_and_fd(const char *testfile, size_t fd_size, int *fd, char **addr)
+{
+	size_t i;
+	int dummy;
+
+	srand(time(NULL));
+
+	*fd = open(testfile, O_CREAT | O_RDWR, 0664);
+	if (*fd == -1) {
+		perror("Failed to create a file at "THP_FS_PATH);
+		exit(EXIT_FAILURE);
+	}
+
+	for (i = 0; i < fd_size; i++) {
+		unsigned char byte = (unsigned char)i;
+
+		write(*fd, &byte, sizeof(byte));
+	}
+	close(*fd);
+	sync();
+	*fd = open("/proc/sys/vm/drop_caches", O_WRONLY);
+	if (*fd == -1) {
+		perror("open drop_caches");
+		exit(EXIT_FAILURE);
+	}
+	if (write(*fd, "3", 1) != 1) {
+		perror("write to drop_caches");
+		exit(EXIT_FAILURE);
+	}
+	close(*fd);
+
+	*fd = open(testfile, O_RDWR);
+	if (*fd == -1) {
+		perror("Failed to open a file at "THP_FS_PATH);
+		exit(EXIT_FAILURE);
+	}
+
+	*addr = mmap(NULL, fd_size, PROT_READ|PROT_WRITE, MAP_SHARED, *fd, 0);
+	if (*addr == (char *)-1) {
+		perror("cannot mmap");
+		exit(1);
+	}
+	madvise(*addr, fd_size, MADV_HUGEPAGE);
+
+	for (size_t i = 0; i < fd_size; i++)
+		dummy += *(*addr + i);
+
+	if (!check_huge(*addr, "FilePmdMapped:")) {
+		printf("No pagecache THP generated, please mount a filesystem "
+			"supporting pagecache THP at "THP_FS_PATH"\n");
+		exit(EXIT_FAILURE);
+	}
+}
+
+void split_thp_in_pagecache_to_order(size_t fd_size, int order)
+{
+	int fd;
+	char *addr;
+	size_t i;
+	const char testfile[] = THP_FS_PATH "/test";
+
+	create_pagecache_thp_and_fd(testfile, fd_size, &fd, &addr);
+
+	printf("split %ld kB pagecache page to order %d ... ", fd_size >> 10, order);
+	write_debugfs(PID_FMT, getpid(), (uint64_t)addr, (uint64_t)addr + fd_size, order);
+
+	for (i = 0; i < fd_size; i++)
+		if (*(addr + i) != (char)i) {
+			printf("%lu byte corrupted in the file\n", i);
+			exit(EXIT_FAILURE);
+		}
+
+	close(fd);
+	unlink(testfile);
+	printf("done\n");
+}
+
+void truncate_thp_in_pagecache_to_order(size_t fd_size, int order)
+{
+	int fd;
+	char *addr;
+	size_t i;
+	const char testfile[] = THP_FS_PATH "/test";
+
+	create_pagecache_thp_and_fd(testfile, fd_size, &fd, &addr);
+
+	printf("truncate %ld kB pagecache page to size %lu kB ... ", fd_size >> 10, 4UL << order);
+	ftruncate(fd, pagesize << order);
+
+	for (i = 0; i < (pagesize << order); i++)
+		if (*(addr + i) != (char)i) {
+			printf("%lu byte corrupted in the file\n", i);
+			exit(EXIT_FAILURE);
+		}
+
+	close(fd);
+	unlink(testfile);
+	printf("done\n");
+}
+
+void punch_hole_in_pagecache_thp(size_t fd_size, off_t offset[], off_t len[], int n)
+{
+	int fd, j;
+	char *addr;
+	size_t i;
+	const char testfile[] = THP_FS_PATH "/test";
+
+	create_pagecache_thp_and_fd(testfile, fd_size, &fd, &addr);
+
+	for (j = 0; j < n; j++) {
+		printf("addr: %lx, punch a hole at offset %ld kB with len %ld kB ... ",
+			(unsigned long)addr, offset[j] >> 10, len[j] >> 10);
+		fallocate(fd, FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE, offset[j], len[j]);
+		printf("done\n");
+	}
+
+	for (i = 0; i < fd_size; i++) {
+		int in_hole = 0;
+
+		for (j = 0; j < n; j++)
+			if (i >= offset[j] && i <= (offset[j] + len[j])) {
+				in_hole = 1;
+				break;
+			}
+
+		if (in_hole) {
+			if (*(addr + i)) {
+				printf("%lu byte non-zero after punch\n", i);
+				exit(EXIT_FAILURE);
+			}
+			continue;
+		}
+		if (*(addr + i) != (char)i) {
+			printf("%lu byte corrupted in the file\n", i);
+			exit(EXIT_FAILURE);
+		}
+	}
+
+	close(fd);
+	unlink(testfile);
+}
+
 int main(int argc, char **argv)
 {
+	int i;
+	size_t fd_size;
+	off_t offset[2], len[2];
+
 	if (geteuid() != 0) {
 		printf("Please run the benchmark as root\n");
 		exit(EXIT_FAILURE);
 	}
 
+	setbuf(stdout, NULL);
+
 	pagesize = getpagesize();
 	pageshift = ffs(pagesize) - 1;
 	pmd_pagesize = read_pmd_pagesize();
+	fd_size = 2 * pmd_pagesize;
 
 	split_pmd_thp();
 	split_pte_mapped_thp();
 	split_file_backed_thp();
 
+	for (i = 8; i >= 0; i--)
+		if (i != 1)
+			split_thp_in_pagecache_to_order(fd_size, i);
+
+	/*
+	 * for i is 1, truncate code in the kernel should create order-0 pages
+	 * instead of order-1 THPs, since order-1 THP is not supported. No error
+	 * is expected.
+	 */
+	for (i = 8; i >= 0; i--)
+		truncate_thp_in_pagecache_to_order(fd_size, i);
+
+	offset[0] = 123 * pagesize;
+	offset[1] = 4 * pagesize;
+	len[0] = 200 * pagesize;
+	len[1] = 16 * pagesize;
+	punch_hole_in_pagecache_thp(fd_size, offset, len, 2);
+
+	offset[0] = 259 * pagesize + pagesize / 2;
+	offset[1] = 33 * pagesize;
+	len[0] = 129 * pagesize;
+	len[1] = 16 * pagesize;
+	punch_hole_in_pagecache_thp(fd_size, offset, len, 2);
+
 	return 0;
 }
-- 
2.35.1


WARNING: multiple messages have this Message-ID (diff)
From: Zi Yan <zi.yan-vRdzynncJC4@public.gmane.org>
To: Matthew Wilcox <willy-wEGCiKHe2LqWVfeAwA7xHQ@public.gmane.org>,
	linux-mm-Bw31MaZKKs3YtjvyW6yDsg@public.gmane.org
Cc: Roman Gushchin
	<roman.gushchin-fxUVXftIFDnyG1zEObXtfA@public.gmane.org>,
	Shuah Khan <shuah-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>,
	Yang Shi <shy828301-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>,
	Miaohe Lin <linmiaohe-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>,
	Hugh Dickins <hughd-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>,
	"Kirill A . Shutemov"
	<kirill.shutemov-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	cgroups-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	linux-kselftest-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	Zi Yan <ziy-DDmLM1+adcrQT0dZR+AlfA@public.gmane.org>
Subject: [RFC PATCH 5/5] mm: huge_memory: enable debugfs to split huge pages to any order.
Date: Mon, 21 Mar 2022 10:21:28 -0400	[thread overview]
Message-ID: <20220321142128.2471199-6-zi.yan@sent.com> (raw)
In-Reply-To: <20220321142128.2471199-1-zi.yan-vRdzynncJC4@public.gmane.org>

From: Zi Yan <ziy-DDmLM1+adcrQT0dZR+AlfA@public.gmane.org>

It is used to test split_huge_page_to_list_to_order for pagecache THPs.
Also add test cases for split_huge_page_to_list_to_order via both
debugfs, truncating a file, and punching holes in a file.

Signed-off-by: Zi Yan <ziy-DDmLM1+adcrQT0dZR+AlfA@public.gmane.org>
---
 mm/huge_memory.c                              |  26 ++-
 .../selftests/vm/split_huge_page_test.c       | 219 +++++++++++++++---
 2 files changed, 201 insertions(+), 44 deletions(-)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 76db0092a1e2..7645bb12fcbc 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2856,7 +2856,7 @@ static inline bool vma_not_suitable_for_thp_split(struct vm_area_struct *vma)
 }
 
 static int split_huge_pages_pid(int pid, unsigned long vaddr_start,
-				unsigned long vaddr_end)
+				unsigned long vaddr_end, unsigned int new_order)
 {
 	int ret = 0;
 	struct task_struct *task;
@@ -2926,7 +2926,7 @@ static int split_huge_pages_pid(int pid, unsigned long vaddr_start,
 		if (!trylock_page(page))
 			goto next;
 
-		if (!split_huge_page(page))
+		if (!split_huge_page_to_list_to_order(page, NULL, new_order))
 			split++;
 
 		unlock_page(page);
@@ -2944,7 +2944,7 @@ static int split_huge_pages_pid(int pid, unsigned long vaddr_start,
 }
 
 static int split_huge_pages_in_file(const char *file_path, pgoff_t off_start,
-				pgoff_t off_end)
+				pgoff_t off_end, unsigned int new_order)
 {
 	struct filename *file;
 	struct file *candidate;
@@ -2984,7 +2984,7 @@ static int split_huge_pages_in_file(const char *file_path, pgoff_t off_start,
 		if (!trylock_page(fpage))
 			goto next;
 
-		if (!split_huge_page(fpage))
+		if (!split_huge_page_to_list_to_order(fpage, NULL, new_order))
 			split++;
 
 		unlock_page(fpage);
@@ -3009,10 +3009,14 @@ static ssize_t split_huge_pages_write(struct file *file, const char __user *buf,
 {
 	static DEFINE_MUTEX(split_debug_mutex);
 	ssize_t ret;
-	/* hold pid, start_vaddr, end_vaddr or file_path, off_start, off_end */
+	/*
+	 * hold pid, start_vaddr, end_vaddr, new_order or
+	 * file_path, off_start, off_end, new_order
+	 */
 	char input_buf[MAX_INPUT_BUF_SZ];
 	int pid;
 	unsigned long vaddr_start, vaddr_end;
+	unsigned int new_order = 0;
 
 	ret = mutex_lock_interruptible(&split_debug_mutex);
 	if (ret)
@@ -3041,29 +3045,29 @@ static ssize_t split_huge_pages_write(struct file *file, const char __user *buf,
 			goto out;
 		}
 
-		ret = sscanf(buf, "0x%lx,0x%lx", &off_start, &off_end);
-		if (ret != 2) {
+		ret = sscanf(buf, "0x%lx,0x%lx,%d", &off_start, &off_end, &new_order);
+		if (ret != 2 && ret != 3) {
 			ret = -EINVAL;
 			goto out;
 		}
-		ret = split_huge_pages_in_file(file_path, off_start, off_end);
+		ret = split_huge_pages_in_file(file_path, off_start, off_end, new_order);
 		if (!ret)
 			ret = input_len;
 
 		goto out;
 	}
 
-	ret = sscanf(input_buf, "%d,0x%lx,0x%lx", &pid, &vaddr_start, &vaddr_end);
+	ret = sscanf(input_buf, "%d,0x%lx,0x%lx,%d", &pid, &vaddr_start, &vaddr_end, &new_order);
 	if (ret == 1 && pid == 1) {
 		split_huge_pages_all();
 		ret = strlen(input_buf);
 		goto out;
-	} else if (ret != 3) {
+	} else if (ret != 3 && ret != 4) {
 		ret = -EINVAL;
 		goto out;
 	}
 
-	ret = split_huge_pages_pid(pid, vaddr_start, vaddr_end);
+	ret = split_huge_pages_pid(pid, vaddr_start, vaddr_end, new_order);
 	if (!ret)
 		ret = strlen(input_buf);
 out:
diff --git a/tools/testing/selftests/vm/split_huge_page_test.c b/tools/testing/selftests/vm/split_huge_page_test.c
index 52497b7b9f1d..af01e7dca9c8 100644
--- a/tools/testing/selftests/vm/split_huge_page_test.c
+++ b/tools/testing/selftests/vm/split_huge_page_test.c
@@ -16,6 +16,7 @@
 #include <sys/mount.h>
 #include <malloc.h>
 #include <stdbool.h>
+#include <time.h>
 
 uint64_t pagesize;
 unsigned int pageshift;
@@ -24,10 +25,11 @@ uint64_t pmd_pagesize;
 #define PMD_SIZE_PATH "/sys/kernel/mm/transparent_hugepage/hpage_pmd_size"
 #define SPLIT_DEBUGFS "/sys/kernel/debug/split_huge_pages"
 #define SMAP_PATH "/proc/self/smaps"
+#define THP_FS_PATH "/mnt/thp_fs"
 #define INPUT_MAX 80
 
-#define PID_FMT "%d,0x%lx,0x%lx"
-#define PATH_FMT "%s,0x%lx,0x%lx"
+#define PID_FMT "%d,0x%lx,0x%lx,%d"
+#define PATH_FMT "%s,0x%lx,0x%lx,%d"
 
 #define PFN_MASK     ((1UL<<55)-1)
 #define KPF_THP      (1UL<<22)
@@ -75,23 +77,6 @@ static uint64_t read_pmd_pagesize(void)
 	return strtoul(buf, NULL, 10);
 }
 
-static int write_file(const char *path, const char *buf, size_t buflen)
-{
-	int fd;
-	ssize_t numwritten;
-
-	fd = open(path, O_WRONLY);
-	if (fd == -1)
-		return 0;
-
-	numwritten = write(fd, buf, buflen - 1);
-	close(fd);
-	if (numwritten < 1)
-		return 0;
-
-	return (unsigned int) numwritten;
-}
-
 static void write_debugfs(const char *fmt, ...)
 {
 	char input[INPUT_MAX];
@@ -106,11 +91,6 @@ static void write_debugfs(const char *fmt, ...)
 		printf("%s: Debugfs input is too long\n", __func__);
 		exit(EXIT_FAILURE);
 	}
-
-	if (!write_file(SPLIT_DEBUGFS, input, ret + 1)) {
-		perror(SPLIT_DEBUGFS);
-		exit(EXIT_FAILURE);
-	}
 }
 
 #define MAX_LINE_LENGTH 500
@@ -124,7 +104,7 @@ static bool check_for_pattern(FILE *fp, const char *pattern, char *buf)
 	return false;
 }
 
-static uint64_t check_huge(void *addr)
+static uint64_t check_huge(void *addr, const char *prefix)
 {
 	uint64_t thp = 0;
 	int ret;
@@ -149,13 +129,13 @@ static uint64_t check_huge(void *addr)
 		goto err_out;
 
 	/*
-	 * Fetch the AnonHugePages: in the same block and check the number of
+	 * Fetch the @prefix in the same block and check the number of
 	 * hugepages.
 	 */
-	if (!check_for_pattern(fp, "AnonHugePages:", buffer))
+	if (!check_for_pattern(fp, prefix, buffer))
 		goto err_out;
 
-	if (sscanf(buffer, "AnonHugePages:%10ld kB", &thp) != 1) {
+	if (sscanf(&buffer[strlen(prefix)], "%10ld kB", &thp) != 1) {
 		printf("Reading smap error\n");
 		exit(EXIT_FAILURE);
 	}
@@ -184,7 +164,7 @@ void split_pmd_thp(void)
 	for (i = 0; i < len; i++)
 		one_page[i] = (char)i;
 
-	thp_size = check_huge(one_page);
+	thp_size = check_huge(one_page, "AnonHugePages:");
 	if (!thp_size) {
 		printf("No THP is allocated\n");
 		exit(EXIT_FAILURE);
@@ -192,7 +172,7 @@ void split_pmd_thp(void)
 
 	/* split all THPs */
 	write_debugfs(PID_FMT, getpid(), (uint64_t)one_page,
-		(uint64_t)one_page + len);
+		(uint64_t)one_page + len, 0);
 
 	for (i = 0; i < len; i++)
 		if (one_page[i] != (char)i) {
@@ -201,7 +181,7 @@ void split_pmd_thp(void)
 		}
 
 
-	thp_size = check_huge(one_page);
+	thp_size = check_huge(one_page, "AnonHugePages:");
 	if (thp_size) {
 		printf("Still %ld kB AnonHugePages not split\n", thp_size);
 		exit(EXIT_FAILURE);
@@ -249,7 +229,7 @@ void split_pte_mapped_thp(void)
 	for (i = 0; i < len; i++)
 		one_page[i] = (char)i;
 
-	thp_size = check_huge(one_page);
+	thp_size = check_huge(one_page, "AnonHugePages:");
 	if (!thp_size) {
 		printf("No THP is allocated\n");
 		exit(EXIT_FAILURE);
@@ -284,7 +264,7 @@ void split_pte_mapped_thp(void)
 
 	/* split all remapped THPs */
 	write_debugfs(PID_FMT, getpid(), (uint64_t)pte_mapped,
-		      (uint64_t)pte_mapped + pagesize * 4);
+		      (uint64_t)pte_mapped + pagesize * 4, 0);
 
 	/* smap does not show THPs after mremap, use kpageflags instead */
 	thp_size = 0;
@@ -371,20 +351,193 @@ void split_file_backed_thp(void)
 	printf("file-backed THP split test done, please check dmesg for more information\n");
 }
 
+void create_pagecache_thp_and_fd(const char *testfile, size_t fd_size, int *fd, char **addr)
+{
+	size_t i;
+	int dummy;
+
+	srand(time(NULL));
+
+	*fd = open(testfile, O_CREAT | O_RDWR, 0664);
+	if (*fd == -1) {
+		perror("Failed to create a file at "THP_FS_PATH);
+		exit(EXIT_FAILURE);
+	}
+
+	for (i = 0; i < fd_size; i++) {
+		unsigned char byte = (unsigned char)i;
+
+		write(*fd, &byte, sizeof(byte));
+	}
+	close(*fd);
+	sync();
+	*fd = open("/proc/sys/vm/drop_caches", O_WRONLY);
+	if (*fd == -1) {
+		perror("open drop_caches");
+		exit(EXIT_FAILURE);
+	}
+	if (write(*fd, "3", 1) != 1) {
+		perror("write to drop_caches");
+		exit(EXIT_FAILURE);
+	}
+	close(*fd);
+
+	*fd = open(testfile, O_RDWR);
+	if (*fd == -1) {
+		perror("Failed to open a file at "THP_FS_PATH);
+		exit(EXIT_FAILURE);
+	}
+
+	*addr = mmap(NULL, fd_size, PROT_READ|PROT_WRITE, MAP_SHARED, *fd, 0);
+	if (*addr == (char *)-1) {
+		perror("cannot mmap");
+		exit(1);
+	}
+	madvise(*addr, fd_size, MADV_HUGEPAGE);
+
+	for (size_t i = 0; i < fd_size; i++)
+		dummy += *(*addr + i);
+
+	if (!check_huge(*addr, "FilePmdMapped:")) {
+		printf("No pagecache THP generated, please mount a filesystem "
+			"supporting pagecache THP at "THP_FS_PATH"\n");
+		exit(EXIT_FAILURE);
+	}
+}
+
+void split_thp_in_pagecache_to_order(size_t fd_size, int order)
+{
+	int fd;
+	char *addr;
+	size_t i;
+	const char testfile[] = THP_FS_PATH "/test";
+
+	create_pagecache_thp_and_fd(testfile, fd_size, &fd, &addr);
+
+	printf("split %ld kB pagecache page to order %d ... ", fd_size >> 10, order);
+	write_debugfs(PID_FMT, getpid(), (uint64_t)addr, (uint64_t)addr + fd_size, order);
+
+	for (i = 0; i < fd_size; i++)
+		if (*(addr + i) != (char)i) {
+			printf("%lu byte corrupted in the file\n", i);
+			exit(EXIT_FAILURE);
+		}
+
+	close(fd);
+	unlink(testfile);
+	printf("done\n");
+}
+
+void truncate_thp_in_pagecache_to_order(size_t fd_size, int order)
+{
+	int fd;
+	char *addr;
+	size_t i;
+	const char testfile[] = THP_FS_PATH "/test";
+
+	create_pagecache_thp_and_fd(testfile, fd_size, &fd, &addr);
+
+	printf("truncate %ld kB pagecache page to size %lu kB ... ", fd_size >> 10, 4UL << order);
+	ftruncate(fd, pagesize << order);
+
+	for (i = 0; i < (pagesize << order); i++)
+		if (*(addr + i) != (char)i) {
+			printf("%lu byte corrupted in the file\n", i);
+			exit(EXIT_FAILURE);
+		}
+
+	close(fd);
+	unlink(testfile);
+	printf("done\n");
+}
+
+void punch_hole_in_pagecache_thp(size_t fd_size, off_t offset[], off_t len[], int n)
+{
+	int fd, j;
+	char *addr;
+	size_t i;
+	const char testfile[] = THP_FS_PATH "/test";
+
+	create_pagecache_thp_and_fd(testfile, fd_size, &fd, &addr);
+
+	for (j = 0; j < n; j++) {
+		printf("addr: %lx, punch a hole at offset %ld kB with len %ld kB ... ",
+			(unsigned long)addr, offset[j] >> 10, len[j] >> 10);
+		fallocate(fd, FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE, offset[j], len[j]);
+		printf("done\n");
+	}
+
+	for (i = 0; i < fd_size; i++) {
+		int in_hole = 0;
+
+		for (j = 0; j < n; j++)
+			if (i >= offset[j] && i <= (offset[j] + len[j])) {
+				in_hole = 1;
+				break;
+			}
+
+		if (in_hole) {
+			if (*(addr + i)) {
+				printf("%lu byte non-zero after punch\n", i);
+				exit(EXIT_FAILURE);
+			}
+			continue;
+		}
+		if (*(addr + i) != (char)i) {
+			printf("%lu byte corrupted in the file\n", i);
+			exit(EXIT_FAILURE);
+		}
+	}
+
+	close(fd);
+	unlink(testfile);
+}
+
 int main(int argc, char **argv)
 {
+	int i;
+	size_t fd_size;
+	off_t offset[2], len[2];
+
 	if (geteuid() != 0) {
 		printf("Please run the benchmark as root\n");
 		exit(EXIT_FAILURE);
 	}
 
+	setbuf(stdout, NULL);
+
 	pagesize = getpagesize();
 	pageshift = ffs(pagesize) - 1;
 	pmd_pagesize = read_pmd_pagesize();
+	fd_size = 2 * pmd_pagesize;
 
 	split_pmd_thp();
 	split_pte_mapped_thp();
 	split_file_backed_thp();
 
+	for (i = 8; i >= 0; i--)
+		if (i != 1)
+			split_thp_in_pagecache_to_order(fd_size, i);
+
+	/*
+	 * for i is 1, truncate code in the kernel should create order-0 pages
+	 * instead of order-1 THPs, since order-1 THP is not supported. No error
+	 * is expected.
+	 */
+	for (i = 8; i >= 0; i--)
+		truncate_thp_in_pagecache_to_order(fd_size, i);
+
+	offset[0] = 123 * pagesize;
+	offset[1] = 4 * pagesize;
+	len[0] = 200 * pagesize;
+	len[1] = 16 * pagesize;
+	punch_hole_in_pagecache_thp(fd_size, offset, len, 2);
+
+	offset[0] = 259 * pagesize + pagesize / 2;
+	offset[1] = 33 * pagesize;
+	len[0] = 129 * pagesize;
+	len[1] = 16 * pagesize;
+	punch_hole_in_pagecache_thp(fd_size, offset, len, 2);
+
 	return 0;
 }
-- 
2.35.1


  parent reply	other threads:[~2022-03-21 14:27 UTC|newest]

Thread overview: 46+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-03-21 14:21 [RFC PATCH 0/5] Split a huge page to any lower order pages Zi Yan
2022-03-21 14:21 ` Zi Yan
2022-03-21 14:21 ` [RFC PATCH 1/5] mm: memcg: make memcg huge page split support any order split Zi Yan
2022-03-21 14:21   ` Zi Yan
2022-03-21 18:57   ` Roman Gushchin
2022-03-21 18:57     ` Roman Gushchin
2022-03-21 19:07     ` Zi Yan
2022-03-21 19:07       ` Zi Yan
2022-03-21 19:54       ` Matthew Wilcox
2022-03-21 19:54         ` Matthew Wilcox
2022-03-21 20:26         ` Zi Yan
2022-03-21 20:26           ` Zi Yan
2022-03-21 14:21 ` [RFC PATCH 2/5] mm: page_owner: add support for splitting to any order in split page_owner Zi Yan
2022-03-21 14:21   ` Zi Yan
2022-03-21 19:02   ` Roman Gushchin
2022-03-21 19:02     ` Roman Gushchin
2022-03-21 19:08     ` Zi Yan
2022-03-21 19:08       ` Zi Yan
2022-03-21 14:21 ` [RFC PATCH 3/5] mm: thp: split huge page to any lower order pages Zi Yan
2022-03-21 14:21   ` Zi Yan
2022-03-21 22:18   ` Roman Gushchin
2022-03-21 22:18     ` Roman Gushchin
2022-03-22 14:21     ` Zi Yan
2022-03-22 14:21       ` Zi Yan
2022-03-22  3:21   ` Miaohe Lin
2022-03-22  3:21     ` Miaohe Lin
2022-03-22 14:30     ` Zi Yan
2022-03-22 14:30       ` Zi Yan
2022-03-23  2:31       ` Miaohe Lin
2022-03-23  2:31         ` Miaohe Lin
2022-03-23 22:10         ` Zi Yan
2022-03-24  2:02           ` Miaohe Lin
2022-03-24  2:02             ` Miaohe Lin
2022-03-22 20:57   ` Yang Shi
2022-03-21 14:21 ` [RFC PATCH 4/5] mm: truncate: split huge page cache page to a non-zero order if possible Zi Yan
2022-03-21 14:21   ` Zi Yan
2022-03-21 22:32   ` Roman Gushchin
2022-03-21 22:32     ` Roman Gushchin
2022-03-22 14:19     ` Zi Yan
2022-03-22 14:19       ` Zi Yan
2022-03-23  6:40   ` [mm] 2757cee2d6: UBSAN:shift-out-of-bounds_in_include/linux/log2.h kernel test robot
2022-03-23  6:40     ` kernel test robot
2022-03-21 14:21 ` Zi Yan [this message]
2022-03-21 14:21   ` [RFC PATCH 5/5] mm: huge_memory: enable debugfs to split huge pages to any order Zi Yan
2022-03-21 22:23   ` Roman Gushchin
2022-03-21 22:23     ` Roman Gushchin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220321142128.2471199-6-zi.yan@sent.com \
    --to=zi.yan@sent.com \
    --cc=cgroups@vger.kernel.org \
    --cc=hughd@google.com \
    --cc=kirill.shutemov@linux.intel.com \
    --cc=linmiaohe@huawei.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-kselftest@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=roman.gushchin@linux.dev \
    --cc=shuah@kernel.org \
    --cc=shy828301@gmail.com \
    --cc=willy@infradead.org \
    --cc=ziy@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.