Linux-NVME Archive on lore.kernel.org
 help / color / Atom feed
* [RFC] nvme-cli: Support for hugetlbfs
@ 2019-11-05 18:34 Keith Busch
  2019-11-05 20:35 ` Chaitanya Kulkarni
  2019-11-06  0:20 ` Jens Axboe
  0 siblings, 2 replies; 4+ messages in thread
From: Keith Busch @ 2019-11-05 18:34 UTC (permalink / raw)
  To: linux-nvme, Jens Axboe; +Cc: Keith Busch

Some commands require exceptionally large data transfers, and the
kernel driver supports only a limited number of physical segments per
command. To help support large transfers, try to allocate physically
contiguous space via hugetlbfs.

Rather than deal with the nuances of allocating huge pages, this patch
requires libhugetlbfs be installed on the host system.

Signed-off-by: Keith Busch <kbusch@kernel.org>
---
 Makefile |  7 ++++++
 nvme.c   | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++------
 2 files changed, 73 insertions(+), 7 deletions(-)

diff --git a/Makefile b/Makefile
index 1dd8c0e..8bcf5a5 100644
--- a/Makefile
+++ b/Makefile
@@ -2,6 +2,7 @@ CFLAGS ?= -O2 -g -Wall -Werror
 override CFLAGS += -std=gnu99 -I.
 override CPPFLAGS += -D_GNU_SOURCE -D__CHECK_ENDIAN__
 LIBUUID = $(shell $(LD) -o /dev/null -luuid >/dev/null 2>&1; echo $$?)
+LIBHUGETLBFS = $(shell $(LD) -o /dev/null -lhugetlbfs >/dev/null 2>&1; echo $$?)
 HAVE_SYSTEMD = $(shell pkg-config --exists systemd  --atleast-version=232; echo $$?)
 NVME = nvme
 INSTALL ?= install
@@ -21,6 +22,12 @@ ifeq ($(LIBUUID),0)
 	override LIB_DEPENDS += uuid
 endif
 
+ifeq ($(LIBHUGETLBFS),0)
+	override LDFLAGS += -lhugetlbfs
+	override CFLAGS += -DLIBHUGETLBFS
+	override LIB_DEPENDS += hugetlbfs
+endif
+
 INC=-Iutil
 
 ifeq ($(HAVE_SYSTEMD),0)
diff --git a/nvme.c b/nvme.c
index 0823267..254bc7e 100644
--- a/nvme.c
+++ b/nvme.c
@@ -37,6 +37,10 @@
 #include <dirent.h>
 #include <libgen.h>
 
+#ifdef LIBHUGETLBFS
+#include <hugetlbfs.h>
+#endif
+
 #include <linux/fs.h>
 
 #include <sys/ioctl.h>
@@ -93,6 +97,55 @@ const char *conarg_host_traddr = "host_traddr";
 const char *dev = "/dev/";
 const char *subsys_dir = "/sys/class/nvme-subsystem/";
 
+static void *__nvme_alloc(size_t len, bool *huge)
+{
+	void *p;
+
+	if (!posix_memalign(&p, getpagesize(), len)) {
+		*huge = false;
+		memset(p, 0, len);
+		return p;
+	}
+	return NULL;
+}
+
+#ifdef LIBHUGETLBFS
+#define HUGE_MIN 0x80000
+
+static void nvme_free(void *p, bool huge)
+{
+	if (huge)
+		free_hugepage_region(p);
+	else
+		free(p);
+}
+
+static void *nvme_alloc(size_t len, bool *huge)
+{
+	void *p;
+
+	if (len < HUGE_MIN)
+		return __nvme_alloc(len, huge);
+
+	p = get_hugepage_region(len, GHP_DEFAULT);
+	if (!p)
+		return __nvme_alloc(len, huge);
+
+	*huge = true;
+	return p;
+}
+#else
+static void nvme_free(void *p, bool huge)
+{
+	free(p);
+}
+
+static void *nvme_alloc(size_t len, bool *huge)
+{
+	return __nvme_alloc(len, huge);
+}
+#endif
+
 static int open_dev(char *dev)
 {
 	int err, fd;
@@ -2362,6 +2415,7 @@ static int fw_download(int argc, char **argv, struct command *cmd, struct plugin
 	unsigned int fw_size;
 	struct stat sb;
 	void *fw_buf, *buf;
+	bool huge;
 
 	struct config {
 		char  *fw;
@@ -2409,7 +2463,9 @@ static int fw_download(int argc, char **argv, struct command *cmd, struct plugin
 		err = -EINVAL;
 		goto close_fw_fd;
 	}
-	if (posix_memalign(&fw_buf, getpagesize(), fw_size)) {
+
+	fw_buf = nvme_alloc(fw_size, &huge);
+	if (!fw_buf) {
 		fprintf(stderr, "No memory for f/w size:%d\n", fw_size);
 		err = -ENOMEM;
 		goto close_fw_fd;
@@ -2443,7 +2499,7 @@ static int fw_download(int argc, char **argv, struct command *cmd, struct plugin
 		printf("Firmware download success\n");
 
 free:
-	free(buf);
+	nvme_free(buf, huge);
 close_fw_fd:
 	close(fw_fd);
 close_fd:
@@ -4144,6 +4200,7 @@ static int submit_io(int opcode, char *command, const char *desc,
 	__u32 dsmgmt = 0;
 	int phys_sector_size = 0;
 	long long buffer_size = 0;
+	bool huge;
 
 	const char *start_block = "64-bit addr of first block to access";
 	const char *block_count = "number of blocks (zeroes based) on device to access";
@@ -4284,12 +4341,12 @@ static int submit_io(int opcode, char *command, const char *desc,
 		buffer_size = cfg.data_size;
 	}
 
-	if (posix_memalign(&buffer, getpagesize(), buffer_size)) {
+	buffer = nvme_alloc(buffer_size, &huge);
+	if (!buffer) {
 		fprintf(stderr, "can not allocate io payload\n");
 		err = -ENOMEM;
 		goto close_mfd;
 	}
-	memset(buffer, 0, buffer_size);
 
 	if (cfg.metadata_size) {
 		mbuffer = malloc(cfg.metadata_size);
@@ -4368,7 +4425,7 @@ free_mbuffer:
 	if (cfg.metadata_size)
 		free(mbuffer);
 free_buffer:
-	free(buffer);
+	nvme_free(buffer, huge);
 close_mfd:
 	if (strlen(cfg.metadata))
 		close(mfd);
@@ -4798,6 +4855,7 @@ static int passthru(int argc, char **argv, int ioctl_cmd, const char *desc, stru
 	void *data = NULL, *metadata = NULL;
 	int err = 0, wfd = STDIN_FILENO, fd;
 	__u32 result;
+	bool huge;
 
 	struct config {
 		__u8  opcode;
@@ -4920,7 +4978,8 @@ static int passthru(int argc, char **argv, int ioctl_cmd, const char *desc, stru
 		memset(metadata, cfg.prefill, cfg.metadata_len);
 	}
 	if (cfg.data_len) {
-		if (posix_memalign(&data, getpagesize(), cfg.data_len)) {
+		data = nvme_alloc(cfg.data_len, &huge);
+		if (!data) {
 			fprintf(stderr, "can not allocate data payload\n");
 			err = -ENOMEM;
 			goto free_metadata;
@@ -4983,7 +5042,7 @@ static int passthru(int argc, char **argv, int ioctl_cmd, const char *desc, stru
 
 free_data:
 	if (cfg.data_len)
-		free(data);
+		nvme_free(data, huge);
 free_metadata:
 	if (cfg.metadata_len)
 		free(metadata);
-- 
2.21.0


_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [RFC] nvme-cli: Support for hugetlbfs
  2019-11-05 18:34 [RFC] nvme-cli: Support for hugetlbfs Keith Busch
@ 2019-11-05 20:35 ` Chaitanya Kulkarni
  2019-11-06  0:49   ` Keith Busch
  2019-11-06  0:20 ` Jens Axboe
  1 sibling, 1 reply; 4+ messages in thread
From: Chaitanya Kulkarni @ 2019-11-05 20:35 UTC (permalink / raw)
  To: Keith Busch, linux-nvme, Jens Axboe

Thanks Keith, this is very useful, looks good.

Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>

On 11/05/2019 10:34 AM, Keith Busch wrote:
> Some commands require exceptionally large data transfers, and the
> kernel driver supports only a limited number of physical segments per
> command. To help support large transfers, try to allocate physically
> contiguous space via hugetlbfs.
>
> Rather than deal with the nuances of allocating huge pages, this patch
> requires libhugetlbfs be installed on the host system.
>
> Signed-off-by: Keith Busch<kbusch@kernel.org>
> ---


_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [RFC] nvme-cli: Support for hugetlbfs
  2019-11-05 18:34 [RFC] nvme-cli: Support for hugetlbfs Keith Busch
  2019-11-05 20:35 ` Chaitanya Kulkarni
@ 2019-11-06  0:20 ` Jens Axboe
  1 sibling, 0 replies; 4+ messages in thread
From: Jens Axboe @ 2019-11-06  0:20 UTC (permalink / raw)
  To: Keith Busch, linux-nvme

On 11/5/19 11:34 AM, Keith Busch wrote:
> Some commands require exceptionally large data transfers, and the
> kernel driver supports only a limited number of physical segments per
> command. To help support large transfers, try to allocate physically
> contiguous space via hugetlbfs.
> 
> Rather than deal with the nuances of allocating huge pages, this patch
> requires libhugetlbfs be installed on the host system.

LGTM, and solves a real issue with user issued admin commands that we
can't break up.

Reviewed-by: Jens Axboe <axboe@kernel.dk>

-- 
Jens Axboe


_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [RFC] nvme-cli: Support for hugetlbfs
  2019-11-05 20:35 ` Chaitanya Kulkarni
@ 2019-11-06  0:49   ` Keith Busch
  0 siblings, 0 replies; 4+ messages in thread
From: Keith Busch @ 2019-11-06  0:49 UTC (permalink / raw)
  To: Chaitanya Kulkarni; +Cc: Jens Axboe, linux-nvme

Thanks for the reviews! I've gone ahead and pushed this to master.

_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, back to index

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-11-05 18:34 [RFC] nvme-cli: Support for hugetlbfs Keith Busch
2019-11-05 20:35 ` Chaitanya Kulkarni
2019-11-06  0:49   ` Keith Busch
2019-11-06  0:20 ` Jens Axboe

Linux-NVME Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-nvme/0 linux-nvme/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-nvme linux-nvme/ https://lore.kernel.org/linux-nvme \
		linux-nvme@lists.infradead.org
	public-inbox-index linux-nvme

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.infradead.lists.linux-nvme


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git