All of lore.kernel.org
 help / color / mirror / Atom feed
From: Vishal Verma <vishal.l.verma@intel.com>
To: linux-nvdimm@lists.01.org
Cc: Dave Hansen <dave.hansen@linux.intel.com>,
	Pavel Tatashin <pasha.tatashin@soleen.com>
Subject: [ndctl PATCH v9 04/13] libdaxctl: add a 'daxctl_memory' object for memory based operations
Date: Wed, 31 Jul 2019 18:29:23 -0600	[thread overview]
Message-ID: <20190801002932.26430-5-vishal.l.verma@intel.com> (raw)
In-Reply-To: <20190801002932.26430-1-vishal.l.verma@intel.com>

Introduce a new 'daxctl_memory' object, which will be used for
operations related to managing dax devices in 'system-ram' mode.

Add libdaxctl APIs to get the target_node of a DAX device, and to
online, offline, and query the state of hotplugged memory sections
associated with a given device.

This adds the following new interfaces:

  daxctl_dev_get_target_node
  daxctl_dev_get_memory
  daxctl_memory_get_dev
  daxctl_memory_get_node_path
  daxctl_memory_get_block_size
  daxctl_memory_online
  daxctl_memory_offline
  daxctl_memory_is_online
  daxctl_memory_num_sections

Cc: Pavel Tatashin <pasha.tatashin@soleen.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Dan Williams <dan.j.williams@intel.com>
[for the memblock-already-online TOCTOU hole]
Reported-by: Fan Du <fan.du@intel.com>
Tested-by: Fan Du <fan.du@intel.com>
Signed-off-by: Vishal Verma <vishal.l.verma@intel.com>
---
 daxctl/lib/libdaxctl-private.h |  18 ++
 daxctl/lib/libdaxctl.c         | 384 +++++++++++++++++++++++++++++++++
 daxctl/lib/libdaxctl.sym       |   9 +
 daxctl/libdaxctl.h             |  11 +
 4 files changed, 422 insertions(+)

diff --git a/daxctl/lib/libdaxctl-private.h b/daxctl/lib/libdaxctl-private.h
index fee67d1..01091de 100644
--- a/daxctl/lib/libdaxctl-private.h
+++ b/daxctl/lib/libdaxctl-private.h
@@ -39,6 +39,13 @@ static const char *dax_modules[] = {
 	[DAXCTL_DEV_MODE_RAM] = "kmem",
 };
 
+enum memory_op {
+	MEM_SET_OFFLINE,	/* offline each memblock of the device */
+	MEM_SET_ONLINE,		/* online each memblock of the device */
+	MEM_IS_ONLINE,		/* count the memblocks that are online */
+	MEM_COUNT,		/* count the memblocks of the device */
+};
+
 /**
  * struct daxctl_region - container for dax_devices
  */
@@ -70,8 +77,19 @@ struct daxctl_dev {
 	struct kmod_module *module;
 	struct kmod_list *kmod_list;
 	struct daxctl_region *region;
+	struct daxctl_memory *mem;
+	int target_node;
+};
+
+struct daxctl_memory {
+	struct daxctl_dev *dev;		/* device this object was created for */
+	void *mem_buf;			/* scratch buffer for sysfs paths */
+	size_t buf_len;			/* size of mem_buf in bytes */
+	char *node_path;		/* sysfs dir of the target node, or NULL */
+	unsigned long block_size;	/* memblock size in bytes, 0 if unknown */
+};
 
+
 static inline int check_kmod(struct kmod_ctx *kmod_ctx)
 {
 	return kmod_ctx ? 0 : -ENXIO;
diff --git a/daxctl/lib/libdaxctl.c b/daxctl/lib/libdaxctl.c
index aa0d2f2..949c56f 100644
--- a/daxctl/lib/libdaxctl.c
+++ b/daxctl/lib/libdaxctl.c
@@ -200,6 +200,15 @@ DAXCTL_EXPORT void daxctl_region_get_uuid(struct daxctl_region *region, uuid_t u
 	uuid_copy(uu, region->uuid);
 }
 
+/*
+ * Release the daxctl_memory object attached to @dev, if any, together
+ * with the buffers it owns, and clear dev->mem so no stale pointer is
+ * left behind. Safe to call with a NULL @dev or when no memory object
+ * is attached.
+ */
+static void free_mem(struct daxctl_dev *dev)
+{
+	struct daxctl_memory *mem;
+
+	if (!dev || !dev->mem)
+		return;
+
+	mem = dev->mem;
+	free(mem->node_path);
+	/* the sysfs path scratch buffer is owned by the object as well */
+	free(mem->mem_buf);
+	free(mem);
+	dev->mem = NULL;
+}
+
 static void free_dev(struct daxctl_dev *dev, struct list_head *head)
 {
 	if (head)
@@ -207,6 +216,7 @@ static void free_dev(struct daxctl_dev *dev, struct list_head *head)
 	kmod_module_unref_list(dev->kmod_list);
 	free(dev->dev_buf);
 	free(dev->dev_path);
+	free_mem(dev);
 	free(dev);
 }
 
@@ -380,6 +390,94 @@ static struct kmod_list *to_module_list(struct daxctl_ctx *ctx,
 	return list;
 }
 
+/*
+ * Report whether @dev is an enabled dax-bus device whose bound driver
+ * module is the one listed for DAXCTL_DEV_MODE_RAM in dax_modules[].
+ *
+ * The check resolves the device's driver/module symlink and compares the
+ * last path component against the expected module name. Any failure along
+ * the way (wrong device model, disabled device, path too long, symlink
+ * that cannot be resolved) is reported as false.
+ */
+static int dev_is_system_ram_capable(struct daxctl_dev *dev)
+{
+	const char *devname = daxctl_dev_get_devname(dev);
+	struct daxctl_ctx *ctx = daxctl_dev_get_ctx(dev);
+	char link[200];
+	const int link_len = sizeof(link);
+	char *resolved;
+	int is_ram;
+
+	if (!device_model_is_dax_bus(dev) || !daxctl_dev_is_enabled(dev))
+		return false;
+
+	if (snprintf(link, link_len, "%s/driver/module",
+			dev->dev_path) >= link_len) {
+		err(ctx, "%s: buffer too small!\n", devname);
+		return false;
+	}
+
+	/* realpath() allocates the result; it is released below */
+	resolved = realpath(link, NULL);
+	if (!resolved)
+		return false;
+
+	is_ram = strcmp(basename(resolved),
+			dax_modules[DAXCTL_DEV_MODE_RAM]) == 0;
+	free(resolved);
+
+	return is_ram ? true : false;
+}
+
+/*
+ * Allocate and initialize the daxctl_memory object for @dev and attach it
+ * to the device.
+ *
+ * This checks for the device to be in system-ram mode, so calling
+ * daxctl_dev_get_memory() on a devdax mode device will always return NULL.
+ *
+ * A memory block size that cannot be parsed is logged and leaves
+ * block_size at 0; a negative target node leaves node_path NULL. Neither
+ * condition fails the allocation.
+ *
+ * Returns the new object, or NULL if the device is not in system-ram mode
+ * or an allocation failed.
+ */
+static struct daxctl_memory *daxctl_dev_alloc_mem(struct daxctl_dev *dev)
+{
+	const char *size_path = "/sys/devices/system/memory/block_size_bytes";
+	const char *node_base = "/sys/devices/system/node/node";
+	const char *devname = daxctl_dev_get_devname(dev);
+	struct daxctl_ctx *ctx = daxctl_dev_get_ctx(dev);
+	struct daxctl_memory *mem;
+	char buf[SYSFS_ATTR_SIZE];
+	int node_num;
+
+	if (!dev_is_system_ram_capable(dev))
+		return NULL;
+
+	mem = calloc(1, sizeof(*mem));
+	if (!mem)
+		return NULL;
+
+	mem->dev = dev;
+
+	if (sysfs_read_attr(ctx, size_path, buf) == 0) {
+		/* clear errno so the message below reflects strtoul() only */
+		errno = 0;
+		mem->block_size = strtoul(buf, NULL, 16);
+		if (mem->block_size == 0 || mem->block_size == ULONG_MAX) {
+			err(ctx, "%s: Unable to determine memblock size: %s\n",
+				devname, strerror(errno));
+			mem->block_size = 0;
+		}
+	}
+
+	node_num = daxctl_dev_get_target_node(dev);
+	if (node_num >= 0) {
+		if (asprintf(&mem->node_path, "%s%d", node_base,
+				node_num) < 0) {
+			err(ctx, "%s: Unable to set node_path\n", devname);
+			goto err_mem;
+		}
+	}
+
+	/* scratch buffer used to build per-memblock sysfs paths */
+	mem->buf_len = strlen(node_base) + 256;
+	mem->mem_buf = calloc(1, mem->buf_len);
+	if (!mem->mem_buf)
+		goto err_node;
+
+	/*
+	 * Attach the object to the device so that daxctl_dev_get_memory()
+	 * reuses it and free_mem() can release it. Without this every
+	 * lookup would allocate, and leak, a fresh object.
+	 */
+	dev->mem = mem;
+
+	return mem;
+
+err_node:
+	free(mem->node_path);
+err_mem:
+	free(mem);
+	return NULL;
+}
+
 static void *add_dax_dev(void *parent, int id, const char *daxdev_base)
 {
 	const char *devname = devpath_to_devname(daxdev_base);
@@ -435,6 +533,12 @@ static void *add_dax_dev(void *parent, int id, const char *daxdev_base)
 	if (rc == 0)
 		dev->kmod_list = to_module_list(ctx, buf);
 
+	sprintf(path, "%s/target_node", daxdev_base);
+	if (sysfs_read_attr(ctx, path, buf) == 0)
+		dev->target_node = strtol(buf, NULL, 0);
+	else
+		dev->target_node = -1;
+
 	daxctl_dev_foreach(region, dev_dup)
 		if (dev_dup->id == dev->id) {
 			free_dev(dev, NULL);
@@ -862,6 +966,9 @@ DAXCTL_EXPORT int daxctl_dev_disable(struct daxctl_dev *dev)
 	if (!daxctl_dev_is_enabled(dev))
 		return 0;
 
+	/* If there is a memory object, first free that */
+	free_mem(dev);
+
 	daxctl_unbind(ctx, dev->dev_path);
 
 	if (daxctl_dev_is_enabled(dev)) {
@@ -944,3 +1051,280 @@ DAXCTL_EXPORT unsigned long long daxctl_dev_get_size(struct daxctl_dev *dev)
 {
 	return dev->size;
 }
+
+/*
+ * Return the target_node value cached in @dev. add_dax_dev() fills it from
+ * the device's 'target_node' sysfs attribute, or sets it to -1 when that
+ * attribute cannot be read.
+ */
+DAXCTL_EXPORT int daxctl_dev_get_target_node(struct daxctl_dev *dev)
+{
+	return dev->target_node;
+}
+
+/*
+ * Return the daxctl_memory object for @dev: the one recorded in dev->mem
+ * if that is set, otherwise whatever daxctl_dev_alloc_mem() produces,
+ * which is NULL for a device that is not in system-ram mode.
+ */
+DAXCTL_EXPORT struct daxctl_memory *daxctl_dev_get_memory(struct daxctl_dev *dev)
+{
+	struct daxctl_memory *mem = dev->mem;
+
+	if (!mem)
+		mem = daxctl_dev_alloc_mem(dev);
+
+	return mem;
+}
+
+/* Return the dax device that @mem was created for. */
+DAXCTL_EXPORT struct daxctl_dev *daxctl_memory_get_dev(struct daxctl_memory *mem)
+{
+	return mem->dev;
+}
+
+/*
+ * Return the sysfs directory of the device's target node
+ * ("/sys/devices/system/node/node<N>"). This is NULL when the device had
+ * a negative target node at the time the memory object was allocated.
+ * The string is owned by @mem.
+ */
+DAXCTL_EXPORT const char *daxctl_memory_get_node_path(struct daxctl_memory *mem)
+{
+	return mem->node_path;
+}
+
+/*
+ * Return the memory block size recorded in @mem, as parsed from
+ * /sys/devices/system/memory/block_size_bytes, or 0 if it could not be
+ * determined.
+ */
+DAXCTL_EXPORT unsigned long daxctl_memory_get_block_size(struct daxctl_memory *mem)
+{
+	return mem->block_size;
+}
+
+/*
+ * Online a single memory block as movable memory.
+ * @dev: device the block belongs to (used for logging)
+ * @path: sysfs 'state' attribute of the memory block
+ *
+ * Returns 0 if this call onlined the block, 1 if the block was found to
+ * be online already, otherwise the non-zero value returned by the failing
+ * sysfs helper.
+ */
+static int online_one_memblock(struct daxctl_dev *dev, char *path)
+{
+	const char *devname = daxctl_dev_get_devname(dev);
+	struct daxctl_ctx *ctx = daxctl_dev_get_ctx(dev);
+	char state[SYSFS_ATTR_SIZE];
+	int rc;
+
+	rc = sysfs_read_attr(ctx, path, state);
+	if (rc) {
+		err(ctx, "%s: Failed to read %s: %s\n",
+			devname, path, strerror(-rc));
+		return rc;
+	}
+
+	/*
+	 * The kernel config or a udev rule may have onlined the memblock
+	 * already, in which case it is simply skipped.
+	 */
+	if (strncmp(state, "online", 6) == 0)
+		return 1;
+
+	rc = sysfs_write_attr_quiet(ctx, path, "online_movable");
+	if (rc == 0)
+		return 0;
+
+	/*
+	 * The already-online check above leaves a TOCTOU window in which
+	 * someone else (such as a udev rule) may have onlined the memory,
+	 * making the sysfs store fail. Re-read the state: if it is now the
+	 * desired one, this is not an error.
+	 */
+	if (sysfs_read_attr(ctx, path, state) == 0 &&
+			strncmp(state, "online", 6) == 0)
+		return 1;
+
+	err(ctx, "%s: Failed to online %s: %s\n",
+		devname, path, strerror(-rc));
+	return rc;
+}
+
+/*
+ * Offline a single memory block.
+ * @dev: device the block belongs to (used for logging)
+ * @path: sysfs 'state' attribute of the memory block
+ *
+ * Returns 0 if this call offlined the block, 1 if the block was found to
+ * be offline already, otherwise the non-zero value returned by the failing
+ * sysfs helper.
+ */
+static int offline_one_memblock(struct daxctl_dev *dev, char *path)
+{
+	const char *devname = daxctl_dev_get_devname(dev);
+	struct daxctl_ctx *ctx = daxctl_dev_get_ctx(dev);
+	char state[SYSFS_ATTR_SIZE];
+	int rc;
+
+	rc = sysfs_read_attr(ctx, path, state);
+	if (rc) {
+		err(ctx, "%s: Failed to read %s: %s\n",
+			devname, path, strerror(-rc));
+		return rc;
+	}
+
+	/* nothing to do for a memblock that is offline already */
+	if (strncmp(state, "offline", 7) == 0)
+		return 1;
+
+	rc = sysfs_write_attr_quiet(ctx, path, "offline");
+	if (rc == 0)
+		return 0;
+
+	/*
+	 * The store failed; re-read the state in case someone else raced
+	 * to offline the block after the check above (same TOCTOU handling
+	 * as in online_one_memblock()).
+	 */
+	if (sysfs_read_attr(ctx, path, state) == 0 &&
+			strncmp(state, "offline", 7) == 0)
+		return 1;
+
+	err(ctx, "%s: Failed to offline %s: %s\n",
+		devname, path, strerror(-rc));
+	return rc;
+}
+
+/*
+ * Read the sysfs 'state' attribute at @path and report whether the memory
+ * block is online.
+ *
+ * Returns 1 if the state starts with "online", 0 otherwise, or the
+ * non-zero value from sysfs_read_attr() if the attribute cannot be read.
+ */
+static int memblock_is_online(struct daxctl_dev *dev, char *path)
+{
+	const char *devname = daxctl_dev_get_devname(dev);
+	struct daxctl_ctx *ctx = daxctl_dev_get_ctx(dev);
+	char state[SYSFS_ATTR_SIZE];
+	int rc = sysfs_read_attr(ctx, path, state);
+
+	if (rc == 0)
+		return strncmp(state, "online", 6) == 0;
+
+	err(ctx, "%s: Failed to read %s: %s\n",
+		devname, path, strerror(-rc));
+	return rc;
+}
+
+/*
+ * Decide whether the memory block @memblock (a "memory<N>" directory name
+ * under /sys/devices/system/memory) lies within the physical address range
+ * of @dev.
+ *
+ * The block starts at its sysfs phys_index multiplied by the memory block
+ * size and spans one block size; the device covers
+ * [resource, resource + size).
+ *
+ * Returns true only if the whole block falls inside the device range.
+ * Returns false otherwise, including when the device has no memory object
+ * or when any of the required attributes cannot be determined (the latter
+ * is logged).
+ */
+static bool memblock_in_dev(struct daxctl_dev *dev, const char *memblock)
+{
+	struct daxctl_memory *mem = daxctl_dev_get_memory(dev);
+	const char *mem_base = "/sys/devices/system/memory/";
+	unsigned long long memblock_res, dev_start, dev_end;
+	const char *devname = daxctl_dev_get_devname(dev);
+	struct daxctl_ctx *ctx = daxctl_dev_get_ctx(dev);
+	unsigned long memblock_size;
+	char buf[SYSFS_ATTR_SIZE];
+	unsigned long phys_index;
+	int path_len, rc;
+	char *path;
+
+	/* no memory object, e.g. the device is not in system-ram mode */
+	if (!mem)
+		return false;
+
+	path = mem->mem_buf;
+	path_len = mem->buf_len;
+
+	rc = snprintf(path, path_len, "%s/%s/phys_index", mem_base, memblock);
+	if (rc < 0 || rc >= path_len) {
+		/* a truncated path would name the wrong attribute */
+		err(ctx, "%s: buffer too small!\n", devname);
+		return false;
+	}
+
+	rc = sysfs_read_attr(ctx, path, buf);
+	if (rc) {
+		/* report the helper's own error code, not a stale errno */
+		err(ctx, "%s: %s: Unable to determine phys_index: %s\n",
+			devname, memblock, strerror(-rc));
+		return false;
+	}
+
+	/* clear errno so the message below reflects strtoul() only */
+	errno = 0;
+	phys_index = strtoul(buf, NULL, 16);
+	if (phys_index == 0 || phys_index == ULONG_MAX) {
+		err(ctx, "%s: %s: Unable to determine phys_index: %s\n",
+			devname, memblock, strerror(errno));
+		return false;
+	}
+
+	dev_start = daxctl_dev_get_resource(dev);
+	if (!dev_start) {
+		err(ctx, "%s: Unable to determine resource\n", devname);
+		return false;
+	}
+	/* one past the last byte of the device */
+	dev_end = dev_start + daxctl_dev_get_size(dev);
+
+	memblock_size = daxctl_memory_get_block_size(mem);
+	if (!memblock_size) {
+		err(ctx, "%s: Unable to determine memory block size\n",
+			devname);
+		return false;
+	}
+	/* widen before multiplying so the address cannot wrap on 32-bit */
+	memblock_res = (unsigned long long)phys_index * memblock_size;
+
+	/*
+	 * dev_end is exclusive, so a block that merely starts there, or one
+	 * that spills past it, does not belong to the device.
+	 */
+	return memblock_res >= dev_start &&
+		memblock_res + memblock_size <= dev_end;
+}
+
+/*
+ * Apply @op to the memory block whose sysfs 'state' attribute is @path.
+ *
+ * The return value is shaped for the counting done by the caller: 0 means
+ * "count this block", a positive value means "skip it", and a negative
+ * value is an error. For MEM_IS_ONLINE the result of memblock_is_online()
+ * is therefore inverted, so that online blocks yield 0. MEM_COUNT always
+ * yields 0. An unrecognized @op is logged and yields -EINVAL.
+ */
+static int op_for_one_memblock(struct daxctl_memory *mem, char *path,
+		enum memory_op op)
+{
+	struct daxctl_dev *dev = daxctl_memory_get_dev(mem);
+	const char *devname = daxctl_dev_get_devname(dev);
+	struct daxctl_ctx *ctx = daxctl_dev_get_ctx(dev);
+	int online;
+
+	if (op == MEM_SET_ONLINE)
+		return online_one_memblock(dev, path);
+
+	if (op == MEM_SET_OFFLINE)
+		return offline_one_memblock(dev, path);
+
+	if (op == MEM_COUNT)
+		return 0;
+
+	if (op != MEM_IS_ONLINE) {
+		err(ctx, "%s: BUG: unknown op: %d\n", devname, op);
+		return -EINVAL;
+	}
+
+	online = memblock_is_online(dev, path);
+	if (online < 0)
+		return online;
+
+	/* flip the boolean: the caller counts blocks that return 0 */
+	return !online;
+}
+
+/*
+ * Apply @op to every memory block that belongs to @mem's device.
+ *
+ * Walks the "memory*" entries of the device's target node directory,
+ * skips the ones memblock_in_dev() rejects, and runs
+ * op_for_one_memblock() on the 'state' attribute of each remaining block.
+ *
+ * Returns the number of blocks for which the operation returned 0, or a
+ * negative error code: -ENXIO if the node path is unknown, -errno if the
+ * directory cannot be opened or read, -ENOMEM if a path does not fit the
+ * scratch buffer, or the error from the per-block operation.
+ */
+static int daxctl_memory_op(struct daxctl_memory *mem, enum memory_op op)
+{
+	struct daxctl_dev *dev = daxctl_memory_get_dev(mem);
+	const char *devname = daxctl_dev_get_devname(dev);
+	struct daxctl_ctx *ctx = daxctl_dev_get_ctx(dev);
+	const char *node_path;
+	int rc, count = 0;
+	struct dirent *de;
+	DIR *node_dir;
+
+	node_path = daxctl_memory_get_node_path(mem);
+	if (!node_path) {
+		err(ctx, "%s: Failed to get node_path\n", devname);
+		return -ENXIO;
+	}
+
+	node_dir = opendir(node_path);
+	if (!node_dir)
+		return -errno;
+
+	for (;;) {
+		char *path = mem->mem_buf;
+		int len = mem->buf_len;
+
+		/*
+		 * readdir() returns NULL both at end-of-directory and on
+		 * failure; only errno tells them apart. Clear it right
+		 * before every call, since the helpers invoked for the
+		 * previous entry may have left it set.
+		 */
+		errno = 0;
+		de = readdir(node_dir);
+		if (!de)
+			break;
+
+		if (strncmp(de->d_name, "memory", 6) != 0)
+			continue;
+		if (!memblock_in_dev(dev, de->d_name))
+			continue;
+
+		rc = snprintf(path, len, "%s/%s/state",
+			node_path, de->d_name);
+		if (rc < 0 || rc >= len) {
+			/* never operate on a truncated path */
+			rc = -ENOMEM;
+			goto out_dir;
+		}
+
+		rc = op_for_one_memblock(mem, path, op);
+		if (rc < 0)
+			goto out_dir;
+		if (rc == 0)
+			count++;
+	}
+
+	/* errno is still the value left by the final readdir() call */
+	rc = errno ? -errno : count;
+
+out_dir:
+	closedir(node_dir);
+	return rc;
+}
+
+/*
+ * Online every memory block of @mem's device. Returns the number of blocks
+ * this call onlined (blocks that were already online are not counted), or
+ * a negative error code.
+ */
+DAXCTL_EXPORT int daxctl_memory_online(struct daxctl_memory *mem)
+{
+	return daxctl_memory_op(mem, MEM_SET_ONLINE);
+}
+
+/*
+ * Offline every memory block of @mem's device. Returns the number of
+ * blocks this call offlined (blocks that were already offline are not
+ * counted), or a negative error code.
+ */
+DAXCTL_EXPORT int daxctl_memory_offline(struct daxctl_memory *mem)
+{
+	return daxctl_memory_op(mem, MEM_SET_OFFLINE);
+}
+
+/*
+ * Returns the number of memory blocks of @mem's device that are currently
+ * online (0 if none are), or a negative error code.
+ */
+DAXCTL_EXPORT int daxctl_memory_is_online(struct daxctl_memory *mem)
+{
+	return daxctl_memory_op(mem, MEM_IS_ONLINE);
+}
+
+/*
+ * Returns the number of memory blocks that belong to @mem's device,
+ * regardless of their state, or a negative error code.
+ */
+DAXCTL_EXPORT int daxctl_memory_num_sections(struct daxctl_memory *mem)
+{
+	return daxctl_memory_op(mem, MEM_COUNT);
+}
diff --git a/daxctl/lib/libdaxctl.sym b/daxctl/lib/libdaxctl.sym
index 1692624..bc18604 100644
--- a/daxctl/lib/libdaxctl.sym
+++ b/daxctl/lib/libdaxctl.sym
@@ -59,4 +59,13 @@ global:
 	daxctl_dev_enable_devdax;
 	daxctl_dev_enable_ram;
 	daxctl_dev_get_resource;
+	daxctl_dev_get_target_node;
+	daxctl_dev_get_memory;
+	daxctl_memory_get_dev;
+	daxctl_memory_get_node_path;
+	daxctl_memory_get_block_size;
+	daxctl_memory_online;
+	daxctl_memory_offline;
+	daxctl_memory_is_online;
+	daxctl_memory_num_sections;
 } LIBDAXCTL_5;
diff --git a/daxctl/libdaxctl.h b/daxctl/libdaxctl.h
index adf55f3..fb6c3b1 100644
--- a/daxctl/libdaxctl.h
+++ b/daxctl/libdaxctl.h
@@ -73,6 +73,17 @@ int daxctl_dev_is_enabled(struct daxctl_dev *dev);
 int daxctl_dev_disable(struct daxctl_dev *dev);
 int daxctl_dev_enable_devdax(struct daxctl_dev *dev);
 int daxctl_dev_enable_ram(struct daxctl_dev *dev);
+int daxctl_dev_get_target_node(struct daxctl_dev *dev);
+
+struct daxctl_memory;
+struct daxctl_memory *daxctl_dev_get_memory(struct daxctl_dev *dev);
+struct daxctl_dev *daxctl_memory_get_dev(struct daxctl_memory *mem);
+const char *daxctl_memory_get_node_path(struct daxctl_memory *mem);
+unsigned long daxctl_memory_get_block_size(struct daxctl_memory *mem);
+int daxctl_memory_online(struct daxctl_memory *mem);
+int daxctl_memory_offline(struct daxctl_memory *mem);
+int daxctl_memory_is_online(struct daxctl_memory *mem);
+int daxctl_memory_num_sections(struct daxctl_memory *mem);
 
 #define daxctl_dev_foreach(region, dev) \
         for (dev = daxctl_dev_get_first(region); \
-- 
2.20.1

_______________________________________________
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm

  parent reply	other threads:[~2019-08-01  0:32 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-08-01  0:29 [ndctl PATCH v9 00/13] daxctl: add a new reconfigure-device command Vishal Verma
2019-08-01  0:29 ` [ndctl PATCH v9 01/13] libdaxctl: add interfaces to get ctx and check device state Vishal Verma
2019-08-01  0:29 ` [ndctl PATCH v9 02/13] libdaxctl: add interfaces to enable/disable devices Vishal Verma
2019-08-01  0:29 ` [ndctl PATCH v9 03/13] libdaxctl: add an interface to retrieve the device resource Vishal Verma
2019-08-01  0:29 ` Vishal Verma [this message]
2019-08-05 23:57   ` [ndctl PATCH v9 04/13] libdaxctl: add a 'daxctl_memory' object for memory based operations Verma, Vishal L
2019-08-01  0:29 ` [ndctl PATCH v9 05/13] daxctl/list: add target_node for device listings Vishal Verma
2019-08-01  0:29 ` [ndctl PATCH v9 06/13] daxctl/list: display the mode for a dax device Vishal Verma
2019-08-01  0:29 ` [ndctl PATCH v9 07/13] daxctl: add a new reconfigure-device command Vishal Verma
2019-08-01  0:29 ` [ndctl PATCH v9 08/13] Documentation/daxctl: add a man page for daxctl-reconfigure-device Vishal Verma
2019-08-01  0:29 ` [ndctl PATCH v9 09/13] daxctl: add commands to online and offline memory Vishal Verma
2019-08-01  0:29 ` [ndctl PATCH v9 10/13] Documentation: Add man pages for daxctl-{on, off}line-memory Vishal Verma
2019-08-01  0:29 ` [ndctl PATCH v9 11/13] contrib/ndctl: fix region-id completions for daxctl Vishal Verma
2019-08-01  0:29 ` [ndctl PATCH v9 12/13] contrib/ndctl: add bash-completion for the new daxctl commands Vishal Verma
2019-08-01  0:29 ` [ndctl PATCH v9 13/13] test: Add a unit test for daxctl-reconfigure-device and friends Vishal Verma

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190801002932.26430-5-vishal.l.verma@intel.com \
    --to=vishal.l.verma@intel.com \
    --cc=dave.hansen@linux.intel.com \
    --cc=linux-nvdimm@lists.01.org \
    --cc=pasha.tatashin@soleen.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.