From: Vishal Verma <vishal.l.verma@intel.com>
To: linux-nvdimm@lists.01.org
Cc: Dave Hansen <dave.hansen@linux.intel.com>,
Pavel Tatashin <pasha.tatashin@soleen.com>
Subject: [ndctl PATCH v9 04/13] libdaxctl: add a 'daxctl_memory' object for memory based operations
Date: Wed, 31 Jul 2019 18:29:23 -0600 [thread overview]
Message-ID: <20190801002932.26430-5-vishal.l.verma@intel.com> (raw)
In-Reply-To: <20190801002932.26430-1-vishal.l.verma@intel.com>
Introduce a new 'daxctl_memory' object, which will be used for
operations related to managing dax devices in 'system-memory' modes.
Add libdaxctl APIs to get the target_node of a DAX device, and to
online, offline, and query the state of hotplugged memory sections
associated with a given device.
This adds the following new interfaces:
daxctl_dev_get_target_node
daxctl_dev_get_memory
daxctl_memory_get_dev
daxctl_memory_get_node_path
daxctl_memory_get_block_size
daxctl_memory_online
daxctl_memory_offline
daxctl_memory_is_online
daxctl_memory_num_sections
Cc: Pavel Tatashin <pasha.tatashin@soleen.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Dan Williams <dan.j.williams@intel.com>
[for the memblock-already-online TOCTOU hole]
Reported-by: Fan Du <fan.du@intel.com>
Tested-by: Fan Du <fan.du@intel.com>
Signed-off-by: Vishal Verma <vishal.l.verma@intel.com>
---
daxctl/lib/libdaxctl-private.h | 18 ++
daxctl/lib/libdaxctl.c | 384 +++++++++++++++++++++++++++++++++
daxctl/lib/libdaxctl.sym | 9 +
daxctl/libdaxctl.h | 11 +
4 files changed, 422 insertions(+)
diff --git a/daxctl/lib/libdaxctl-private.h b/daxctl/lib/libdaxctl-private.h
index fee67d1..01091de 100644
--- a/daxctl/lib/libdaxctl-private.h
+++ b/daxctl/lib/libdaxctl-private.h
@@ -39,6 +39,13 @@ static const char *dax_modules[] = {
[DAXCTL_DEV_MODE_RAM] = "kmem",
};
+enum memory_op {
+ MEM_SET_OFFLINE,
+ MEM_SET_ONLINE,
+ MEM_IS_ONLINE,
+ MEM_COUNT,
+};
+
/**
* struct daxctl_region - container for dax_devices
*/
@@ -70,8 +77,19 @@ struct daxctl_dev {
struct kmod_module *module;
struct kmod_list *kmod_list;
struct daxctl_region *region;
+ struct daxctl_memory *mem;
+ int target_node;
+};
+
+struct daxctl_memory {
+ struct daxctl_dev *dev;
+ void *mem_buf;
+ size_t buf_len;
+ char *node_path;
+ unsigned long block_size;
};
+
static inline int check_kmod(struct kmod_ctx *kmod_ctx)
{
return kmod_ctx ? 0 : -ENXIO;
diff --git a/daxctl/lib/libdaxctl.c b/daxctl/lib/libdaxctl.c
index aa0d2f2..949c56f 100644
--- a/daxctl/lib/libdaxctl.c
+++ b/daxctl/lib/libdaxctl.c
@@ -200,6 +200,15 @@ DAXCTL_EXPORT void daxctl_region_get_uuid(struct daxctl_region *region, uuid_t u
uuid_copy(uu, region->uuid);
}
+static void free_mem(struct daxctl_dev *dev)
+{
+ if (dev && dev->mem) {
+ free(dev->mem->node_path);
+ free(dev->mem);
+ dev->mem = NULL;
+ }
+}
+
static void free_dev(struct daxctl_dev *dev, struct list_head *head)
{
if (head)
@@ -207,6 +216,7 @@ static void free_dev(struct daxctl_dev *dev, struct list_head *head)
kmod_module_unref_list(dev->kmod_list);
free(dev->dev_buf);
free(dev->dev_path);
+ free_mem(dev);
free(dev);
}
@@ -380,6 +390,94 @@ static struct kmod_list *to_module_list(struct daxctl_ctx *ctx,
return list;
}
+static int dev_is_system_ram_capable(struct daxctl_dev *dev)
+{
+ const char *devname = daxctl_dev_get_devname(dev);
+ struct daxctl_ctx *ctx = daxctl_dev_get_ctx(dev);
+ char *mod_path, *mod_base;
+ char path[200];
+ const int len = sizeof(path);
+
+ if (!device_model_is_dax_bus(dev))
+ return false;
+
+ if (!daxctl_dev_is_enabled(dev))
+ return false;
+
+ if (snprintf(path, len, "%s/driver/module", dev->dev_path) >= len) {
+ err(ctx, "%s: buffer too small!\n", devname);
+ return false;
+ }
+
+ mod_path = realpath(path, NULL);
+ if (!mod_path)
+ return false;
+
+ mod_base = basename(mod_path);
+ if (strcmp(mod_base, dax_modules[DAXCTL_DEV_MODE_RAM]) == 0) {
+ free(mod_path);
+ return true;
+ }
+
+ free(mod_path);
+ return false;
+}
+
+/*
+ * This checks for the device to be in system-ram mode, so calling
+ * daxctl_dev_get_memory() on a devdax mode device will always return NULL.
+ */
+static struct daxctl_memory *daxctl_dev_alloc_mem(struct daxctl_dev *dev)
+{
+ const char *size_path = "/sys/devices/system/memory/block_size_bytes";
+ const char *node_base = "/sys/devices/system/node/node";
+ const char *devname = daxctl_dev_get_devname(dev);
+ struct daxctl_ctx *ctx = daxctl_dev_get_ctx(dev);
+ struct daxctl_memory *mem;
+ char buf[SYSFS_ATTR_SIZE];
+ int node_num;
+
+ if (!dev_is_system_ram_capable(dev))
+ return NULL;
+
+ mem = calloc(1, sizeof(*mem));
+ if (!mem)
+ return NULL;
+
+ mem->dev = dev;
+
+ if (sysfs_read_attr(ctx, size_path, buf) == 0) {
+ mem->block_size = strtoul(buf, NULL, 16);
+ if (mem->block_size == 0 || mem->block_size == ULONG_MAX) {
+ err(ctx, "%s: Unable to determine memblock size: %s\n",
+ devname, strerror(errno));
+ mem->block_size = 0;
+ }
+ }
+
+ node_num = daxctl_dev_get_target_node(dev);
+ if (node_num >= 0) {
+ if (asprintf(&mem->node_path, "%s%d", node_base,
+ node_num) < 0) {
+ err(ctx, "%s: Unable to set node_path\n", devname);
+ goto err_mem;
+ }
+ }
+
+ mem->mem_buf = calloc(1, strlen(node_base) + 256);
+ if (!mem->mem_buf)
+ goto err_node;
+ mem->buf_len = strlen(node_base) + 256;
+
+ return mem;
+
+err_node:
+ free(mem->node_path);
+err_mem:
+ free(mem);
+ return NULL;
+}
+
static void *add_dax_dev(void *parent, int id, const char *daxdev_base)
{
const char *devname = devpath_to_devname(daxdev_base);
@@ -435,6 +533,12 @@ static void *add_dax_dev(void *parent, int id, const char *daxdev_base)
if (rc == 0)
dev->kmod_list = to_module_list(ctx, buf);
+ sprintf(path, "%s/target_node", daxdev_base);
+ if (sysfs_read_attr(ctx, path, buf) == 0)
+ dev->target_node = strtol(buf, NULL, 0);
+ else
+ dev->target_node = -1;
+
daxctl_dev_foreach(region, dev_dup)
if (dev_dup->id == dev->id) {
free_dev(dev, NULL);
@@ -862,6 +966,9 @@ DAXCTL_EXPORT int daxctl_dev_disable(struct daxctl_dev *dev)
if (!daxctl_dev_is_enabled(dev))
return 0;
+ /* If there is a memory object, first free that */
+ free_mem(dev);
+
daxctl_unbind(ctx, dev->dev_path);
if (daxctl_dev_is_enabled(dev)) {
@@ -944,3 +1051,280 @@ DAXCTL_EXPORT unsigned long long daxctl_dev_get_size(struct daxctl_dev *dev)
{
return dev->size;
}
+
+DAXCTL_EXPORT int daxctl_dev_get_target_node(struct daxctl_dev *dev)
+{
+ return dev->target_node;
+}
+
+DAXCTL_EXPORT struct daxctl_memory *daxctl_dev_get_memory(struct daxctl_dev *dev)
+{
+ if (dev->mem)
+ return dev->mem;
+ else
+ return daxctl_dev_alloc_mem(dev);
+}
+
+DAXCTL_EXPORT struct daxctl_dev *daxctl_memory_get_dev(struct daxctl_memory *mem)
+{
+ return mem->dev;
+}
+
+DAXCTL_EXPORT const char *daxctl_memory_get_node_path(struct daxctl_memory *mem)
+{
+ return mem->node_path;
+}
+
+DAXCTL_EXPORT unsigned long daxctl_memory_get_block_size(struct daxctl_memory *mem)
+{
+ return mem->block_size;
+}
+
+static int online_one_memblock(struct daxctl_dev *dev, char *path)
+{
+ const char *devname = daxctl_dev_get_devname(dev);
+ struct daxctl_ctx *ctx = daxctl_dev_get_ctx(dev);
+ const char *mode = "online_movable";
+ char buf[SYSFS_ATTR_SIZE];
+ int rc;
+
+ rc = sysfs_read_attr(ctx, path, buf);
+ if (rc) {
+ err(ctx, "%s: Failed to read %s: %s\n",
+ devname, path, strerror(-rc));
+ return rc;
+ }
+
+ /*
+ * if already online, possibly due to kernel config or a udev rule,
+ * there is nothing to do and we can skip over the memblock
+ */
+ if (strncmp(buf, "online", 6) == 0)
+ return 1;
+
+ rc = sysfs_write_attr_quiet(ctx, path, mode);
+ if (rc) {
+ /*
+ * While we performed an already-online check above, there
+ * is still a TOCTOU hole where someone (such as a udev rule)
+ * may have raced to online the memory. In such a case,
+ * the sysfs store will fail, however we can check for this
+ * by simply reading the state again. If it changed to the
+ * desired state, then we don't have to error out.
+ */
+ if (sysfs_read_attr(ctx, path, buf) == 0) {
+ if (strncmp(buf, "online", 6) == 0)
+ return 1;
+ }
+ err(ctx, "%s: Failed to online %s: %s\n",
+ devname, path, strerror(-rc));
+ }
+ return rc;
+}
+
+static int offline_one_memblock(struct daxctl_dev *dev, char *path)
+{
+ const char *devname = daxctl_dev_get_devname(dev);
+ struct daxctl_ctx *ctx = daxctl_dev_get_ctx(dev);
+ const char *mode = "offline";
+ char buf[SYSFS_ATTR_SIZE];
+ int rc;
+
+ rc = sysfs_read_attr(ctx, path, buf);
+ if (rc) {
+ err(ctx, "%s: Failed to read %s: %s\n",
+ devname, path, strerror(-rc));
+ return rc;
+ }
+
+ /* if already offline, there is nothing to do */
+ if (strncmp(buf, "offline", 7) == 0)
+ return 1;
+
+ rc = sysfs_write_attr_quiet(ctx, path, mode);
+ if (rc) {
+ /* Close the TOCTOU hole like in online_one_memblock() above */
+ if (sysfs_read_attr(ctx, path, buf) == 0) {
+ if (strncmp(buf, "offline", 7) == 0)
+ return 1;
+ }
+ err(ctx, "%s: Failed to offline %s: %s\n",
+ devname, path, strerror(-rc));
+ }
+ return rc;
+}
+
+static int memblock_is_online(struct daxctl_dev *dev, char *path)
+{
+ const char *devname = daxctl_dev_get_devname(dev);
+ struct daxctl_ctx *ctx = daxctl_dev_get_ctx(dev);
+ char buf[SYSFS_ATTR_SIZE];
+ int rc;
+
+ rc = sysfs_read_attr(ctx, path, buf);
+ if (rc) {
+ err(ctx, "%s: Failed to read %s: %s\n",
+ devname, path, strerror(-rc));
+ return rc;
+ }
+
+ if (strncmp(buf, "online", 6) == 0)
+ return 1;
+
+ /* offline */
+ return 0;
+}
+
+static bool memblock_in_dev(struct daxctl_dev *dev, const char *memblock)
+{
+ struct daxctl_memory *mem = daxctl_dev_get_memory(dev);
+ const char *mem_base = "/sys/devices/system/memory/";
+ unsigned long long memblock_res, dev_start, dev_end;
+ const char *devname = daxctl_dev_get_devname(dev);
+ struct daxctl_ctx *ctx = daxctl_dev_get_ctx(dev);
+ unsigned long memblock_size;
+ int path_len = mem->buf_len;
+ char buf[SYSFS_ATTR_SIZE];
+ unsigned long phys_index;
+ char *path = mem->mem_buf;
+
+ if (snprintf(path, path_len, "%s/%s/phys_index",
+ mem_base, memblock) < 0)
+ return false;
+
+ if (sysfs_read_attr(ctx, path, buf) == 0) {
+ phys_index = strtoul(buf, NULL, 16);
+ if (phys_index == 0 || phys_index == ULONG_MAX) {
+ err(ctx, "%s: %s: Unable to determine phys_index: %s\n",
+ devname, memblock, strerror(errno));
+ return false;
+ }
+ } else {
+ err(ctx, "%s: %s: Unable to determine phys_index: %s\n",
+ devname, memblock, strerror(errno));
+ return false;
+ }
+
+ dev_start = daxctl_dev_get_resource(dev);
+ if (!dev_start) {
+ err(ctx, "%s: Unable to determine resource\n", devname);
+ return false;
+ }
+ dev_end = dev_start + daxctl_dev_get_size(dev);
+
+ memblock_size = daxctl_memory_get_block_size(mem);
+ if (!memblock_size) {
+ err(ctx, "%s: Unable to determine memory block size\n",
+ devname);
+ return false;
+ }
+ memblock_res = phys_index * memblock_size;
+
+ if (memblock_res >= dev_start && memblock_res <= dev_end)
+ return true;
+
+ return false;
+}
+
+static int op_for_one_memblock(struct daxctl_memory *mem, char *path,
+ enum memory_op op)
+{
+ struct daxctl_dev *dev = daxctl_memory_get_dev(mem);
+ const char *devname = daxctl_dev_get_devname(dev);
+ struct daxctl_ctx *ctx = daxctl_dev_get_ctx(dev);
+ int rc;
+
+ switch (op) {
+ case MEM_SET_ONLINE:
+ return online_one_memblock(dev, path);
+ case MEM_SET_OFFLINE:
+ return offline_one_memblock(dev, path);
+ case MEM_IS_ONLINE:
+ rc = memblock_is_online(dev, path);
+ if (rc < 0)
+ return rc;
+ /*
+ * Retain the 'normal' semantics for if (memblock_is_online()),
+ * but since count needs rc == 0, we'll just flip rc for this op
+ */
+ return !rc;
+ case MEM_COUNT:
+ return 0;
+ }
+
+ err(ctx, "%s: BUG: unknown op: %d\n", devname, op);
+ return -EINVAL;
+}
+
+static int daxctl_memory_op(struct daxctl_memory *mem, enum memory_op op)
+{
+ struct daxctl_dev *dev = daxctl_memory_get_dev(mem);
+ const char *devname = daxctl_dev_get_devname(dev);
+ struct daxctl_ctx *ctx = daxctl_dev_get_ctx(dev);
+ const char *node_path;
+ int rc, count = 0;
+ struct dirent *de;
+ DIR *node_dir;
+
+ node_path = daxctl_memory_get_node_path(mem);
+ if (!node_path) {
+ err(ctx, "%s: Failed to get node_path\n", devname);
+ return -ENXIO;
+ }
+
+ node_dir = opendir(node_path);
+ if (!node_dir)
+ return -errno;
+
+ errno = 0;
+ while ((de = readdir(node_dir)) != NULL) {
+ char *path = mem->mem_buf;
+ int len = mem->buf_len;
+
+ if (strncmp(de->d_name, "memory", 6) == 0) {
+ if (!memblock_in_dev(dev, de->d_name))
+ continue;
+ rc = snprintf(path, len, "%s/%s/state",
+ node_path, de->d_name);
+ if (rc < 0) {
+ rc = -ENOMEM;
+ goto out_dir;
+ }
+ rc = op_for_one_memblock(mem, path, op);
+ if (rc < 0)
+ goto out_dir;
+ if (rc == 0)
+ count++;
+ }
+ errno = 0;
+ }
+ if (errno) {
+ rc = -errno;
+ goto out_dir;
+ }
+ rc = count;
+
+out_dir:
+ closedir(node_dir);
+ return rc;
+}
+
+DAXCTL_EXPORT int daxctl_memory_online(struct daxctl_memory *mem)
+{
+ return daxctl_memory_op(mem, MEM_SET_ONLINE);
+}
+
+DAXCTL_EXPORT int daxctl_memory_offline(struct daxctl_memory *mem)
+{
+ return daxctl_memory_op(mem, MEM_SET_OFFLINE);
+}
+
+DAXCTL_EXPORT int daxctl_memory_is_online(struct daxctl_memory *mem)
+{
+ return daxctl_memory_op(mem, MEM_IS_ONLINE);
+}
+
+DAXCTL_EXPORT int daxctl_memory_num_sections(struct daxctl_memory *mem)
+{
+ return daxctl_memory_op(mem, MEM_COUNT);
+}
diff --git a/daxctl/lib/libdaxctl.sym b/daxctl/lib/libdaxctl.sym
index 1692624..bc18604 100644
--- a/daxctl/lib/libdaxctl.sym
+++ b/daxctl/lib/libdaxctl.sym
@@ -59,4 +59,13 @@ global:
daxctl_dev_enable_devdax;
daxctl_dev_enable_ram;
daxctl_dev_get_resource;
+ daxctl_dev_get_target_node;
+ daxctl_dev_get_memory;
+ daxctl_memory_get_dev;
+ daxctl_memory_get_node_path;
+ daxctl_memory_get_block_size;
+ daxctl_memory_online;
+ daxctl_memory_offline;
+ daxctl_memory_is_online;
+ daxctl_memory_num_sections;
} LIBDAXCTL_5;
diff --git a/daxctl/libdaxctl.h b/daxctl/libdaxctl.h
index adf55f3..fb6c3b1 100644
--- a/daxctl/libdaxctl.h
+++ b/daxctl/libdaxctl.h
@@ -73,6 +73,17 @@ int daxctl_dev_is_enabled(struct daxctl_dev *dev);
int daxctl_dev_disable(struct daxctl_dev *dev);
int daxctl_dev_enable_devdax(struct daxctl_dev *dev);
int daxctl_dev_enable_ram(struct daxctl_dev *dev);
+int daxctl_dev_get_target_node(struct daxctl_dev *dev);
+
+struct daxctl_memory;
+struct daxctl_memory *daxctl_dev_get_memory(struct daxctl_dev *dev);
+struct daxctl_dev *daxctl_memory_get_dev(struct daxctl_memory *mem);
+const char *daxctl_memory_get_node_path(struct daxctl_memory *mem);
+unsigned long daxctl_memory_get_block_size(struct daxctl_memory *mem);
+int daxctl_memory_online(struct daxctl_memory *mem);
+int daxctl_memory_offline(struct daxctl_memory *mem);
+int daxctl_memory_is_online(struct daxctl_memory *mem);
+int daxctl_memory_num_sections(struct daxctl_memory *mem);
#define daxctl_dev_foreach(region, dev) \
for (dev = daxctl_dev_get_first(region); \
--
2.20.1
_______________________________________________
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm
next prev parent reply other threads:[~2019-08-01 0:32 UTC|newest]
Thread overview: 15+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-08-01 0:29 [ndctl PATCH v9 00/13] daxctl: add a new reconfigure-device command Vishal Verma
2019-08-01 0:29 ` [ndctl PATCH v9 01/13] libdaxctl: add interfaces to get ctx and check device state Vishal Verma
2019-08-01 0:29 ` [ndctl PATCH v9 02/13] libdaxctl: add interfaces to enable/disable devices Vishal Verma
2019-08-01 0:29 ` [ndctl PATCH v9 03/13] libdaxctl: add an interface to retrieve the device resource Vishal Verma
2019-08-01 0:29 ` Vishal Verma [this message]
2019-08-05 23:57 ` [ndctl PATCH v9 04/13] libdaxctl: add a 'daxctl_memory' object for memory based operations Verma, Vishal L
2019-08-01 0:29 ` [ndctl PATCH v9 05/13] daxctl/list: add target_node for device listings Vishal Verma
2019-08-01 0:29 ` [ndctl PATCH v9 06/13] daxctl/list: display the mode for a dax device Vishal Verma
2019-08-01 0:29 ` [ndctl PATCH v9 07/13] daxctl: add a new reconfigure-device command Vishal Verma
2019-08-01 0:29 ` [ndctl PATCH v9 08/13] Documentation/daxctl: add a man page for daxctl-reconfigure-device Vishal Verma
2019-08-01 0:29 ` [ndctl PATCH v9 09/13] daxctl: add commands to online and offline memory Vishal Verma
2019-08-01 0:29 ` [ndctl PATCH v9 10/13] Documentation: Add man pages for daxctl-{on, off}line-memory Vishal Verma
2019-08-01 0:29 ` [ndctl PATCH v9 11/13] contrib/ndctl: fix region-id completions for daxctl Vishal Verma
2019-08-01 0:29 ` [ndctl PATCH v9 12/13] contrib/ndctl: add bash-completion for the new daxctl commands Vishal Verma
2019-08-01 0:29 ` [ndctl PATCH v9 13/13] test: Add a unit test for daxctl-reconfigure-device and friends Vishal Verma
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20190801002932.26430-5-vishal.l.verma@intel.com \
--to=vishal.l.verma@intel.com \
--cc=dave.hansen@linux.intel.com \
--cc=linux-nvdimm@lists.01.org \
--cc=pasha.tatashin@soleen.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).