Linux-NVDIMM Archive on lore.kernel.org
 help / color / Atom feed
From: Vishal Verma <vishal.l.verma@intel.com>
To: <linux-nvdimm@lists.01.org>
Cc: Dave Hansen <dave.hansen@linux.intel.com>,
	Ben Olson <ben.olson@intel.com>,
	Michal Biesek <michal.biesek@intel.com>
Subject: [ndctl PATCH 07/10] daxctl: detect races when onlining memory blocks
Date: Wed,  2 Oct 2019 17:49:22 -0600
Message-ID: <20191002234925.9190-8-vishal.l.verma@intel.com> (raw)
In-Reply-To: <20191002234925.9190-1-vishal.l.verma@intel.com>

Sometimes, system configuration can result in new memory blocks getting
onlined automatically. Often, these auto-onlining mechanisms don't
provide a choice or configurability in the matter of which zone is used
for these new blocks, and they just end up in ZONE_NORMAL.

Usually, for hot-plugged memory, ZONE_NORMAL is undesirable because:
 - An application might want total control over this memory
 - ZONE_NORMAL precludes hot-removal of this memory
 - Having kernel data structures in this memory, especially performance
   sensitive ones, such as page tables, may be undesirable.

Thus report if a race condition is encountered while onlining memory,
and provide the user options to remedy it.

Clarify the default zone expectations, and the race detection behavior
in the daxctl-reconfigure-device man page, and move the relevant section
under the 'Description' header, instead of hidden away under the
'--no-online' option.

Cc: Ben Olson <ben.olson@intel.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Vishal Verma <vishal.l.verma@intel.com>
---
 daxctl/device.c        |  9 ++++++
 daxctl/lib/libdaxctl.c | 62 +++++++++++++++++++++++++++++-------------
 2 files changed, 52 insertions(+), 19 deletions(-)

diff --git a/daxctl/device.c b/daxctl/device.c
index 920efc6..28698bf 100644
--- a/daxctl/device.c
+++ b/daxctl/device.c
@@ -174,6 +174,15 @@ static int dev_online_memory(struct daxctl_dev *dev)
 			devname, strerror(-num_on));
 		return num_on;
 	}
+	if (num_on)
+		fprintf(stderr,
+		    "%s:\n  WARNING: detected a race while onlining memory\n"
+		    "  Some memory may not be in the expected zone. It is\n"
+		    "  recommended to disable any other onlining mechanisms,\n"
+		    "  and retry. If onlining is to be left to other agents,\n"
+		    "  use the --no-online option to suppress this warning\n",
+		    devname);
+
 	if (num_on == num_sections) {
 		fprintf(stderr, "%s: all memory sections (%d) already online\n",
 			devname, num_on);
diff --git a/daxctl/lib/libdaxctl.c b/daxctl/lib/libdaxctl.c
index 617887c..5a7e37c 100644
--- a/daxctl/lib/libdaxctl.c
+++ b/daxctl/lib/libdaxctl.c
@@ -1079,10 +1079,10 @@ static int memblock_is_online(struct daxctl_memory *mem, char *memblock)
 	return 0;
 }
 
-static int online_one_memblock(struct daxctl_memory *mem, char *memblock)
+static int online_one_memblock(struct daxctl_memory *mem, char *memblock,
+		int *status)
 {
 	struct daxctl_dev *dev = daxctl_memory_get_dev(mem);
-	const char *devname = daxctl_dev_get_devname(dev);
 	struct daxctl_ctx *ctx = daxctl_dev_get_ctx(dev);
 	const char *mode = "online_movable";
 	int len = mem->buf_len, rc;
@@ -1097,10 +1097,6 @@ static int online_one_memblock(struct daxctl_memory *mem, char *memblock)
 	if (rc < 0)
 		return -ENOMEM;
 
-	/*
-	 * if already online, possibly due to kernel config or a udev rule,
-	 * there is nothing to do and we can skip over the memblock
-	 */
 	rc = memblock_is_online(mem, memblock);
 	if (rc)
 		return rc;
@@ -1108,18 +1104,14 @@ static int online_one_memblock(struct daxctl_memory *mem, char *memblock)
 	rc = sysfs_write_attr_quiet(ctx, path, mode);
 	if (rc) {
 		/*
-		 * While we performed an already-online check above, there
-		 * is still a TOCTOU hole where someone (such as a udev rule)
-		 * may have raced to online the memory. In such a case,
-		 * the sysfs store will fail, however we can check for this
-		 * by simply reading the state again. If it changed to the
-		 * desired state, then we don't have to error out.
+		 * If the block got onlined, potentially by some other agent,
+		 * do nothing for now. There will be a full scan for zone
+		 * correctness later.
 		 */
-		if (memblock_is_online(mem, memblock))
-			return 1;
-		err(ctx, "%s: Failed to online %s: %s\n",
-			devname, path, strerror(-rc));
+		if (memblock_is_online(mem, memblock) == 1)
+			return 0;
 	}
+
 	return rc;
 }
 
@@ -1150,7 +1142,7 @@ static int offline_one_memblock(struct daxctl_memory *mem, char *memblock)
 
 	rc = sysfs_write_attr_quiet(ctx, path, mode);
 	if (rc) {
-		/* Close the TOCTOU hole like in online_one_memblock() above */
+		/* check if something raced us to offline (unlikely) */
 		if (!memblock_is_online(mem, memblock))
 			return 1;
 		err(ctx, "%s: Failed to offline %s: %s\n",
@@ -1274,7 +1266,7 @@ static int op_for_one_memblock(struct daxctl_memory *mem, char *memblock,
 
 	switch (op) {
 	case MEM_SET_ONLINE:
-		return online_one_memblock(mem, memblock);
+		return online_one_memblock(mem, memblock, status);
 	case MEM_SET_OFFLINE:
 		return offline_one_memblock(mem, memblock);
 	case MEM_IS_ONLINE:
@@ -1349,9 +1341,41 @@ out_dir:
 	return rc;
 }
 
+/*
+ * daxctl_memory_online() will online to ZONE_MOVABLE by default
+ */
 DAXCTL_EXPORT int daxctl_memory_online(struct daxctl_memory *mem)
 {
-	return daxctl_memory_op(mem, MEM_SET_ONLINE);
+	struct daxctl_dev *dev = daxctl_memory_get_dev(mem);
+	const char *devname = daxctl_dev_get_devname(dev);
+	struct daxctl_ctx *ctx = daxctl_dev_get_ctx(dev);
+	int rc;
+
+	rc = daxctl_memory_op(mem, MEM_SET_ONLINE);
+	if (rc)
+		return rc;
+
+	/*
+	 * Detect any potential races when blocks were being brought online by
+	 * checking the zone in which the memory blocks are at this point. If
+	 * any of the blocks are not in ZONE_MOVABLE, emit a warning.
+	 */
+	mem->zone = 0;
+	rc = daxctl_memory_op(mem, MEM_FIND_ZONE);
+	if (rc)
+		return rc;
+	if (mem->zone != MEM_ZONE_MOVABLE) {
+		err(ctx,
+		    "%s:\n  WARNING: detected a race while onlining memory\n"
+		    "  Some memory may not be in the expected zone. It is\n"
+		    "  recommended to disable any other onlining mechanisms,\n"
+		    "  and retry. If onlining is to be left to other agents,\n"
+		    "  use the --no-online option to suppress this warning\n",
+		    devname);
+		return -EBUSY;
+	}
+
+	return rc;
 }
 
 DAXCTL_EXPORT int daxctl_memory_offline(struct daxctl_memory *mem)
-- 
2.20.1
_______________________________________________
Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org
To unsubscribe send an email to linux-nvdimm-leave@lists.01.org

  parent reply index

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-10-02 23:49 [ndctl PATCH 00/10] fixes and movability for system-ram mode Vishal Verma
2019-10-02 23:49 ` [ndctl PATCH 01/10] libdaxctl: refactor path construction in op_for_one_memblock() Vishal Verma
2019-10-18 18:43   ` Dan Williams
2019-10-02 23:49 ` [ndctl PATCH 02/10] libdaxctl: refactor memblock_is_online() checks Vishal Verma
2019-10-18 18:45   ` Dan Williams
2019-10-02 23:49 ` [ndctl PATCH 03/10] daxctl/device.c: fix json output omission for reconfigure-device Vishal Verma
2019-10-18 18:46   ` Dan Williams
2019-10-02 23:49 ` [ndctl PATCH 04/10] libdaxctl: add an API to determine if memory is movable Vishal Verma
2019-10-18 18:54   ` Dan Williams
2019-10-18 19:57     ` Verma, Vishal L
2019-10-18 20:40       ` Dan Williams
2019-10-02 23:49 ` [ndctl PATCH 05/10] libdaxctl: allow memblock_in_dev() to return an error Vishal Verma
2019-10-18 19:15   ` Dan Williams
2019-10-02 23:49 ` [ndctl PATCH 06/10] daxctl: show a 'movable' attribute in device listings Vishal Verma
2019-10-18 19:16   ` Dan Williams
2019-10-02 23:49 ` Vishal Verma [this message]
2019-10-18 19:26   ` [ndctl PATCH 07/10] daxctl: detect races when onlining memory blocks Dan Williams
2019-10-02 23:49 ` [ndctl PATCH 08/10] Documentation: clarify memory movablity for reconfigure-device Vishal Verma
2019-10-18 20:46   ` Dan Williams
2019-10-18 20:50     ` Verma, Vishal L
2019-10-02 23:49 ` [ndctl PATCH 09/10] libdaxctl: add an API to online memory in a non-movable state Vishal Verma
2019-10-18 20:54   ` Dan Williams
2019-10-02 23:49 ` [ndctl PATCH 10/10] daxctl: add --no-movable option for onlining memory Vishal Verma
2019-10-18 20:58   ` Dan Williams
2019-10-18 21:04     ` Verma, Vishal L
2019-10-18 21:25       ` Dan Williams

Reply instructions:

You may reply publically to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20191002234925.9190-8-vishal.l.verma@intel.com \
    --to=vishal.l.verma@intel.com \
    --cc=ben.olson@intel.com \
    --cc=dave.hansen@linux.intel.com \
    --cc=linux-nvdimm@lists.01.org \
    --cc=michal.biesek@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Linux-NVDIMM Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-nvdimm/0 linux-nvdimm/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-nvdimm linux-nvdimm/ https://lore.kernel.org/linux-nvdimm \
		linux-nvdimm@lists.01.org
	public-inbox-index linux-nvdimm

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.01.lists.linux-nvdimm


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git