From: Dan Williams <dan.j.williams@intel.com>
To: linux-nvdimm@lists.01.org
Cc: David Hildenbrand <david@redhat.com>,
Dave Hansen <dave.hansen@linux.intel.com>,
Pavel Tatashin <pasha.tatashin@soleen.com>,
peterz@infradead.org, ard.biesheuvel@linaro.org,
linux-mm@kvack.org, linux-kernel@vger.kernel.org,
linux-acpi@vger.kernel.org, hch@lst.de,
joao.m.martins@oracle.com
Subject: [PATCH v2 14/22] device-dax: Kill dax_kmem_res
Date: Sun, 12 Jul 2020 09:27:21 -0700 [thread overview]
Message-ID: <159457124129.754248.10028584123818131641.stgit@dwillia2-desk3.amr.corp.intel.com> (raw)
In-Reply-To: <159457116473.754248.7879464730875147365.stgit@dwillia2-desk3.amr.corp.intel.com>
Several related issues around this unneeded attribute:
- The dax_kmem_res property allows the kmem driver to stash the adjusted
resource range that was used for the hotplug operation, but that can be
recalculated from the original base range.
- kmem is using an open coded release_resource() + kfree() when an
idiomatic release_mem_region() is sufficient.
- The driver managed resource need only manage the busy flag. Other flags
are of no concern to the kmem driver. In fact, if kmem inherits some
memory range that add_memory_driver_managed() rejects, that is a
memory-hotplug-core policy that the driver is in no position to
override.
- The implementation trusts that a failed remove_memory() results in the
entire resource range remaining pinned busy. The driver need not make
that layering-violation assumption; it can just maintain the busy state
in its local resource.
- The "Hot-remove not yet implemented." comment is stale since hotremove
support is now included.
Cc: David Hildenbrand <david@redhat.com>
Cc: Vishal Verma <vishal.l.verma@intel.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Pavel Tatashin <pasha.tatashin@soleen.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
drivers/dax/dax-private.h | 3 -
drivers/dax/kmem.c | 123 +++++++++++++++++++++------------------------
2 files changed, 58 insertions(+), 68 deletions(-)
diff --git a/drivers/dax/dax-private.h b/drivers/dax/dax-private.h
index 6779f683671d..12a2dbc43b40 100644
--- a/drivers/dax/dax-private.h
+++ b/drivers/dax/dax-private.h
@@ -42,8 +42,6 @@ struct dax_region {
* @dev - device core
* @pgmap - pgmap for memmap setup / lifetime (driver owned)
* @range: resource range for the instance
- * @dax_mem_res: physical address range of hotadded DAX memory
- * @dax_mem_name: name for hotadded DAX memory via add_memory_driver_managed()
*/
struct dev_dax {
struct dax_region *region;
@@ -52,7 +50,6 @@ struct dev_dax {
struct device dev;
struct dev_pagemap *pgmap;
struct range range;
- struct resource *dax_kmem_res;
};
static inline u64 range_len(struct range *range)
diff --git a/drivers/dax/kmem.c b/drivers/dax/kmem.c
index 5bb133df147d..77e25361fbeb 100644
--- a/drivers/dax/kmem.c
+++ b/drivers/dax/kmem.c
@@ -19,16 +19,24 @@ static const char *kmem_name;
/* Set if any memory will remain added when the driver will be unloaded. */
static bool any_hotremove_failed;
+static struct range dax_kmem_range(struct dev_dax *dev_dax)
+{
+ struct range range;
+
+ /* memory-block align the hotplug range */
+ range.start = ALIGN(dev_dax->range.start, memory_block_size_bytes());
+ range.end = ALIGN_DOWN(dev_dax->range.end + 1,
+ memory_block_size_bytes()) - 1;
+ return range;
+}
+
int dev_dax_kmem_probe(struct device *dev)
{
struct dev_dax *dev_dax = to_dev_dax(dev);
- struct range *range = &dev_dax->range;
- resource_size_t kmem_start;
- resource_size_t kmem_size;
- resource_size_t kmem_end;
- struct resource *new_res;
- const char *new_res_name;
- int numa_node;
+ struct range range = dax_kmem_range(dev_dax);
+ int numa_node = dev_dax->target_node;
+ struct resource *res;
+ char *res_name;
int rc;
/*
@@ -37,109 +45,94 @@ int dev_dax_kmem_probe(struct device *dev)
* could be mixed in a node with faster memory, causing
* unavoidable performance issues.
*/
- numa_node = dev_dax->target_node;
if (numa_node < 0) {
dev_warn(dev, "rejecting DAX region with invalid node: %d\n",
numa_node);
return -EINVAL;
}
- /* Hotplug starting at the beginning of the next block: */
- kmem_start = ALIGN(range->start, memory_block_size_bytes());
-
- kmem_size = range_len(range);
- /* Adjust the size down to compensate for moving up kmem_start: */
- kmem_size -= kmem_start - range->start;
- /* Align the size down to cover only complete blocks: */
- kmem_size &= ~(memory_block_size_bytes() - 1);
- kmem_end = kmem_start + kmem_size;
-
- new_res_name = kstrdup(dev_name(dev), GFP_KERNEL);
- if (!new_res_name)
+ res_name = kstrdup(dev_name(dev), GFP_KERNEL);
+ if (!res_name)
return -ENOMEM;
- /* Region is permanently reserved if hotremove fails. */
- new_res = request_mem_region(kmem_start, kmem_size, new_res_name);
- if (!new_res) {
- dev_warn(dev, "could not reserve region [%pa-%pa]\n",
- &kmem_start, &kmem_end);
- kfree(new_res_name);
+ res = request_mem_region(range.start, range_len(&range), res_name);
+ if (!res) {
+ dev_warn(dev, "could not reserve region [%#llx-%#llx]\n",
+ range.start, range.end);
+ kfree(res_name);
return -EBUSY;
}
/*
- * Set flags appropriate for System RAM. Leave ..._BUSY clear
- * so that add_memory() can add a child resource. Do not
- * inherit flags from the parent since it may set new flags
- * unknown to us that will break add_memory() below.
+ * Temporarily clear busy to allow add_memory_driver_managed()
+ * to claim it.
*/
- new_res->flags = IORESOURCE_SYSTEM_RAM;
+ res->flags &= ~IORESOURCE_BUSY;
/*
* Ensure that future kexec'd kernels will not treat this as RAM
* automatically.
*/
- rc = add_memory_driver_managed(numa_node, new_res->start,
- resource_size(new_res), kmem_name);
+ rc = add_memory_driver_managed(numa_node, res->start,
+ resource_size(res), kmem_name);
+
+ res->flags |= IORESOURCE_BUSY;
if (rc) {
- release_resource(new_res);
- kfree(new_res);
- kfree(new_res_name);
+ release_mem_region(range.start, range_len(&range));
+ kfree(res_name);
return rc;
}
- dev_dax->dax_kmem_res = new_res;
+
+ dev_set_drvdata(dev, res_name);
return 0;
}
#ifdef CONFIG_MEMORY_HOTREMOVE
-static int dev_dax_kmem_remove(struct device *dev)
+static void dax_kmem_release(struct dev_dax *dev_dax)
{
- struct dev_dax *dev_dax = to_dev_dax(dev);
- struct resource *res = dev_dax->dax_kmem_res;
- resource_size_t kmem_start = res->start;
- resource_size_t kmem_size = resource_size(res);
- const char *res_name = res->name;
int rc;
+ struct device *dev = &dev_dax->dev;
+ const char *res_name = dev_get_drvdata(dev);
+ struct range range = dax_kmem_range(dev_dax);
/*
* We have one shot for removing memory, if some memory blocks were not
* offline prior to calling this function remove_memory() will fail, and
* there is no way to hotremove this memory until reboot because device
- * unbind will succeed even if we return failure.
+ * unbind will proceed regardless of the remove_memory result.
*/
- rc = remove_memory(dev_dax->target_node, kmem_start, kmem_size);
- if (rc) {
- any_hotremove_failed = true;
- dev_err(dev,
- "DAX region %pR cannot be hotremoved until the next reboot\n",
- res);
- return rc;
+ rc = remove_memory(dev_dax->target_node, range.start, range_len(&range));
+ if (rc == 0) {
+ release_mem_region(range.start, range_len(&range));
+ dev_set_drvdata(dev, NULL);
+ kfree(res_name);
+ return;
}
- /* Release and free dax resources */
- release_resource(res);
- kfree(res);
- kfree(res_name);
- dev_dax->dax_kmem_res = NULL;
-
- return 0;
+ any_hotremove_failed = true;
+ dev_err(dev, "%#llx-%#llx cannot be hotremoved until the next reboot\n",
+ range.start, range.end);
}
#else
-static int dev_dax_kmem_remove(struct device *dev)
+static void dax_kmem_release(struct dev_dax *dev_dax)
{
/*
- * Without hotremove purposely leak the request_mem_region() for the
- * device-dax range and return '0' to ->remove() attempts. The removal
- * of the device from the driver always succeeds, but the region is
- * permanently pinned as reserved by the unreleased
- * request_mem_region().
+ * Without hotremove, purposely leak the request_mem_region() for
+ * the device-dax range. The removal of the device from the
+ * driver always succeeds, but the region is permanently pinned
+ * as reserved by the unreleased request_mem_region().
*/
any_hotremove_failed = true;
- return 0;
}
#endif /* CONFIG_MEMORY_HOTREMOVE */
+static int dev_dax_kmem_remove(struct device *dev)
+{
+ dax_kmem_release(to_dev_dax(dev));
+ return 0;
+}
+
static struct dax_device_driver device_dax_kmem_driver = {
.drv = {
.probe = dev_dax_kmem_probe,
_______________________________________________
Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org
To unsubscribe send an email to linux-nvdimm-leave@lists.01.org
next prev parent reply other threads:[~2020-07-12 16:43 UTC|newest]
Thread overview: 46+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-07-12 16:26 [PATCH v2 00/22] device-dax: Support sub-dividing soft-reserved ranges Dan Williams
2020-07-12 16:26 ` [PATCH v2 01/22] x86/numa: Cleanup configuration dependent command-line options Dan Williams
2020-07-12 16:26 ` [PATCH v2 02/22] x86/numa: Add 'nohmat' option Dan Williams
2020-07-12 16:58 ` Randy Dunlap
2020-07-12 16:26 ` [PATCH v2 03/22] efi/fake_mem: Arrange for a resource entry per efi_fake_mem instance Dan Williams
2020-07-12 16:26 ` [PATCH v2 04/22] ACPI: HMAT: Refactor hmat_register_target_device to hmem_register_device Dan Williams
2020-07-12 16:26 ` [PATCH v2 05/22] resource: Report parent to walk_iomem_res_desc() callback Dan Williams
2020-07-12 16:26 ` [PATCH v2 06/22] x86: Move NUMA_KEEP_MEMINFO and related definition to x86-internals Dan Williams
2020-07-12 16:26 ` [PATCH v2 07/22] numa: Introduce a generic memory_add_physaddr_to_nid() Dan Williams
2020-07-13 6:58 ` Mike Rapoport
2020-07-13 15:42 ` Dan Williams
2020-07-12 16:26 ` [PATCH v2 08/22] memblock: Introduce a generic phys_addr_to_target_node() Dan Williams
2020-07-13 7:03 ` Mike Rapoport
2020-07-13 15:48 ` Dan Williams
2020-07-14 1:36 ` Justin He
2020-07-12 16:26 ` [PATCH v2 09/22] arm64: Convert to generic memblock for numa-info Dan Williams
2020-07-12 16:26 ` [PATCH v2 10/22] ACPI: HMAT: Attach a device for each soft-reserved range Dan Williams
2020-07-12 16:27 ` [PATCH v2 11/22] device-dax: Drop the dax_region.pfn_flags attribute Dan Williams
2020-07-12 16:27 ` [PATCH v2 12/22] device-dax: Move instance creation parameters to 'struct dev_dax_data' Dan Williams
2020-07-12 16:27 ` [PATCH v2 13/22] device-dax: Make pgmap optional for instance creation Dan Williams
2020-07-12 16:27 ` Dan Williams [this message]
2020-07-12 16:27 ` [PATCH v2 15/22] device-dax: Add an allocation interface for device-dax instances Dan Williams
2020-07-12 16:27 ` [PATCH v2 16/22] device-dax: Introduce 'seed' devices Dan Williams
2020-07-12 16:27 ` [PATCH v2 17/22] drivers/base: Make device_find_child_by_name() compatible with sysfs inputs Dan Williams
2020-07-12 17:09 ` Greg Kroah-Hartman
2020-07-13 15:39 ` Dan Williams
2020-07-13 15:52 ` Greg Kroah-Hartman
2020-07-13 16:09 ` Dan Williams
2020-07-13 16:12 ` Greg Kroah-Hartman
2020-07-13 16:36 ` Dan Williams
2020-07-12 16:27 ` [PATCH v2 18/22] device-dax: Add resize support Dan Williams
2020-07-12 16:27 ` [PATCH v2 19/22] mm/memremap_pages: Convert to 'struct range' Dan Williams
2020-07-13 16:36 ` Ralph Campbell
2020-07-13 16:54 ` Dan Williams
2020-07-12 16:27 ` [PATCH v2 20/22] mm/memremap_pages: Support multiple ranges per invocation Dan Williams
2020-07-12 16:27 ` [PATCH v2 21/22] device-dax: Add dis-contiguous resource support Dan Williams
2020-07-12 16:28 ` [PATCH v2 22/22] device-dax: Introduce 'mapping' devices Dan Williams
2020-07-16 13:18 ` Joao Martins
2020-07-16 16:00 ` Dan Williams
2020-07-16 19:04 ` Joao Martins
2020-07-16 17:29 ` [PATCH v1 0/4] device-dax: Further improvements to subdivision Joao Martins
2020-07-16 17:29 ` [PATCH v1 1/4] device-dax: Make align a per-device property Joao Martins
2020-07-16 17:29 ` [PATCH v1 2/4] device-dax: Add an 'align' attribute Joao Martins
2020-07-31 14:52 ` Dan Williams
2020-07-16 17:29 ` [PATCH v1 3/4] dax/hmem: Introduce dax_hmem.region_idle parameter Joao Martins
2020-07-16 17:29 ` [PATCH v1 4/4] device-dax: Add a range mapping allocation attribute Joao Martins
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=159457124129.754248.10028584123818131641.stgit@dwillia2-desk3.amr.corp.intel.com \
--to=dan.j.williams@intel.com \
--cc=ard.biesheuvel@linaro.org \
--cc=dave.hansen@linux.intel.com \
--cc=david@redhat.com \
--cc=hch@lst.de \
--cc=joao.m.martins@oracle.com \
--cc=linux-acpi@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=linux-nvdimm@lists.01.org \
--cc=pasha.tatashin@soleen.com \
--cc=peterz@infradead.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).