All of lore.kernel.org
 help / color / mirror / Atom feed
From: Dan Williams <dan.j.williams@intel.com>
To: akpm@linux-foundation.org
Cc: Joao Martins <joao.m.martins@oracle.com>,
	dave.hansen@linux.intel.com, linux-mm@kvack.org,
	linux-nvdimm@lists.01.org, linux-kernel@vger.kernel.org
Subject: [PATCH v5 17/17] device-dax: add a range mapping allocation attribute
Date: Fri, 25 Sep 2020 12:13:15 -0700	[thread overview]
Message-ID: <160106119570.30709.4548889722645210610.stgit@dwillia2-desk3.amr.corp.intel.com> (raw)
In-Reply-To: <160106109960.30709.7379926726669669398.stgit@dwillia2-desk3.amr.corp.intel.com>

From: Joao Martins <joao.m.martins@oracle.com>

Add a sysfs attribute which denotes a range from the dax region to be
allocated.  It's a write-only @mapping sysfs attribute in the format of
'<start>-<end>' to allocate a range.  @start and @end use hexadecimal
values and the @pgoff is implicitly ordered with respect to previous
writes to @mapping sysfs, e.g. for a write of a range of length 1G the
pgoff is 0..1G(-4K); a second write will use @pgoff for 1G+4K..<size>.

This range mapping interface is useful for:

 1) Applications which want to implement their own allocation logic,
 and thus pick the desired ranges from dax_region.

 2) For use cases like VMM fast restart[0] where after kexec we
 want to preserve the same gpa<->phys mappings (as originally created
 before kexec).

[0] https://static.sched.com/hosted_files/kvmforum2019/66/VMM-fast-restart_kvmforum2019.pdf

Link: https://lkml.kernel.org/r/159643106970.4062302.10402616567780784722.stgit@dwillia2-desk3.amr.corp.intel.com
Link: https://lore.kernel.org/r/20200716172913.19658-5-joao.m.martins@oracle.com
Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/dax/bus.c |   64 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 64 insertions(+)

diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c
index 0ac4a9c0fd18..27513d311242 100644
--- a/drivers/dax/bus.c
+++ b/drivers/dax/bus.c
@@ -1043,6 +1043,67 @@ static ssize_t size_store(struct device *dev, struct device_attribute *attr,
 }
 static DEVICE_ATTR_RW(size);
 
+static ssize_t range_parse(const char *opt, size_t len, struct range *range)
+{
+	unsigned long long addr = 0;
+	char *start, *end, *str;
+	ssize_t rc = -EINVAL;	/* negative errno: a positive value reads as success */
+
+	str = kstrdup(opt, GFP_KERNEL);	/* private copy, strsep() writes into it */
+	if (!str)
+		return -ENOMEM;
+
+	end = str;
+	start = strsep(&end, "-");	/* split "<start>-<end>" at the first dash */
+	if (!start || !end)
+		goto err;
+
+	rc = kstrtoull(start, 16, &addr);	/* both bounds are parsed as hex */
+	if (rc)
+		goto err;
+	range->start = addr;
+
+	rc = kstrtoull(end, 16, &addr);
+	if (rc)
+		goto err;
+	range->end = addr;
+
+err:
+	kfree(str);
+	return rc;
+}
+
+static ssize_t mapping_store(struct device *dev, struct device_attribute *attr,
+		const char *buf, size_t len)
+{
+	struct dev_dax *dev_dax = to_dev_dax(dev);
+	struct dax_region *dax_region = dev_dax->region;
+	size_t to_alloc;
+	struct range r;
+	ssize_t rc;
+
+	rc = range_parse(buf, len, &r);	/* buf holds "<start>-<end>" in hex */
+	if (rc)
+		return rc;
+
+	rc = -ENXIO;	/* result unless alloc_dev_dax_range() below is reached */
+	device_lock(dax_region->dev);	/* region lock first, then the device */
+	if (!dax_region->dev->driver) {
+		device_unlock(dax_region->dev);
+		return rc;
+	}
+	device_lock(dev);
+
+	to_alloc = range_len(&r);
+	if (alloc_is_aligned(dev_dax, to_alloc))
+		rc = alloc_dev_dax_range(dev_dax, r.start, to_alloc);
+	device_unlock(dev);
+	device_unlock(dax_region->dev);
+
+	return rc == 0 ? len : rc;	/* success consumes the whole write */
+}
+static DEVICE_ATTR_WO(mapping);
+
 static ssize_t align_show(struct device *dev,
 		struct device_attribute *attr, char *buf)
 {
@@ -1175,6 +1236,8 @@ static umode_t dev_dax_visible(struct kobject *kobj, struct attribute *a, int n)
 		return 0;
 	if (a == &dev_attr_numa_node.attr && !IS_ENABLED(CONFIG_NUMA))
 		return 0;
+	if (a == &dev_attr_mapping.attr && is_static(dax_region))
+		return 0;
 	if ((a == &dev_attr_align.attr ||
 	     a == &dev_attr_size.attr) && is_static(dax_region))
 		return 0444;
@@ -1184,6 +1247,7 @@ static umode_t dev_dax_visible(struct kobject *kobj, struct attribute *a, int n)
 static struct attribute *dev_dax_attributes[] = {
 	&dev_attr_modalias.attr,
 	&dev_attr_size.attr,
+	&dev_attr_mapping.attr,
 	&dev_attr_target_node.attr,
 	&dev_attr_align.attr,
 	&dev_attr_resource.attr,
_______________________________________________
Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org
To unsubscribe send an email to linux-nvdimm-leave@lists.01.org

WARNING: multiple messages have this Message-ID (diff)
From: Dan Williams <dan.j.williams@intel.com>
To: akpm@linux-foundation.org
Cc: Joao Martins <joao.m.martins@oracle.com>,
	vishal.l.verma@intel.com, dave.hansen@linux.intel.com,
	linux-mm@kvack.org, linux-nvdimm@lists.01.org,
	linux-kernel@vger.kernel.org
Subject: [PATCH v5 17/17] device-dax: add a range mapping allocation attribute
Date: Fri, 25 Sep 2020 12:13:15 -0700	[thread overview]
Message-ID: <160106119570.30709.4548889722645210610.stgit@dwillia2-desk3.amr.corp.intel.com> (raw)
In-Reply-To: <160106109960.30709.7379926726669669398.stgit@dwillia2-desk3.amr.corp.intel.com>

From: Joao Martins <joao.m.martins@oracle.com>

Add a sysfs attribute which denotes a range from the dax region to be
allocated.  It's a write-only @mapping sysfs attribute in the format of
'<start>-<end>' to allocate a range.  @start and @end use hexadecimal
values and the @pgoff is implicitly ordered with respect to previous
writes to @mapping sysfs, e.g. for a write of a range of length 1G the
pgoff is 0..1G(-4K); a second write will use @pgoff for 1G+4K..<size>.

This range mapping interface is useful for:

 1) Applications which want to implement their own allocation logic,
 and thus pick the desired ranges from dax_region.

 2) For use cases like VMM fast restart[0] where after kexec we
 want to preserve the same gpa<->phys mappings (as originally created
 before kexec).

[0] https://static.sched.com/hosted_files/kvmforum2019/66/VMM-fast-restart_kvmforum2019.pdf

Link: https://lkml.kernel.org/r/159643106970.4062302.10402616567780784722.stgit@dwillia2-desk3.amr.corp.intel.com
Link: https://lore.kernel.org/r/20200716172913.19658-5-joao.m.martins@oracle.com
Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/dax/bus.c |   64 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 64 insertions(+)

diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c
index 0ac4a9c0fd18..27513d311242 100644
--- a/drivers/dax/bus.c
+++ b/drivers/dax/bus.c
@@ -1043,6 +1043,67 @@ static ssize_t size_store(struct device *dev, struct device_attribute *attr,
 }
 static DEVICE_ATTR_RW(size);
 
+static ssize_t range_parse(const char *opt, size_t len, struct range *range)
+{
+	unsigned long long addr = 0;
+	char *start, *end, *str;
+	ssize_t rc = -EINVAL;	/* negative errno: a positive value reads as success */
+
+	str = kstrdup(opt, GFP_KERNEL);	/* private copy, strsep() writes into it */
+	if (!str)
+		return -ENOMEM;
+
+	end = str;
+	start = strsep(&end, "-");	/* split "<start>-<end>" at the first dash */
+	if (!start || !end)
+		goto err;
+
+	rc = kstrtoull(start, 16, &addr);	/* both bounds are parsed as hex */
+	if (rc)
+		goto err;
+	range->start = addr;
+
+	rc = kstrtoull(end, 16, &addr);
+	if (rc)
+		goto err;
+	range->end = addr;
+
+err:
+	kfree(str);
+	return rc;
+}
+
+static ssize_t mapping_store(struct device *dev, struct device_attribute *attr,
+		const char *buf, size_t len)
+{
+	struct dev_dax *dev_dax = to_dev_dax(dev);
+	struct dax_region *dax_region = dev_dax->region;
+	size_t to_alloc;
+	struct range r;
+	ssize_t rc;
+
+	rc = range_parse(buf, len, &r);	/* buf holds "<start>-<end>" in hex */
+	if (rc)
+		return rc;
+
+	rc = -ENXIO;	/* result unless alloc_dev_dax_range() below is reached */
+	device_lock(dax_region->dev);	/* region lock first, then the device */
+	if (!dax_region->dev->driver) {
+		device_unlock(dax_region->dev);
+		return rc;
+	}
+	device_lock(dev);
+
+	to_alloc = range_len(&r);
+	if (alloc_is_aligned(dev_dax, to_alloc))
+		rc = alloc_dev_dax_range(dev_dax, r.start, to_alloc);
+	device_unlock(dev);
+	device_unlock(dax_region->dev);
+
+	return rc == 0 ? len : rc;	/* success consumes the whole write */
+}
+static DEVICE_ATTR_WO(mapping);
+
 static ssize_t align_show(struct device *dev,
 		struct device_attribute *attr, char *buf)
 {
@@ -1175,6 +1236,8 @@ static umode_t dev_dax_visible(struct kobject *kobj, struct attribute *a, int n)
 		return 0;
 	if (a == &dev_attr_numa_node.attr && !IS_ENABLED(CONFIG_NUMA))
 		return 0;
+	if (a == &dev_attr_mapping.attr && is_static(dax_region))
+		return 0;
 	if ((a == &dev_attr_align.attr ||
 	     a == &dev_attr_size.attr) && is_static(dax_region))
 		return 0444;
@@ -1184,6 +1247,7 @@ static umode_t dev_dax_visible(struct kobject *kobj, struct attribute *a, int n)
 static struct attribute *dev_dax_attributes[] = {
 	&dev_attr_modalias.attr,
 	&dev_attr_size.attr,
+	&dev_attr_mapping.attr,
 	&dev_attr_target_node.attr,
 	&dev_attr_align.attr,
 	&dev_attr_resource.attr,


  parent reply	other threads:[~2020-09-25 19:31 UTC|newest]

Thread overview: 68+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-09-25 19:11 [PATCH v5 00/17] device-dax: support sub-dividing soft-reserved ranges Dan Williams
2020-09-25 19:11 ` Dan Williams
2020-09-25 19:11 ` [PATCH v5 01/17] device-dax: make pgmap optional for instance creation Dan Williams
2020-09-25 19:11   ` Dan Williams
2020-10-01  8:41   ` David Hildenbrand
2020-10-01  8:41     ` David Hildenbrand
2020-10-01 16:54     ` Dan Williams
2020-10-01 16:54       ` Dan Williams
2020-10-01 16:54       ` Dan Williams
2020-10-01 17:39       ` David Hildenbrand
2020-10-01 17:39         ` David Hildenbrand
2020-10-01 19:12         ` Dan Williams
2020-10-01 19:12           ` Dan Williams
2020-10-01 19:12           ` Dan Williams
2020-09-25 19:11 ` [PATCH v5 02/17] device-dax/kmem: introduce dax_kmem_range() Dan Williams
2020-09-25 19:11   ` Dan Williams
2020-09-30 16:14   ` David Hildenbrand
2020-09-30 16:14     ` David Hildenbrand
2020-09-25 19:11 ` [PATCH v5 03/17] device-dax/kmem: move resource name tracking to drvdata Dan Williams
2020-09-25 19:11   ` Dan Williams
2020-09-30 16:19   ` David Hildenbrand
2020-09-30 16:19     ` David Hildenbrand
2020-09-25 19:12 ` [PATCH v5 04/17] device-dax/kmem: replace release_resource() with release_mem_region() Dan Williams
2020-09-25 19:12   ` Dan Williams
2020-09-30 16:23   ` David Hildenbrand
2020-09-30 16:23     ` David Hildenbrand
2020-09-30 17:28     ` Dan Williams
2020-09-30 17:28       ` Dan Williams
2020-09-30 17:28       ` Dan Williams
2020-09-25 19:12 ` [PATCH v5 05/17] device-dax: add an allocation interface for device-dax instances Dan Williams
2020-09-25 19:12   ` Dan Williams
2020-09-25 19:12 ` [PATCH v5 06/17] device-dax: introduce 'struct dev_dax' typed-driver operations Dan Williams
2020-09-25 19:12   ` Dan Williams
2020-09-25 19:12 ` [PATCH v5 07/17] device-dax: introduce 'seed' devices Dan Williams
2020-09-25 19:12   ` Dan Williams
2020-09-25 19:12 ` [PATCH v5 08/17] drivers/base: make device_find_child_by_name() compatible with sysfs inputs Dan Williams
2020-09-25 19:12   ` Dan Williams
2020-09-25 19:12 ` [PATCH v5 09/17] device-dax: add resize support Dan Williams
2020-09-25 19:12   ` Dan Williams
2020-09-25 19:12 ` [PATCH v5 10/17] mm/memremap_pages: convert to 'struct range' Dan Williams
2020-09-25 19:12   ` Dan Williams
2020-09-28 19:12   ` boris.ostrovsky
2020-09-28 19:12     ` boris.ostrovsky
2020-09-25 19:12 ` [PATCH v5 11/17] mm/memremap_pages: support multiple ranges per invocation Dan Williams
2020-09-25 19:12   ` Dan Williams
2020-09-25 19:12 ` [PATCH v5 12/17] device-dax: add dis-contiguous resource support Dan Williams
2020-09-25 19:12   ` Dan Williams
2020-09-25 19:12 ` [PATCH v5 13/17] device-dax: introduce 'mapping' devices Dan Williams
2020-09-25 19:12   ` Dan Williams
2020-09-25 19:12 ` [PATCH v5 14/17] device-dax: make align a per-device property Dan Williams
2020-09-25 19:12   ` Dan Williams
2020-09-25 19:13 ` [PATCH v5 15/17] device-dax: add an 'align' attribute Dan Williams
2020-09-25 19:13   ` Dan Williams
2020-09-26  2:22   ` Andrew Morton
2020-09-26  2:22     ` Andrew Morton
2020-09-26  3:31     ` Dan Williams
2020-09-26  3:31       ` Dan Williams
2020-09-26  3:31       ` Dan Williams
2020-09-25 19:13 ` [PATCH v5 16/17] dax/hmem: introduce dax_hmem.region_idle parameter Dan Williams
2020-09-25 19:13   ` Dan Williams
2020-09-25 19:13 ` Dan Williams [this message]
2020-09-25 19:13   ` [PATCH v5 17/17] device-dax: add a range mapping allocation attribute Dan Williams
2020-09-25 20:51 ` [PATCH v5 00/17] device-dax: support sub-dividing soft-reserved ranges Joao Martins
2020-09-25 20:51   ` Joao Martins
2020-09-25 21:01   ` Dan Williams
2020-09-25 21:01     ` Dan Williams
2020-09-25 21:05     ` Joao Martins
2020-09-25 21:05       ` Joao Martins

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=160106119570.30709.4548889722645210610.stgit@dwillia2-desk3.amr.corp.intel.com \
    --to=dan.j.williams@intel.com \
    --cc=akpm@linux-foundation.org \
    --cc=dave.hansen@linux.intel.com \
    --cc=joao.m.martins@oracle.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-nvdimm@lists.01.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.