Linux-NVDIMM Archive on lore.kernel.org
 help / color / Atom feed
* [RFC PATCH 1/4] libnvdimm/namespace: Make namespace size validation arch dependent
@ 2019-10-28  9:48 Aneesh Kumar K.V
  2019-10-28  9:48 ` [RFC PATCH 2/4] libnvdimm/namespace: Disable the region if the namespace size is not aligned correctly Aneesh Kumar K.V
                   ` (4 more replies)
  0 siblings, 5 replies; 14+ messages in thread
From: Aneesh Kumar K.V @ 2019-10-28  9:48 UTC (permalink / raw)
  To: dan.j.williams, mpe; +Cc: linux-nvdimm, linuxppc-dev, Aneesh Kumar K.V

The page size used to map the namespace is arch dependent. For example
architectures like ppc64 use 16MB page size for direct-mapping. If the namespace
size is not aligned to the mapping page size, we can observe kernel crash
during namespace init and destroy.

This is due to kernel doing partial map/unmap of the resource range

BUG: Unable to handle kernel data access at 0xc001000406000000
Faulting instruction address: 0xc000000000090790
NIP [c000000000090790] arch_add_memory+0xc0/0x130
LR [c000000000090744] arch_add_memory+0x74/0x130
Call Trace:
 arch_add_memory+0x74/0x130 (unreliable)
 memremap_pages+0x74c/0xa30
 devm_memremap_pages+0x3c/0xa0
 pmem_attach_disk+0x188/0x770
 nvdimm_bus_probe+0xd8/0x470
 really_probe+0x148/0x570
 driver_probe_device+0x19c/0x1d0
 device_driver_attach+0xcc/0x100
 bind_store+0x134/0x1c0
 drv_attr_store+0x44/0x60
 sysfs_kf_write+0x74/0xc0
 kernfs_fop_write+0x1b4/0x290
 __vfs_write+0x3c/0x70
 vfs_write+0xd0/0x260
 ksys_write+0xdc/0x130
 system_call+0x5c/0x68

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 arch/arm64/mm/flush.c     | 11 +++++++++++
 arch/powerpc/lib/pmem.c   | 21 +++++++++++++++++++--
 arch/x86/mm/pageattr.c    | 12 ++++++++++++
 include/linux/libnvdimm.h |  1 +
 4 files changed, 43 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c
index ac485163a4a7..90c54c600023 100644
--- a/arch/arm64/mm/flush.c
+++ b/arch/arm64/mm/flush.c
@@ -91,4 +91,15 @@ void arch_invalidate_pmem(void *addr, size_t size)
 	__inval_dcache_area(addr, size);
 }
 EXPORT_SYMBOL_GPL(arch_invalidate_pmem);
+
+unsigned long arch_validate_namespace_size(unsigned int ndr_mappings, unsigned long size)
+{
+	u32 remainder;
+
+	div_u64_rem(size, PAGE_SIZE * ndr_mappings, &remainder);
+	if (remainder)
+		return PAGE_SIZE * ndr_mappings;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(arch_validate_namespace_size);
 #endif
diff --git a/arch/powerpc/lib/pmem.c b/arch/powerpc/lib/pmem.c
index 377712e85605..2e661a08dae5 100644
--- a/arch/powerpc/lib/pmem.c
+++ b/arch/powerpc/lib/pmem.c
@@ -17,14 +17,31 @@ void arch_wb_cache_pmem(void *addr, size_t size)
 	unsigned long start = (unsigned long) addr;
 	flush_dcache_range(start, start + size);
 }
-EXPORT_SYMBOL(arch_wb_cache_pmem);
+EXPORT_SYMBOL_GPL(arch_wb_cache_pmem);
 
 void arch_invalidate_pmem(void *addr, size_t size)
 {
 	unsigned long start = (unsigned long) addr;
 	flush_dcache_range(start, start + size);
 }
-EXPORT_SYMBOL(arch_invalidate_pmem);
+EXPORT_SYMBOL_GPL(arch_invalidate_pmem);
+
+unsigned long arch_validate_namespace_size(unsigned int ndr_mappings, unsigned long size)
+{
+	u32 remainder;
+	unsigned long linear_map_size;
+
+	if (radix_enabled())
+		linear_map_size = PAGE_SIZE;
+	else
+		linear_map_size = (1UL << mmu_psize_defs[mmu_linear_psize].shift);
+
+	div_u64_rem(size, linear_map_size * ndr_mappings, &remainder);
+	if (remainder)
+		return linear_map_size * ndr_mappings;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(arch_validate_namespace_size);
 
 /*
  * CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE symbols
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 0d09cc5aad61..023329d7dfac 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -310,6 +310,18 @@ void arch_invalidate_pmem(void *addr, size_t size)
 }
 EXPORT_SYMBOL_GPL(arch_invalidate_pmem);
 
+unsigned long arch_validate_namespace_size(unsigned int ndr_mappings, unsigned long size)
+{
+	u32 remainder;
+
+	div_u64_rem(size, PAGE_SIZE * ndr_mappings, &remainder);
+	if (remainder)
+		return PAGE_SIZE * ndr_mappings;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(arch_validate_namespace_size);
+
+
 static void __cpa_flush_all(void *arg)
 {
 	unsigned long cache = (unsigned long)arg;
diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h
index b6eddf912568..e2f8387d9ef4 100644
--- a/include/linux/libnvdimm.h
+++ b/include/linux/libnvdimm.h
@@ -291,4 +291,5 @@ static inline void arch_invalidate_pmem(void *addr, size_t size)
 }
 #endif
 
+unsigned long arch_validate_namespace_size(unsigned int ndr_mappings, unsigned long size);
 #endif /* __LIBNVDIMM_H__ */
-- 
2.21.0
_______________________________________________
Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org
To unsubscribe send an email to linux-nvdimm-leave@lists.01.org

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [RFC PATCH 2/4] libnvdimm/namespace: Disable the region if the namespace size is not aligned correctly
  2019-10-28  9:48 [RFC PATCH 1/4] libnvdimm/namespace: Make namespace size validation arch dependent Aneesh Kumar K.V
@ 2019-10-28  9:48 ` Aneesh Kumar K.V
  2019-10-28  9:48 ` [RFC PATCH 3/4] libnvdimm/namespace: Use direct-mapping page size to validate namespace size Aneesh Kumar K.V
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 14+ messages in thread
From: Aneesh Kumar K.V @ 2019-10-28  9:48 UTC (permalink / raw)
  To: dan.j.williams, mpe; +Cc: linux-nvdimm, linuxppc-dev, Aneesh Kumar K.V

During namespace initialization, if kernel finds the namespace size not
aligned as per arch-specific restriction, disable the region completely.

Even though kernel validates the namespace size while creating the namespace
nvdimm core still needs to make sure namespaces with wrong size are not initialized.
This can happen when users move SCM across different architectures and with
architectures like ppc64 when we use different MMU translation modes.

ppc64 allow booting systems with different translation mode based on kernel
parameter and the two translation mode (hash and radix) have different
direct-map page size restrictions.

Marking the region disabled enables the user to either re-init the namespace
using the dimm interface or boot back to the right kernel supporting the alignment.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 drivers/nvdimm/namespace_devs.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c
index cca0a3ba1d2c..fcde9f2bf2ea 100644
--- a/drivers/nvdimm/namespace_devs.c
+++ b/drivers/nvdimm/namespace_devs.c
@@ -1912,6 +1912,7 @@ static struct device *create_namespace_pmem(struct nd_region *nd_region,
 	struct nd_label_ent *label_ent;
 	struct nd_namespace_pmem *nspm;
 	struct nd_mapping *nd_mapping;
+	unsigned long map_align_size;
 	resource_size_t size = 0;
 	struct resource *res;
 	struct device *dev;
@@ -2010,6 +2011,20 @@ static struct device *create_namespace_pmem(struct nd_region *nd_region,
 
 	}
 
+	map_align_size = arch_validate_namespace_size(nd_region->ndr_mappings, size);
+	if (map_align_size) {
+		dev_err(&nd_region->dev, "%llu is not %ldK aligned\n", size,
+				map_align_size/ SZ_1K);
+		/*
+		 * Disable this region completely. A wrongly sized namespace
+		 * size implies the start address of other namespace would also
+		 * be wrong and we would find confusing crashes w.r.t
+		 * direct-map address.
+		 */
+		rc = -EINVAL;
+		goto err;
+	}
+
 	if (!nspm->alt_name || !nspm->uuid) {
 		rc = -ENOMEM;
 		goto err;
-- 
2.21.0
_______________________________________________
Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org
To unsubscribe send an email to linux-nvdimm-leave@lists.01.org

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [RFC PATCH 3/4] libnvdimm/namespace: Use direct-mapping page size to validate namespace size
  2019-10-28  9:48 [RFC PATCH 1/4] libnvdimm/namespace: Make namespace size validation arch dependent Aneesh Kumar K.V
  2019-10-28  9:48 ` [RFC PATCH 2/4] libnvdimm/namespace: Disable the region if the namespace size is not aligned correctly Aneesh Kumar K.V
@ 2019-10-28  9:48 ` Aneesh Kumar K.V
  2019-10-28  9:48 ` [RFC PATCH 4/4] libnvdimm/namespace: Add debug check while initializing namespace resource size Aneesh Kumar K.V
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 14+ messages in thread
From: Aneesh Kumar K.V @ 2019-10-28  9:48 UTC (permalink / raw)
  To: dan.j.williams, mpe; +Cc: linux-nvdimm, linuxppc-dev, Aneesh Kumar K.V

Architectures like ppc64 use different page size than PAGE_SIZE to map
direct-map address range. The kernel needs to make sure the namespace size is aligned
correctly for the direct-map page size.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 drivers/nvdimm/namespace_devs.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c
index fcde9f2bf2ea..83c70631a86f 100644
--- a/drivers/nvdimm/namespace_devs.c
+++ b/drivers/nvdimm/namespace_devs.c
@@ -975,7 +975,8 @@ static ssize_t __size_store(struct device *dev, unsigned long long val)
 	struct nd_mapping *nd_mapping;
 	struct nvdimm_drvdata *ndd;
 	struct nd_label_id label_id;
-	u32 flags = 0, remainder;
+	unsigned long map_size;
+	u32 flags = 0;
 	int rc, i, id = -1;
 	u8 *uuid = NULL;
 
@@ -1006,10 +1007,9 @@ static ssize_t __size_store(struct device *dev, unsigned long long val)
 		return -ENXIO;
 	}
 
-	div_u64_rem(val, PAGE_SIZE * nd_region->ndr_mappings, &remainder);
-	if (remainder) {
-		dev_dbg(dev, "%llu is not %ldK aligned\n", val,
-				(PAGE_SIZE * nd_region->ndr_mappings) / SZ_1K);
+	map_size = arch_validate_namespace_size(nd_region->ndr_mappings, val);
+	if (map_size) {
+		dev_err(dev, "%llu is not %ldK aligned\n", val, map_size / SZ_1K);
 		return -EINVAL;
 	}
 
-- 
2.21.0
_______________________________________________
Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org
To unsubscribe send an email to linux-nvdimm-leave@lists.01.org

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [RFC PATCH 4/4] libnvdimm/namespace: Add debug check while initializing namespace resource size.
  2019-10-28  9:48 [RFC PATCH 1/4] libnvdimm/namespace: Make namespace size validation arch dependent Aneesh Kumar K.V
  2019-10-28  9:48 ` [RFC PATCH 2/4] libnvdimm/namespace: Disable the region if the namespace size is not aligned correctly Aneesh Kumar K.V
  2019-10-28  9:48 ` [RFC PATCH 3/4] libnvdimm/namespace: Use direct-mapping page size to validate namespace size Aneesh Kumar K.V
@ 2019-10-28  9:48 ` Aneesh Kumar K.V
  2019-10-28 21:21 ` [RFC PATCH 1/4] libnvdimm/namespace: Make namespace size validation arch dependent Ira Weiny
  2019-10-28 23:08 ` Dan Williams
  4 siblings, 0 replies; 14+ messages in thread
From: Aneesh Kumar K.V @ 2019-10-28  9:48 UTC (permalink / raw)
  To: dan.j.williams, mpe; +Cc: linux-nvdimm, linuxppc-dev, Aneesh Kumar K.V

This should enable us to catch if we are initializing the namespace with a wrong
size.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 drivers/nvdimm/namespace_devs.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c
index 83c70631a86f..9f2dc20bcc73 100644
--- a/drivers/nvdimm/namespace_devs.c
+++ b/drivers/nvdimm/namespace_devs.c
@@ -956,6 +956,18 @@ static void nd_namespace_pmem_set_resource(struct nd_region *nd_region,
  out:
 	res->start = nd_region->ndr_start + offset;
 	res->end = res->start + size - 1;
+#ifdef CONFIG_DEBUG_VM
+	if (size) {
+		unsigned long map_size;
+
+		map_size = arch_validate_namespace_size(nd_region->ndr_mappings, size);
+		WARN_ON(map_size);
+
+		map_size = arch_validate_namespace_size(1, res->start);
+		WARN_ON(map_size);
+
+	}
+#endif
 }
 
 static bool uuid_not_set(const u8 *uuid, struct device *dev, const char *where)
-- 
2.21.0
_______________________________________________
Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org
To unsubscribe send an email to linux-nvdimm-leave@lists.01.org

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC PATCH 1/4] libnvdimm/namespace: Make namespace size validation arch dependent
  2019-10-28  9:48 [RFC PATCH 1/4] libnvdimm/namespace: Make namespace size validation arch dependent Aneesh Kumar K.V
                   ` (2 preceding siblings ...)
  2019-10-28  9:48 ` [RFC PATCH 4/4] libnvdimm/namespace: Add debug check while initializing namespace resource size Aneesh Kumar K.V
@ 2019-10-28 21:21 ` Ira Weiny
  2019-10-29  4:34   ` Aneesh Kumar K.V
  2019-10-28 23:08 ` Dan Williams
  4 siblings, 1 reply; 14+ messages in thread
From: Ira Weiny @ 2019-10-28 21:21 UTC (permalink / raw)
  To: Aneesh Kumar K.V; +Cc: mpe, linux-nvdimm, linuxppc-dev

> diff --git a/arch/powerpc/lib/pmem.c b/arch/powerpc/lib/pmem.c
> index 377712e85605..2e661a08dae5 100644
> --- a/arch/powerpc/lib/pmem.c
> +++ b/arch/powerpc/lib/pmem.c
> @@ -17,14 +17,31 @@ void arch_wb_cache_pmem(void *addr, size_t size)
>  	unsigned long start = (unsigned long) addr;
>  	flush_dcache_range(start, start + size);
>  }
> -EXPORT_SYMBOL(arch_wb_cache_pmem);
> +EXPORT_SYMBOL_GPL(arch_wb_cache_pmem);

Unrelated change?

Ira
_______________________________________________
Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org
To unsubscribe send an email to linux-nvdimm-leave@lists.01.org

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC PATCH 1/4] libnvdimm/namespace: Make namespace size validation arch dependent
  2019-10-28  9:48 [RFC PATCH 1/4] libnvdimm/namespace: Make namespace size validation arch dependent Aneesh Kumar K.V
                   ` (3 preceding siblings ...)
  2019-10-28 21:21 ` [RFC PATCH 1/4] libnvdimm/namespace: Make namespace size validation arch dependent Ira Weiny
@ 2019-10-28 23:08 ` Dan Williams
  2019-10-29  4:34   ` Aneesh Kumar K.V
  4 siblings, 1 reply; 14+ messages in thread
From: Dan Williams @ 2019-10-28 23:08 UTC (permalink / raw)
  To: Aneesh Kumar K.V; +Cc: Michael Ellerman, linux-nvdimm, linuxppc-dev

On Mon, Oct 28, 2019 at 2:48 AM Aneesh Kumar K.V
<aneesh.kumar@linux.ibm.com> wrote:
>
> The page size used to map the namespace is arch dependent. For example
> architectures like ppc64 use 16MB page size for direct-mapping. If the namespace
> size is not aligned to the mapping page size, we can observe kernel crash
> during namespace init and destroy.
>
> This is due to kernel doing partial map/unmap of the resource range
>
> BUG: Unable to handle kernel data access at 0xc001000406000000
> Faulting instruction address: 0xc000000000090790
> NIP [c000000000090790] arch_add_memory+0xc0/0x130
> LR [c000000000090744] arch_add_memory+0x74/0x130
> Call Trace:
>  arch_add_memory+0x74/0x130 (unreliable)
>  memremap_pages+0x74c/0xa30
>  devm_memremap_pages+0x3c/0xa0
>  pmem_attach_disk+0x188/0x770
>  nvdimm_bus_probe+0xd8/0x470
>  really_probe+0x148/0x570
>  driver_probe_device+0x19c/0x1d0
>  device_driver_attach+0xcc/0x100
>  bind_store+0x134/0x1c0
>  drv_attr_store+0x44/0x60
>  sysfs_kf_write+0x74/0xc0
>  kernfs_fop_write+0x1b4/0x290
>  __vfs_write+0x3c/0x70
>  vfs_write+0xd0/0x260
>  ksys_write+0xdc/0x130
>  system_call+0x5c/0x68
>
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
> ---
>  arch/arm64/mm/flush.c     | 11 +++++++++++
>  arch/powerpc/lib/pmem.c   | 21 +++++++++++++++++++--
>  arch/x86/mm/pageattr.c    | 12 ++++++++++++
>  include/linux/libnvdimm.h |  1 +
>  4 files changed, 43 insertions(+), 2 deletions(-)
>
> diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c
> index ac485163a4a7..90c54c600023 100644
> --- a/arch/arm64/mm/flush.c
> +++ b/arch/arm64/mm/flush.c
> @@ -91,4 +91,15 @@ void arch_invalidate_pmem(void *addr, size_t size)
>         __inval_dcache_area(addr, size);
>  }
>  EXPORT_SYMBOL_GPL(arch_invalidate_pmem);
> +
> +unsigned long arch_validate_namespace_size(unsigned int ndr_mappings, unsigned long size)
> +{
> +       u32 remainder;
> +
> +       div_u64_rem(size, PAGE_SIZE * ndr_mappings, &remainder);
> +       if (remainder)
> +               return PAGE_SIZE * ndr_mappings;
> +       return 0;
> +}
> +EXPORT_SYMBOL_GPL(arch_validate_namespace_size);
>  #endif
> diff --git a/arch/powerpc/lib/pmem.c b/arch/powerpc/lib/pmem.c
> index 377712e85605..2e661a08dae5 100644
> --- a/arch/powerpc/lib/pmem.c
> +++ b/arch/powerpc/lib/pmem.c
> @@ -17,14 +17,31 @@ void arch_wb_cache_pmem(void *addr, size_t size)
>         unsigned long start = (unsigned long) addr;
>         flush_dcache_range(start, start + size);
>  }
> -EXPORT_SYMBOL(arch_wb_cache_pmem);
> +EXPORT_SYMBOL_GPL(arch_wb_cache_pmem);
>
>  void arch_invalidate_pmem(void *addr, size_t size)
>  {
>         unsigned long start = (unsigned long) addr;
>         flush_dcache_range(start, start + size);
>  }
> -EXPORT_SYMBOL(arch_invalidate_pmem);
> +EXPORT_SYMBOL_GPL(arch_invalidate_pmem);
> +
> +unsigned long arch_validate_namespace_size(unsigned int ndr_mappings, unsigned long size)
> +{
> +       u32 remainder;
> +       unsigned long linear_map_size;
> +
> +       if (radix_enabled())
> +               linear_map_size = PAGE_SIZE;
> +       else
> +               linear_map_size = (1UL << mmu_psize_defs[mmu_linear_psize].shift);

This seems more a "supported_alignments" problem, and less a namespace
size or PAGE_SIZE problem, because if the starting address is
misaligned this size validation can still succeed when it shouldn't.

One problem is that __size_store() does not validate the size against
the namespace alignment.

However, the next problem is that alignment is a property of the pfn
device, but not the raw namespace. I think this alignment constraint
should be captured by exposing "align" and "supported_alignments" at
the namespace level as the minimum alignment. The pfn level alignment
could then be an additional alignment constraint, but ndctl would
likely set them to the same value.

The "* ndr_mappings" constraint should be left out of the interface,
because that's a side effect of supporting namespace-type-blk aliasing
which no platform seems to do in practice. If for some strange reason
it's need in the future I'd rather expose the "aliasing" property
rather than fold it into the align / supported_aligns interface.
_______________________________________________
Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org
To unsubscribe send an email to linux-nvdimm-leave@lists.01.org

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC PATCH 1/4] libnvdimm/namespace: Make namespace size validation arch dependent
  2019-10-28 23:08 ` Dan Williams
@ 2019-10-29  4:34   ` Aneesh Kumar K.V
  2019-10-29  5:30     ` Dan Williams
  0 siblings, 1 reply; 14+ messages in thread
From: Aneesh Kumar K.V @ 2019-10-29  4:34 UTC (permalink / raw)
  To: Dan Williams; +Cc: Michael Ellerman, linux-nvdimm, linuxppc-dev

On 10/29/19 4:38 AM, Dan Williams wrote:
> On Mon, Oct 28, 2019 at 2:48 AM Aneesh Kumar K.V
> <aneesh.kumar@linux.ibm.com> wrote:
>>
>> The page size used to map the namespace is arch dependent. For example
>> architectures like ppc64 use 16MB page size for direct-mapping. If the namespace
>> size is not aligned to the mapping page size, we can observe kernel crash
>> during namespace init and destroy.
>>
>> This is due to kernel doing partial map/unmap of the resource range
>>
>> BUG: Unable to handle kernel data access at 0xc001000406000000
>> Faulting instruction address: 0xc000000000090790
>> NIP [c000000000090790] arch_add_memory+0xc0/0x130
>> LR [c000000000090744] arch_add_memory+0x74/0x130
>> Call Trace:
>>   arch_add_memory+0x74/0x130 (unreliable)
>>   memremap_pages+0x74c/0xa30
>>   devm_memremap_pages+0x3c/0xa0
>>   pmem_attach_disk+0x188/0x770
>>   nvdimm_bus_probe+0xd8/0x470
>>   really_probe+0x148/0x570
>>   driver_probe_device+0x19c/0x1d0
>>   device_driver_attach+0xcc/0x100
>>   bind_store+0x134/0x1c0
>>   drv_attr_store+0x44/0x60
>>   sysfs_kf_write+0x74/0xc0
>>   kernfs_fop_write+0x1b4/0x290
>>   __vfs_write+0x3c/0x70
>>   vfs_write+0xd0/0x260
>>   ksys_write+0xdc/0x130
>>   system_call+0x5c/0x68
>>
>> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
>> ---
>>   arch/arm64/mm/flush.c     | 11 +++++++++++
>>   arch/powerpc/lib/pmem.c   | 21 +++++++++++++++++++--
>>   arch/x86/mm/pageattr.c    | 12 ++++++++++++
>>   include/linux/libnvdimm.h |  1 +
>>   4 files changed, 43 insertions(+), 2 deletions(-)
>>
>> diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c
>> index ac485163a4a7..90c54c600023 100644
>> --- a/arch/arm64/mm/flush.c
>> +++ b/arch/arm64/mm/flush.c
>> @@ -91,4 +91,15 @@ void arch_invalidate_pmem(void *addr, size_t size)
>>          __inval_dcache_area(addr, size);
>>   }
>>   EXPORT_SYMBOL_GPL(arch_invalidate_pmem);
>> +
>> +unsigned long arch_validate_namespace_size(unsigned int ndr_mappings, unsigned long size)
>> +{
>> +       u32 remainder;
>> +
>> +       div_u64_rem(size, PAGE_SIZE * ndr_mappings, &remainder);
>> +       if (remainder)
>> +               return PAGE_SIZE * ndr_mappings;
>> +       return 0;
>> +}
>> +EXPORT_SYMBOL_GPL(arch_validate_namespace_size);
>>   #endif
>> diff --git a/arch/powerpc/lib/pmem.c b/arch/powerpc/lib/pmem.c
>> index 377712e85605..2e661a08dae5 100644
>> --- a/arch/powerpc/lib/pmem.c
>> +++ b/arch/powerpc/lib/pmem.c
>> @@ -17,14 +17,31 @@ void arch_wb_cache_pmem(void *addr, size_t size)
>>          unsigned long start = (unsigned long) addr;
>>          flush_dcache_range(start, start + size);
>>   }
>> -EXPORT_SYMBOL(arch_wb_cache_pmem);
>> +EXPORT_SYMBOL_GPL(arch_wb_cache_pmem);
>>
>>   void arch_invalidate_pmem(void *addr, size_t size)
>>   {
>>          unsigned long start = (unsigned long) addr;
>>          flush_dcache_range(start, start + size);
>>   }
>> -EXPORT_SYMBOL(arch_invalidate_pmem);
>> +EXPORT_SYMBOL_GPL(arch_invalidate_pmem);
>> +
>> +unsigned long arch_validate_namespace_size(unsigned int ndr_mappings, unsigned long size)
>> +{
>> +       u32 remainder;
>> +       unsigned long linear_map_size;
>> +
>> +       if (radix_enabled())
>> +               linear_map_size = PAGE_SIZE;
>> +       else
>> +               linear_map_size = (1UL << mmu_psize_defs[mmu_linear_psize].shift);
> 
> This seems more a "supported_alignments" problem, and less a namespace
> size or PAGE_SIZE problem, because if the starting address is
> misaligned this size validation can still succeed when it shouldn't.
> 


Isn't supported_alignments an indication of how user want the namespace 
to be mapped to applications?  Ie, with the above restrictions we can 
still do both 64K and 16M mapping of the namespace to userspace.

Also for supported alignment the huge page mapping is further  dependent 
on the THP feature.

The restrictions here are mostly w.r.t the direct-mapping page size used 
by some architecture.


> One problem is that __size_store() does not validate the size against
> the namespace alignment.
> 
> However, the next problem is that alignment is a property of the pfn
> device, but not the raw namespace. I think this alignment constraint
> should be captured by exposing "align" and "supported_alignments" at
> the namespace level as the minimum alignment. The pfn level alignment
> could then be an additional alignment constraint, but ndctl would
> likely set them to the same value.
> 
> The "* ndr_mappings" constraint should be left out of the interface,
> because that's a side effect of supporting namespace-type-blk aliasing
> which no platform seems to do in practice. If for some strange reason
> it's need in the future I'd rather expose the "aliasing" property
> rather than fold it into the align / supported_aligns interface.
> 

-aneesh
_______________________________________________
Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org
To unsubscribe send an email to linux-nvdimm-leave@lists.01.org

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC PATCH 1/4] libnvdimm/namespace: Make namespace size validation arch dependent
  2019-10-28 21:21 ` [RFC PATCH 1/4] libnvdimm/namespace: Make namespace size validation arch dependent Ira Weiny
@ 2019-10-29  4:34   ` Aneesh Kumar K.V
  0 siblings, 0 replies; 14+ messages in thread
From: Aneesh Kumar K.V @ 2019-10-29  4:34 UTC (permalink / raw)
  To: Ira Weiny; +Cc: mpe, linux-nvdimm, linuxppc-dev

On 10/29/19 2:51 AM, Ira Weiny wrote:
>> diff --git a/arch/powerpc/lib/pmem.c b/arch/powerpc/lib/pmem.c
>> index 377712e85605..2e661a08dae5 100644
>> --- a/arch/powerpc/lib/pmem.c
>> +++ b/arch/powerpc/lib/pmem.c
>> @@ -17,14 +17,31 @@ void arch_wb_cache_pmem(void *addr, size_t size)
>>   	unsigned long start = (unsigned long) addr;
>>   	flush_dcache_range(start, start + size);
>>   }
>> -EXPORT_SYMBOL(arch_wb_cache_pmem);
>> +EXPORT_SYMBOL_GPL(arch_wb_cache_pmem);
> 
> Unrelated change?
> 
>

Yes, will split that as a separate patch.

-aneesh
_______________________________________________
Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org
To unsubscribe send an email to linux-nvdimm-leave@lists.01.org

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC PATCH 1/4] libnvdimm/namespace: Make namespace size validation arch dependent
  2019-10-29  4:34   ` Aneesh Kumar K.V
@ 2019-10-29  5:30     ` Dan Williams
  2019-10-31  5:35       ` Aneesh Kumar K.V
  0 siblings, 1 reply; 14+ messages in thread
From: Dan Williams @ 2019-10-29  5:30 UTC (permalink / raw)
  To: Aneesh Kumar K.V; +Cc: Michael Ellerman, linux-nvdimm, linuxppc-dev

On Mon, Oct 28, 2019 at 9:35 PM Aneesh Kumar K.V
<aneesh.kumar@linux.ibm.com> wrote:
>
> On 10/29/19 4:38 AM, Dan Williams wrote:
> > On Mon, Oct 28, 2019 at 2:48 AM Aneesh Kumar K.V
> > <aneesh.kumar@linux.ibm.com> wrote:
> >>
> >> The page size used to map the namespace is arch dependent. For example
> >> architectures like ppc64 use 16MB page size for direct-mapping. If the namespace
> >> size is not aligned to the mapping page size, we can observe kernel crash
> >> during namespace init and destroy.
> >>
> >> This is due to kernel doing partial map/unmap of the resource range
> >>
> >> BUG: Unable to handle kernel data access at 0xc001000406000000
> >> Faulting instruction address: 0xc000000000090790
> >> NIP [c000000000090790] arch_add_memory+0xc0/0x130
> >> LR [c000000000090744] arch_add_memory+0x74/0x130
> >> Call Trace:
> >>   arch_add_memory+0x74/0x130 (unreliable)
> >>   memremap_pages+0x74c/0xa30
> >>   devm_memremap_pages+0x3c/0xa0
> >>   pmem_attach_disk+0x188/0x770
> >>   nvdimm_bus_probe+0xd8/0x470
> >>   really_probe+0x148/0x570
> >>   driver_probe_device+0x19c/0x1d0
> >>   device_driver_attach+0xcc/0x100
> >>   bind_store+0x134/0x1c0
> >>   drv_attr_store+0x44/0x60
> >>   sysfs_kf_write+0x74/0xc0
> >>   kernfs_fop_write+0x1b4/0x290
> >>   __vfs_write+0x3c/0x70
> >>   vfs_write+0xd0/0x260
> >>   ksys_write+0xdc/0x130
> >>   system_call+0x5c/0x68
> >>
> >> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
> >> ---
> >>   arch/arm64/mm/flush.c     | 11 +++++++++++
> >>   arch/powerpc/lib/pmem.c   | 21 +++++++++++++++++++--
> >>   arch/x86/mm/pageattr.c    | 12 ++++++++++++
> >>   include/linux/libnvdimm.h |  1 +
> >>   4 files changed, 43 insertions(+), 2 deletions(-)
> >>
> >> diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c
> >> index ac485163a4a7..90c54c600023 100644
> >> --- a/arch/arm64/mm/flush.c
> >> +++ b/arch/arm64/mm/flush.c
> >> @@ -91,4 +91,15 @@ void arch_invalidate_pmem(void *addr, size_t size)
> >>          __inval_dcache_area(addr, size);
> >>   }
> >>   EXPORT_SYMBOL_GPL(arch_invalidate_pmem);
> >> +
> >> +unsigned long arch_validate_namespace_size(unsigned int ndr_mappings, unsigned long size)
> >> +{
> >> +       u32 remainder;
> >> +
> >> +       div_u64_rem(size, PAGE_SIZE * ndr_mappings, &remainder);
> >> +       if (remainder)
> >> +               return PAGE_SIZE * ndr_mappings;
> >> +       return 0;
> >> +}
> >> +EXPORT_SYMBOL_GPL(arch_validate_namespace_size);
> >>   #endif
> >> diff --git a/arch/powerpc/lib/pmem.c b/arch/powerpc/lib/pmem.c
> >> index 377712e85605..2e661a08dae5 100644
> >> --- a/arch/powerpc/lib/pmem.c
> >> +++ b/arch/powerpc/lib/pmem.c
> >> @@ -17,14 +17,31 @@ void arch_wb_cache_pmem(void *addr, size_t size)
> >>          unsigned long start = (unsigned long) addr;
> >>          flush_dcache_range(start, start + size);
> >>   }
> >> -EXPORT_SYMBOL(arch_wb_cache_pmem);
> >> +EXPORT_SYMBOL_GPL(arch_wb_cache_pmem);
> >>
> >>   void arch_invalidate_pmem(void *addr, size_t size)
> >>   {
> >>          unsigned long start = (unsigned long) addr;
> >>          flush_dcache_range(start, start + size);
> >>   }
> >> -EXPORT_SYMBOL(arch_invalidate_pmem);
> >> +EXPORT_SYMBOL_GPL(arch_invalidate_pmem);
> >> +
> >> +unsigned long arch_validate_namespace_size(unsigned int ndr_mappings, unsigned long size)
> >> +{
> >> +       u32 remainder;
> >> +       unsigned long linear_map_size;
> >> +
> >> +       if (radix_enabled())
> >> +               linear_map_size = PAGE_SIZE;
> >> +       else
> >> +               linear_map_size = (1UL << mmu_psize_defs[mmu_linear_psize].shift);
> >
> > This seems more a "supported_alignments" problem, and less a namespace
> > size or PAGE_SIZE problem, because if the starting address is
> > misaligned this size validation can still succeed when it shouldn't.
> >
>
>
> Isn't supported_alignments an indication of how user want the namespace
> to be mapped to applications?  Ie, with the above restrictions we can
> still do both 64K and 16M mapping of the namespace to userspace.

True, for the pfn device and the device-dax mapping size, but I'm
suggesting adding another instance of alignment control at the raw
namespace level. That would need to be disconnected from the
device-dax page mapping granularity.

>
> Also for supported alignment the huge page mapping is further  dependent
> on the THP feature.
>
> The restrictions here are mostly w.r.t the direct-mapping page size used
> by some architecture.

Right, that's a base requirement for the namespace that can be
independent of the user page mapping size for device-dax.
_______________________________________________
Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org
To unsubscribe send an email to linux-nvdimm-leave@lists.01.org

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC PATCH 1/4] libnvdimm/namespace: Make namespace size validation arch dependent
  2019-10-29  5:30     ` Dan Williams
@ 2019-10-31  5:35       ` Aneesh Kumar K.V
  2019-10-31  6:30         ` Dan Williams
  0 siblings, 1 reply; 14+ messages in thread
From: Aneesh Kumar K.V @ 2019-10-31  5:35 UTC (permalink / raw)
  To: Dan Williams; +Cc: Michael Ellerman, linux-nvdimm, linuxppc-dev

On 10/29/19 11:00 AM, Dan Williams wrote:
> On Mon, Oct 28, 2019 at 9:35 PM Aneesh Kumar K.V
> <aneesh.kumar@linux.ibm.com> wrote:
>>
>> On 10/29/19 4:38 AM, Dan Williams wrote:
>>> On Mon, Oct 28, 2019 at 2:48 AM Aneesh Kumar K.V
>>> <aneesh.kumar@linux.ibm.com> wrote:
>>>>
>>>> The page size used to map the namespace is arch dependent. For example
>>>> architectures like ppc64 use 16MB page size for direct-mapping. If the namespace
>>>> size is not aligned to the mapping page size, we can observe kernel crash
>>>> during namespace init and destroy.
>>>>
>>>> This is due to kernel doing partial map/unmap of the resource range
>>>>
>>>> BUG: Unable to handle kernel data access at 0xc001000406000000
>>>> Faulting instruction address: 0xc000000000090790
>>>> NIP [c000000000090790] arch_add_memory+0xc0/0x130
>>>> LR [c000000000090744] arch_add_memory+0x74/0x130
>>>> Call Trace:
>>>>    arch_add_memory+0x74/0x130 (unreliable)
>>>>    memremap_pages+0x74c/0xa30
>>>>    devm_memremap_pages+0x3c/0xa0
>>>>    pmem_attach_disk+0x188/0x770
>>>>    nvdimm_bus_probe+0xd8/0x470
>>>>    really_probe+0x148/0x570
>>>>    driver_probe_device+0x19c/0x1d0
>>>>    device_driver_attach+0xcc/0x100
>>>>    bind_store+0x134/0x1c0
>>>>    drv_attr_store+0x44/0x60
>>>>    sysfs_kf_write+0x74/0xc0
>>>>    kernfs_fop_write+0x1b4/0x290
>>>>    __vfs_write+0x3c/0x70
>>>>    vfs_write+0xd0/0x260
>>>>    ksys_write+0xdc/0x130
>>>>    system_call+0x5c/0x68
>>>>
>>>> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
>>>> ---
>>>>    arch/arm64/mm/flush.c     | 11 +++++++++++
>>>>    arch/powerpc/lib/pmem.c   | 21 +++++++++++++++++++--
>>>>    arch/x86/mm/pageattr.c    | 12 ++++++++++++
>>>>    include/linux/libnvdimm.h |  1 +
>>>>    4 files changed, 43 insertions(+), 2 deletions(-)
>>>>
>>>> diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c
>>>> index ac485163a4a7..90c54c600023 100644
>>>> --- a/arch/arm64/mm/flush.c
>>>> +++ b/arch/arm64/mm/flush.c
>>>> @@ -91,4 +91,15 @@ void arch_invalidate_pmem(void *addr, size_t size)
>>>>           __inval_dcache_area(addr, size);
>>>>    }
>>>>    EXPORT_SYMBOL_GPL(arch_invalidate_pmem);
>>>> +
>>>> +unsigned long arch_validate_namespace_size(unsigned int ndr_mappings, unsigned long size)
>>>> +{
>>>> +       u32 remainder;
>>>> +
>>>> +       div_u64_rem(size, PAGE_SIZE * ndr_mappings, &remainder);
>>>> +       if (remainder)
>>>> +               return PAGE_SIZE * ndr_mappings;
>>>> +       return 0;
>>>> +}
>>>> +EXPORT_SYMBOL_GPL(arch_validate_namespace_size);
>>>>    #endif
>>>> diff --git a/arch/powerpc/lib/pmem.c b/arch/powerpc/lib/pmem.c
>>>> index 377712e85605..2e661a08dae5 100644
>>>> --- a/arch/powerpc/lib/pmem.c
>>>> +++ b/arch/powerpc/lib/pmem.c
>>>> @@ -17,14 +17,31 @@ void arch_wb_cache_pmem(void *addr, size_t size)
>>>>           unsigned long start = (unsigned long) addr;
>>>>           flush_dcache_range(start, start + size);
>>>>    }
>>>> -EXPORT_SYMBOL(arch_wb_cache_pmem);
>>>> +EXPORT_SYMBOL_GPL(arch_wb_cache_pmem);
>>>>
>>>>    void arch_invalidate_pmem(void *addr, size_t size)
>>>>    {
>>>>           unsigned long start = (unsigned long) addr;
>>>>           flush_dcache_range(start, start + size);
>>>>    }
>>>> -EXPORT_SYMBOL(arch_invalidate_pmem);
>>>> +EXPORT_SYMBOL_GPL(arch_invalidate_pmem);
>>>> +
>>>> +unsigned long arch_validate_namespace_size(unsigned int ndr_mappings, unsigned long size)
>>>> +{
>>>> +       u32 remainder;
>>>> +       unsigned long linear_map_size;
>>>> +
>>>> +       if (radix_enabled())
>>>> +               linear_map_size = PAGE_SIZE;
>>>> +       else
>>>> +               linear_map_size = (1UL << mmu_psize_defs[mmu_linear_psize].shift);
>>>
>>> This seems more a "supported_alignments" problem, and less a namespace
>>> size or PAGE_SIZE problem, because if the starting address is
>>> misaligned this size validation can still succeed when it shouldn't.
>>>
>>
>>
>> Isn't supported_alignments an indication of how user want the namespace
>> to be mapped to applications?  Ie, with the above restrictions we can
>> still do both 64K and 16M mapping of the namespace to userspace.
> 
> True, for the pfn device and the device-dax mapping size, but I'm
> suggesting adding another instance of alignment control at the raw
> namespace level. That would need to be disconnected from the
> device-dax page mapping granularity.
> 

Can you explain what you mean by raw namespace level ? We don't have 
multiple values against which we need to check the alignment of 
namespace start and namespace size.

If you can outline how and where you would like to enforce that check I 
can start working on it.

-aneesh
_______________________________________________
Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org
To unsubscribe send an email to linux-nvdimm-leave@lists.01.org

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC PATCH 1/4] libnvdimm/namespace: Make namespace size validation arch dependent
  2019-10-31  5:35       ` Aneesh Kumar K.V
@ 2019-10-31  6:30         ` Dan Williams
  2019-10-31  8:37           ` Aneesh Kumar K.V
  2019-11-06 10:44           ` Aneesh Kumar K.V
  0 siblings, 2 replies; 14+ messages in thread
From: Dan Williams @ 2019-10-31  6:30 UTC (permalink / raw)
  To: Aneesh Kumar K.V; +Cc: Michael Ellerman, linux-nvdimm, linuxppc-dev

On Wed, Oct 30, 2019 at 10:35 PM Aneesh Kumar K.V
<aneesh.kumar@linux.ibm.com> wrote:
[..]
> > True, for the pfn device and the device-dax mapping size, but I'm
> > suggesting adding another instance of alignment control at the raw
> > namespace level. That would need to be disconnected from the
> > device-dax page mapping granularity.
> >
>
> Can you explain what you mean by raw namespace level ? We don't have
> multiple values against which we need to check the alignment of
> namespace start and namespace size.
>
> If you can outline how and where you would like to enforce that check I
> can start working on it.
>

What I mean is that the process of setting up a pfn namespace goes
something like this in shell script form:

1/ echo $size > /sys/bus/nd/devices/$namespace/size
2/ echo $namespace > /sys/bus/nd/devices/$pfn/namespace
3/ echo $pfn_align > /sys/bus/nd/devices/$pfn/align

What I'm suggesting is add an optional 0th step that does:

echo $raw_align > /sys/bus/nd/devices/$namespace/align

Where the raw align needs to be needs to be max($pfn_align,
arch_mapping_granulariy).

So on powerpc where PAGE_SIZE < arch_mapping_granulariy, the following:

cat /sys/bus/nd/devices/$namespace/supported_aligns

...would show the same output as:

cat /sys/bus/nd/devices/$pfn/align

...but with any alignment choice less than arch_mapping_granulariy removed.



All that said, the x86 vmemmap_populate() falls back to use small
pages in some case to get around this constraint. Can't powerpc do the
same? It would seem to be less work than the above proposal.
_______________________________________________
Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org
To unsubscribe send an email to linux-nvdimm-leave@lists.01.org

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC PATCH 1/4] libnvdimm/namespace: Make namespace size validation arch dependent
  2019-10-31  6:30         ` Dan Williams
@ 2019-10-31  8:37           ` Aneesh Kumar K.V
  2019-10-31 15:53             ` Dan Williams
  2019-11-06 10:44           ` Aneesh Kumar K.V
  1 sibling, 1 reply; 14+ messages in thread
From: Aneesh Kumar K.V @ 2019-10-31  8:37 UTC (permalink / raw)
  To: Dan Williams; +Cc: Michael Ellerman, linux-nvdimm, linuxppc-dev

On 10/31/19 12:00 PM, Dan Williams wrote:
> On Wed, Oct 30, 2019 at 10:35 PM Aneesh Kumar K.V
> <aneesh.kumar@linux.ibm.com> wrote:
> [..]

> 
> 
> All that said, the x86 vmemmap_populate() falls back to use small
> pages in some case to get around this constraint. Can't powerpc do the
> same? It would seem to be less work than the above proposal.
> 

ppc64 supports two translation mode (radix and hash). We can do the 
above with radix translation. With hash we use just one page size( in 
this specific case 16MB) for direct-map mapping.

-aneesh
_______________________________________________
Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org
To unsubscribe send an email to linux-nvdimm-leave@lists.01.org

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC PATCH 1/4] libnvdimm/namespace: Make namespace size validation arch dependent
  2019-10-31  8:37           ` Aneesh Kumar K.V
@ 2019-10-31 15:53             ` Dan Williams
  0 siblings, 0 replies; 14+ messages in thread
From: Dan Williams @ 2019-10-31 15:53 UTC (permalink / raw)
  To: Aneesh Kumar K.V; +Cc: Michael Ellerman, linux-nvdimm, linuxppc-dev

On Thu, Oct 31, 2019 at 1:38 AM Aneesh Kumar K.V
<aneesh.kumar@linux.ibm.com> wrote:
>
> On 10/31/19 12:00 PM, Dan Williams wrote:
> > On Wed, Oct 30, 2019 at 10:35 PM Aneesh Kumar K.V
> > <aneesh.kumar@linux.ibm.com> wrote:
> > [..]
>
> >
> >
> > All that said, the x86 vmemmap_populate() falls back to use small
> > pages in some case to get around this constraint. Can't powerpc do the
> > same? It would seem to be less work than the above proposal.
> >
>
> ppc64 supports two translation mode (radix and hash). We can do the
> above with radix translation. With hash we use just one page size( in
> this specific case 16MB) for direct-map mapping.

Ok, if it's truly a hard constraint then yes, more infrastructure is
needed to expose that constraint to the namespace provisioning flow.
_______________________________________________
Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org
To unsubscribe send an email to linux-nvdimm-leave@lists.01.org

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC PATCH 1/4] libnvdimm/namespace: Make namespace size validation arch dependent
  2019-10-31  6:30         ` Dan Williams
  2019-10-31  8:37           ` Aneesh Kumar K.V
@ 2019-11-06 10:44           ` Aneesh Kumar K.V
  1 sibling, 0 replies; 14+ messages in thread
From: Aneesh Kumar K.V @ 2019-11-06 10:44 UTC (permalink / raw)
  To: Dan Williams; +Cc: Michael Ellerman, linux-nvdimm, linuxppc-dev

Dan Williams <dan.j.williams@intel.com> writes:

> On Wed, Oct 30, 2019 at 10:35 PM Aneesh Kumar K.V
> <aneesh.kumar@linux.ibm.com> wrote:
> [..]
>> > True, for the pfn device and the device-dax mapping size, but I'm
>> > suggesting adding another instance of alignment control at the raw
>> > namespace level. That would need to be disconnected from the
>> > device-dax page mapping granularity.
>> >
>>
>> Can you explain what you mean by raw namespace level ? We don't have
>> multiple values against which we need to check the alignment of
>> namespace start and namespace size.
>>
>> If you can outline how and where you would like to enforce that check I
>> can start working on it.
>>
>
> What I mean is that the process of setting up a pfn namespace goes
> something like this in shell script form:
>
> 1/ echo $size > /sys/bus/nd/devices/$namespace/size
> 2/ echo $namespace > /sys/bus/nd/devices/$pfn/namespace
> 3/ echo $pfn_align > /sys/bus/nd/devices/$pfn/align
>
> What I'm suggesting is add an optional 0th step that does:
>
> echo $raw_align > /sys/bus/nd/devices/$namespace/align
>
> Where the raw align needs to be needs to be max($pfn_align,
> arch_mapping_granulariy).


I started looking at this and was wondering about userspace being aware
of the direct-map mapping size. We can figure that out by parsing kernel
log

[    0.000000] Page orders: linear mapping = 24, virtual = 16, io = 16, vmemmap = 24


But I am not sure we want to do that. There is not set of raw_align
value to select. What we need to make sure is the below.

1) While creating a namespace we need to make sure that namespace size
is multiple of direct-map mapping size. If we ensure that
size is aligned, we should also get the namespace start to be aligned?

2) While initialzing a namespace by scanning label area, we need to make
sure every namespace in the region satisfy the above requirement. If not
we should mark the region disabled. 


>
> So on powerpc where PAGE_SIZE < arch_mapping_granulariy, the following:
>
> cat /sys/bus/nd/devices/$namespace/supported_aligns
>
> ...would show the same output as:
>
> cat /sys/bus/nd/devices/$pfn/align
>
> ...but with any alignment choice less than arch_mapping_granulariy removed.
>

I am not sure why we need to do that. For example: even if we have
direct-map mapping size as PAGE_SIZE (64K), we still should allow an user
mapping with hugepage size (16M)?


-aneesh
_______________________________________________
Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org
To unsubscribe send an email to linux-nvdimm-leave@lists.01.org

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, back to index

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-10-28  9:48 [RFC PATCH 1/4] libnvdimm/namespace: Make namespace size validation arch dependent Aneesh Kumar K.V
2019-10-28  9:48 ` [RFC PATCH 2/4] libnvdimm/namespace: Disable the region if the namespace size is not aligned correctly Aneesh Kumar K.V
2019-10-28  9:48 ` [RFC PATCH 3/4] libnvdimm/namespace: Use direct-mapping page size to validate namespace size Aneesh Kumar K.V
2019-10-28  9:48 ` [RFC PATCH 4/4] libnvdimm/namespace: Add debug check while initializing namespace resource size Aneesh Kumar K.V
2019-10-28 21:21 ` [RFC PATCH 1/4] libnvdimm/namespace: Make namespace size validation arch dependent Ira Weiny
2019-10-29  4:34   ` Aneesh Kumar K.V
2019-10-28 23:08 ` Dan Williams
2019-10-29  4:34   ` Aneesh Kumar K.V
2019-10-29  5:30     ` Dan Williams
2019-10-31  5:35       ` Aneesh Kumar K.V
2019-10-31  6:30         ` Dan Williams
2019-10-31  8:37           ` Aneesh Kumar K.V
2019-10-31 15:53             ` Dan Williams
2019-11-06 10:44           ` Aneesh Kumar K.V

Linux-NVDIMM Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-nvdimm/0 linux-nvdimm/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-nvdimm linux-nvdimm/ https://lore.kernel.org/linux-nvdimm \
		linux-nvdimm@lists.01.org
	public-inbox-index linux-nvdimm

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.01.lists.linux-nvdimm


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git