* [RFC PATCH 1/2] libnvdimm: Add prctl control for disabling synchronous fault support. @ 2020-05-29 5:41 ` Aneesh Kumar K.V 0 siblings, 0 replies; 40+ messages in thread From: Aneesh Kumar K.V @ 2020-05-29 5:41 UTC (permalink / raw) To: linuxppc-dev, mpe, linux-nvdimm, dan.j.williams; +Cc: Aneesh Kumar K.V With POWER10, architecture is adding new pmem flush and sync instructions. The kernel should prevent the usage of MAP_SYNC if applications are not using the new instructions on newer hardware. This patch adds a prctl option MAP_SYNC_ENABLE that can be used to enable the usage of MAP_SYNC. The kernel config option is added to allow the user to control whether MAP_SYNC should be enabled by default or not. Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> --- include/linux/sched/coredump.h | 13 ++++++++++--- include/uapi/linux/prctl.h | 3 +++ kernel/fork.c | 8 +++++++- kernel/sys.c | 18 ++++++++++++++++++ mm/Kconfig | 3 +++ mm/mmap.c | 4 ++++ 6 files changed, 45 insertions(+), 4 deletions(-) diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h index ecdc6542070f..9ba6b3d5f991 100644 --- a/include/linux/sched/coredump.h +++ b/include/linux/sched/coredump.h @@ -72,9 +72,16 @@ static inline int get_dumpable(struct mm_struct *mm) #define MMF_DISABLE_THP 24 /* disable THP for all VMAs */ #define MMF_OOM_VICTIM 25 /* mm is the oom victim */ #define MMF_OOM_REAP_QUEUED 26 /* mm was queued for oom_reaper */ -#define MMF_DISABLE_THP_MASK (1 << MMF_DISABLE_THP) +#define MMF_DISABLE_MAP_SYNC 27 /* disable THP for all VMAs */ +#define MMF_DISABLE_THP_MASK (1 << MMF_DISABLE_THP) +#define MMF_DISABLE_MAP_SYNC_MASK (1 << MMF_DISABLE_MAP_SYNC) -#define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\ - MMF_DISABLE_THP_MASK) +#define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK | \ + MMF_DISABLE_THP_MASK | MMF_DISABLE_MAP_SYNC_MASK) + +static inline bool map_sync_enabled(struct mm_struct *mm) +{ + return !(mm->flags & 
MMF_DISABLE_MAP_SYNC_MASK); +} #endif /* _LINUX_SCHED_COREDUMP_H */ diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index 07b4f8131e36..ee4cde32d5cf 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -238,4 +238,7 @@ struct prctl_mm_map { #define PR_SET_IO_FLUSHER 57 #define PR_GET_IO_FLUSHER 58 +#define PR_SET_MAP_SYNC_ENABLE 59 +#define PR_GET_MAP_SYNC_ENABLE 60 + #endif /* _LINUX_PRCTL_H */ diff --git a/kernel/fork.c b/kernel/fork.c index 8c700f881d92..d5a9a363e81e 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -963,6 +963,12 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock); static unsigned long default_dump_filter = MMF_DUMP_FILTER_DEFAULT; +#ifdef CONFIG_ARCH_MAP_SYNC_DISABLE +unsigned long default_map_sync_mask = MMF_DISABLE_MAP_SYNC_MASK; +#else +unsigned long default_map_sync_mask = 0; +#endif + static int __init coredump_filter_setup(char *s) { default_dump_filter = @@ -1039,7 +1045,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, mm->flags = current->mm->flags & MMF_INIT_MASK; mm->def_flags = current->mm->def_flags & VM_INIT_DEF_MASK; } else { - mm->flags = default_dump_filter; + mm->flags = default_dump_filter | default_map_sync_mask; mm->def_flags = 0; } diff --git a/kernel/sys.c b/kernel/sys.c index d325f3ab624a..f6127cf4128b 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -2450,6 +2450,24 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, clear_bit(MMF_DISABLE_THP, &me->mm->flags); up_write(&me->mm->mmap_sem); break; + + case PR_GET_MAP_SYNC_ENABLE: + if (arg2 || arg3 || arg4 || arg5) + return -EINVAL; + error = !test_bit(MMF_DISABLE_MAP_SYNC, &me->mm->flags); + break; + case PR_SET_MAP_SYNC_ENABLE: + if (arg3 || arg4 || arg5) + return -EINVAL; + if (down_write_killable(&me->mm->mmap_sem)) + return -EINTR; + if (arg2) + clear_bit(MMF_DISABLE_MAP_SYNC, &me->mm->flags); + else + set_bit(MMF_DISABLE_MAP_SYNC, &me->mm->flags); + 
up_write(&me->mm->mmap_sem); + break; + case PR_MPX_ENABLE_MANAGEMENT: case PR_MPX_DISABLE_MANAGEMENT: /* No longer implemented: */ diff --git a/mm/Kconfig b/mm/Kconfig index c1acc34c1c35..38fd7cfbfca8 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -867,4 +867,7 @@ config ARCH_HAS_HUGEPD config MAPPING_DIRTY_HELPERS bool +config ARCH_MAP_SYNC_DISABLE + bool + endmenu diff --git a/mm/mmap.c b/mm/mmap.c index f609e9ec4a25..613e5894f178 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1464,6 +1464,10 @@ unsigned long do_mmap(struct file *file, unsigned long addr, case MAP_SHARED_VALIDATE: if (flags & ~flags_mask) return -EOPNOTSUPP; + + if ((flags & MAP_SYNC) && !map_sync_enabled(mm)) + return -EOPNOTSUPP; + if (prot & PROT_WRITE) { if (!(file->f_mode & FMODE_WRITE)) return -EACCES; -- 2.26.2 _______________________________________________ Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org To unsubscribe send an email to linux-nvdimm-leave@lists.01.org ^ permalink raw reply related [flat|nested] 40+ messages in thread
* [RFC PATCH 1/2] libnvdimm: Add prctl control for disabling synchronous fault support. @ 2020-05-29 5:41 ` Aneesh Kumar K.V 0 siblings, 0 replies; 40+ messages in thread From: Aneesh Kumar K.V @ 2020-05-29 5:41 UTC (permalink / raw) To: linuxppc-dev, mpe, linux-nvdimm, dan.j.williams; +Cc: Aneesh Kumar K.V, oohall With POWER10, architecture is adding new pmem flush and sync instructions. The kernel should prevent the usage of MAP_SYNC if applications are not using the new instructions on newer hardware. This patch adds a prctl option MAP_SYNC_ENABLE that can be used to enable the usage of MAP_SYNC. The kernel config option is added to allow the user to control whether MAP_SYNC should be enabled by default or not. Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> --- include/linux/sched/coredump.h | 13 ++++++++++--- include/uapi/linux/prctl.h | 3 +++ kernel/fork.c | 8 +++++++- kernel/sys.c | 18 ++++++++++++++++++ mm/Kconfig | 3 +++ mm/mmap.c | 4 ++++ 6 files changed, 45 insertions(+), 4 deletions(-) diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h index ecdc6542070f..9ba6b3d5f991 100644 --- a/include/linux/sched/coredump.h +++ b/include/linux/sched/coredump.h @@ -72,9 +72,16 @@ static inline int get_dumpable(struct mm_struct *mm) #define MMF_DISABLE_THP 24 /* disable THP for all VMAs */ #define MMF_OOM_VICTIM 25 /* mm is the oom victim */ #define MMF_OOM_REAP_QUEUED 26 /* mm was queued for oom_reaper */ -#define MMF_DISABLE_THP_MASK (1 << MMF_DISABLE_THP) +#define MMF_DISABLE_MAP_SYNC 27 /* disable THP for all VMAs */ +#define MMF_DISABLE_THP_MASK (1 << MMF_DISABLE_THP) +#define MMF_DISABLE_MAP_SYNC_MASK (1 << MMF_DISABLE_MAP_SYNC) -#define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\ - MMF_DISABLE_THP_MASK) +#define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK | \ + MMF_DISABLE_THP_MASK | MMF_DISABLE_MAP_SYNC_MASK) + +static inline bool map_sync_enabled(struct mm_struct *mm) +{ + return !(mm->flags & 
MMF_DISABLE_MAP_SYNC_MASK); +} #endif /* _LINUX_SCHED_COREDUMP_H */ diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index 07b4f8131e36..ee4cde32d5cf 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -238,4 +238,7 @@ struct prctl_mm_map { #define PR_SET_IO_FLUSHER 57 #define PR_GET_IO_FLUSHER 58 +#define PR_SET_MAP_SYNC_ENABLE 59 +#define PR_GET_MAP_SYNC_ENABLE 60 + #endif /* _LINUX_PRCTL_H */ diff --git a/kernel/fork.c b/kernel/fork.c index 8c700f881d92..d5a9a363e81e 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -963,6 +963,12 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock); static unsigned long default_dump_filter = MMF_DUMP_FILTER_DEFAULT; +#ifdef CONFIG_ARCH_MAP_SYNC_DISABLE +unsigned long default_map_sync_mask = MMF_DISABLE_MAP_SYNC_MASK; +#else +unsigned long default_map_sync_mask = 0; +#endif + static int __init coredump_filter_setup(char *s) { default_dump_filter = @@ -1039,7 +1045,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, mm->flags = current->mm->flags & MMF_INIT_MASK; mm->def_flags = current->mm->def_flags & VM_INIT_DEF_MASK; } else { - mm->flags = default_dump_filter; + mm->flags = default_dump_filter | default_map_sync_mask; mm->def_flags = 0; } diff --git a/kernel/sys.c b/kernel/sys.c index d325f3ab624a..f6127cf4128b 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -2450,6 +2450,24 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, clear_bit(MMF_DISABLE_THP, &me->mm->flags); up_write(&me->mm->mmap_sem); break; + + case PR_GET_MAP_SYNC_ENABLE: + if (arg2 || arg3 || arg4 || arg5) + return -EINVAL; + error = !test_bit(MMF_DISABLE_MAP_SYNC, &me->mm->flags); + break; + case PR_SET_MAP_SYNC_ENABLE: + if (arg3 || arg4 || arg5) + return -EINVAL; + if (down_write_killable(&me->mm->mmap_sem)) + return -EINTR; + if (arg2) + clear_bit(MMF_DISABLE_MAP_SYNC, &me->mm->flags); + else + set_bit(MMF_DISABLE_MAP_SYNC, &me->mm->flags); + 
up_write(&me->mm->mmap_sem); + break; + case PR_MPX_ENABLE_MANAGEMENT: case PR_MPX_DISABLE_MANAGEMENT: /* No longer implemented: */ diff --git a/mm/Kconfig b/mm/Kconfig index c1acc34c1c35..38fd7cfbfca8 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -867,4 +867,7 @@ config ARCH_HAS_HUGEPD config MAPPING_DIRTY_HELPERS bool +config ARCH_MAP_SYNC_DISABLE + bool + endmenu diff --git a/mm/mmap.c b/mm/mmap.c index f609e9ec4a25..613e5894f178 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1464,6 +1464,10 @@ unsigned long do_mmap(struct file *file, unsigned long addr, case MAP_SHARED_VALIDATE: if (flags & ~flags_mask) return -EOPNOTSUPP; + + if ((flags & MAP_SYNC) && !map_sync_enabled(mm)) + return -EOPNOTSUPP; + if (prot & PROT_WRITE) { if (!(file->f_mode & FMODE_WRITE)) return -EACCES; -- 2.26.2 ^ permalink raw reply related [flat|nested] 40+ messages in thread
* [RFC PATCH 2/2] powerpc/pmem: Disable synchronous fault by default. 2020-05-29 5:41 ` Aneesh Kumar K.V @ 2020-05-29 5:41 ` Aneesh Kumar K.V -1 siblings, 0 replies; 40+ messages in thread From: Aneesh Kumar K.V @ 2020-05-29 5:41 UTC (permalink / raw) To: linuxppc-dev, mpe, linux-nvdimm, dan.j.williams; +Cc: Aneesh Kumar K.V This adds a kernel config option that controls whether MAP_SYNC is enabled by default. With POWER10, the architecture is adding new pmem flush and sync instructions. The kernel should prevent the usage of MAP_SYNC if applications are not using the new instructions on newer hardware. This config allows the user to control whether MAP_SYNC should be enabled by default or not. Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> --- arch/powerpc/platforms/Kconfig.cputype | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 27a81c291be8..f8694838ad4e 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -383,6 +383,15 @@ config PPC_KUEP If you're unsure, say Y. +config ARCH_MAP_SYNC_DISABLE + bool "Disable synchronous fault support (MAP_SYNC)" + default y + help + Disable support for synchronous fault with nvdimm namespaces. + + If you're unsure, say Y. + + config PPC_HAVE_KUAP bool -- 2.26.2 _______________________________________________ Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org To unsubscribe send an email to linux-nvdimm-leave@lists.01.org ^ permalink raw reply related [flat|nested] 40+ messages in thread
* [RFC PATCH 2/2] powerpc/pmem: Disable synchronous fault by default. @ 2020-05-29 5:41 ` Aneesh Kumar K.V 0 siblings, 0 replies; 40+ messages in thread From: Aneesh Kumar K.V @ 2020-05-29 5:41 UTC (permalink / raw) To: linuxppc-dev, mpe, linux-nvdimm, dan.j.williams; +Cc: Aneesh Kumar K.V, oohall This adds a kernel config option that controls whether MAP_SYNC is enabled by default. With POWER10, the architecture is adding new pmem flush and sync instructions. The kernel should prevent the usage of MAP_SYNC if applications are not using the new instructions on newer hardware. This config allows the user to control whether MAP_SYNC should be enabled by default or not. Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> --- arch/powerpc/platforms/Kconfig.cputype | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 27a81c291be8..f8694838ad4e 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -383,6 +383,15 @@ config PPC_KUEP If you're unsure, say Y. +config ARCH_MAP_SYNC_DISABLE + bool "Disable synchronous fault support (MAP_SYNC)" + default y + help + Disable support for synchronous fault with nvdimm namespaces. + + If you're unsure, say Y. + + config PPC_HAVE_KUAP bool -- 2.26.2 ^ permalink raw reply related [flat|nested] 40+ messages in thread
* Re: [RFC PATCH 1/2] libnvdimm: Add prctl control for disabling synchronous fault support. 2020-05-29 5:41 ` Aneesh Kumar K.V @ 2020-05-29 9:33 ` Michal Suchánek -1 siblings, 0 replies; 40+ messages in thread From: Michal Suchánek @ 2020-05-29 9:33 UTC (permalink / raw) To: jack; +Cc: linuxppc-dev, mpe, linux-nvdimm, Aneesh Kumar K.V Adding Jan On Fri, May 29, 2020 at 11:11:39AM +0530, Aneesh Kumar K.V wrote: > With POWER10, architecture is adding new pmem flush and sync instructions. > The kernel should prevent the usage of MAP_SYNC if applications are not using > the new instructions on newer hardware. > > This patch adds a prctl option MAP_SYNC_ENABLE that can be used to enable > the usage of MAP_SYNC. The kernel config option is added to allow the user > to control whether MAP_SYNC should be enabled by default or not. > > Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> > --- > include/linux/sched/coredump.h | 13 ++++++++++--- > include/uapi/linux/prctl.h | 3 +++ > kernel/fork.c | 8 +++++++- > kernel/sys.c | 18 ++++++++++++++++++ > mm/Kconfig | 3 +++ > mm/mmap.c | 4 ++++ > 6 files changed, 45 insertions(+), 4 deletions(-) > > diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h > index ecdc6542070f..9ba6b3d5f991 100644 > --- a/include/linux/sched/coredump.h > +++ b/include/linux/sched/coredump.h > @@ -72,9 +72,16 @@ static inline int get_dumpable(struct mm_struct *mm) > #define MMF_DISABLE_THP 24 /* disable THP for all VMAs */ > #define MMF_OOM_VICTIM 25 /* mm is the oom victim */ > #define MMF_OOM_REAP_QUEUED 26 /* mm was queued for oom_reaper */ > -#define MMF_DISABLE_THP_MASK (1 << MMF_DISABLE_THP) > +#define MMF_DISABLE_MAP_SYNC 27 /* disable THP for all VMAs */ > +#define MMF_DISABLE_THP_MASK (1 << MMF_DISABLE_THP) > +#define MMF_DISABLE_MAP_SYNC_MASK (1 << MMF_DISABLE_MAP_SYNC) > > -#define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\ > - MMF_DISABLE_THP_MASK) > +#define MMF_INIT_MASK (MMF_DUMPABLE_MASK | 
MMF_DUMP_FILTER_MASK | \ > + MMF_DISABLE_THP_MASK | MMF_DISABLE_MAP_SYNC_MASK) > + > +static inline bool map_sync_enabled(struct mm_struct *mm) > +{ > + return !(mm->flags & MMF_DISABLE_MAP_SYNC_MASK); > +} > > #endif /* _LINUX_SCHED_COREDUMP_H */ > diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h > index 07b4f8131e36..ee4cde32d5cf 100644 > --- a/include/uapi/linux/prctl.h > +++ b/include/uapi/linux/prctl.h > @@ -238,4 +238,7 @@ struct prctl_mm_map { > #define PR_SET_IO_FLUSHER 57 > #define PR_GET_IO_FLUSHER 58 > > +#define PR_SET_MAP_SYNC_ENABLE 59 > +#define PR_GET_MAP_SYNC_ENABLE 60 > + > #endif /* _LINUX_PRCTL_H */ > diff --git a/kernel/fork.c b/kernel/fork.c > index 8c700f881d92..d5a9a363e81e 100644 > --- a/kernel/fork.c > +++ b/kernel/fork.c > @@ -963,6 +963,12 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock); > > static unsigned long default_dump_filter = MMF_DUMP_FILTER_DEFAULT; > > +#ifdef CONFIG_ARCH_MAP_SYNC_DISABLE > +unsigned long default_map_sync_mask = MMF_DISABLE_MAP_SYNC_MASK; > +#else > +unsigned long default_map_sync_mask = 0; > +#endif > + > static int __init coredump_filter_setup(char *s) > { > default_dump_filter = > @@ -1039,7 +1045,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, > mm->flags = current->mm->flags & MMF_INIT_MASK; > mm->def_flags = current->mm->def_flags & VM_INIT_DEF_MASK; > } else { > - mm->flags = default_dump_filter; > + mm->flags = default_dump_filter | default_map_sync_mask; > mm->def_flags = 0; > } > > diff --git a/kernel/sys.c b/kernel/sys.c > index d325f3ab624a..f6127cf4128b 100644 > --- a/kernel/sys.c > +++ b/kernel/sys.c > @@ -2450,6 +2450,24 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, > clear_bit(MMF_DISABLE_THP, &me->mm->flags); > up_write(&me->mm->mmap_sem); > break; > + > + case PR_GET_MAP_SYNC_ENABLE: > + if (arg2 || arg3 || arg4 || arg5) > + return -EINVAL; > + error = !test_bit(MMF_DISABLE_MAP_SYNC, 
&me->mm->flags); > + break; > + case PR_SET_MAP_SYNC_ENABLE: > + if (arg3 || arg4 || arg5) > + return -EINVAL; > + if (down_write_killable(&me->mm->mmap_sem)) > + return -EINTR; > + if (arg2) > + clear_bit(MMF_DISABLE_MAP_SYNC, &me->mm->flags); > + else > + set_bit(MMF_DISABLE_MAP_SYNC, &me->mm->flags); > + up_write(&me->mm->mmap_sem); > + break; > + > case PR_MPX_ENABLE_MANAGEMENT: > case PR_MPX_DISABLE_MANAGEMENT: > /* No longer implemented: */ > diff --git a/mm/Kconfig b/mm/Kconfig > index c1acc34c1c35..38fd7cfbfca8 100644 > --- a/mm/Kconfig > +++ b/mm/Kconfig > @@ -867,4 +867,7 @@ config ARCH_HAS_HUGEPD > config MAPPING_DIRTY_HELPERS > bool > > +config ARCH_MAP_SYNC_DISABLE > + bool > + > endmenu > diff --git a/mm/mmap.c b/mm/mmap.c > index f609e9ec4a25..613e5894f178 100644 > --- a/mm/mmap.c > +++ b/mm/mmap.c > @@ -1464,6 +1464,10 @@ unsigned long do_mmap(struct file *file, unsigned long addr, > case MAP_SHARED_VALIDATE: > if (flags & ~flags_mask) > return -EOPNOTSUPP; > + > + if ((flags & MAP_SYNC) && !map_sync_enabled(mm)) > + return -EOPNOTSUPP; > + > if (prot & PROT_WRITE) { > if (!(file->f_mode & FMODE_WRITE)) > return -EACCES; > -- > 2.26.2 > _______________________________________________ Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org To unsubscribe send an email to linux-nvdimm-leave@lists.01.org ^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [RFC PATCH 1/2] libnvdimm: Add prctl control for disabling synchronous fault support. @ 2020-05-29 9:33 ` Michal Suchánek 0 siblings, 0 replies; 40+ messages in thread From: Michal Suchánek @ 2020-05-29 9:33 UTC (permalink / raw) To: jack; +Cc: linux-nvdimm, oohall, Aneesh Kumar K.V, dan.j.williams, linuxppc-dev Adding Jan On Fri, May 29, 2020 at 11:11:39AM +0530, Aneesh Kumar K.V wrote: > With POWER10, architecture is adding new pmem flush and sync instructions. > The kernel should prevent the usage of MAP_SYNC if applications are not using > the new instructions on newer hardware. > > This patch adds a prctl option MAP_SYNC_ENABLE that can be used to enable > the usage of MAP_SYNC. The kernel config option is added to allow the user > to control whether MAP_SYNC should be enabled by default or not. > > Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> > --- > include/linux/sched/coredump.h | 13 ++++++++++--- > include/uapi/linux/prctl.h | 3 +++ > kernel/fork.c | 8 +++++++- > kernel/sys.c | 18 ++++++++++++++++++ > mm/Kconfig | 3 +++ > mm/mmap.c | 4 ++++ > 6 files changed, 45 insertions(+), 4 deletions(-) > > diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h > index ecdc6542070f..9ba6b3d5f991 100644 > --- a/include/linux/sched/coredump.h > +++ b/include/linux/sched/coredump.h > @@ -72,9 +72,16 @@ static inline int get_dumpable(struct mm_struct *mm) > #define MMF_DISABLE_THP 24 /* disable THP for all VMAs */ > #define MMF_OOM_VICTIM 25 /* mm is the oom victim */ > #define MMF_OOM_REAP_QUEUED 26 /* mm was queued for oom_reaper */ > -#define MMF_DISABLE_THP_MASK (1 << MMF_DISABLE_THP) > +#define MMF_DISABLE_MAP_SYNC 27 /* disable THP for all VMAs */ > +#define MMF_DISABLE_THP_MASK (1 << MMF_DISABLE_THP) > +#define MMF_DISABLE_MAP_SYNC_MASK (1 << MMF_DISABLE_MAP_SYNC) > > -#define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\ > - MMF_DISABLE_THP_MASK) > +#define MMF_INIT_MASK (MMF_DUMPABLE_MASK | 
MMF_DUMP_FILTER_MASK | \ > + MMF_DISABLE_THP_MASK | MMF_DISABLE_MAP_SYNC_MASK) > + > +static inline bool map_sync_enabled(struct mm_struct *mm) > +{ > + return !(mm->flags & MMF_DISABLE_MAP_SYNC_MASK); > +} > > #endif /* _LINUX_SCHED_COREDUMP_H */ > diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h > index 07b4f8131e36..ee4cde32d5cf 100644 > --- a/include/uapi/linux/prctl.h > +++ b/include/uapi/linux/prctl.h > @@ -238,4 +238,7 @@ struct prctl_mm_map { > #define PR_SET_IO_FLUSHER 57 > #define PR_GET_IO_FLUSHER 58 > > +#define PR_SET_MAP_SYNC_ENABLE 59 > +#define PR_GET_MAP_SYNC_ENABLE 60 > + > #endif /* _LINUX_PRCTL_H */ > diff --git a/kernel/fork.c b/kernel/fork.c > index 8c700f881d92..d5a9a363e81e 100644 > --- a/kernel/fork.c > +++ b/kernel/fork.c > @@ -963,6 +963,12 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock); > > static unsigned long default_dump_filter = MMF_DUMP_FILTER_DEFAULT; > > +#ifdef CONFIG_ARCH_MAP_SYNC_DISABLE > +unsigned long default_map_sync_mask = MMF_DISABLE_MAP_SYNC_MASK; > +#else > +unsigned long default_map_sync_mask = 0; > +#endif > + > static int __init coredump_filter_setup(char *s) > { > default_dump_filter = > @@ -1039,7 +1045,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, > mm->flags = current->mm->flags & MMF_INIT_MASK; > mm->def_flags = current->mm->def_flags & VM_INIT_DEF_MASK; > } else { > - mm->flags = default_dump_filter; > + mm->flags = default_dump_filter | default_map_sync_mask; > mm->def_flags = 0; > } > > diff --git a/kernel/sys.c b/kernel/sys.c > index d325f3ab624a..f6127cf4128b 100644 > --- a/kernel/sys.c > +++ b/kernel/sys.c > @@ -2450,6 +2450,24 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, > clear_bit(MMF_DISABLE_THP, &me->mm->flags); > up_write(&me->mm->mmap_sem); > break; > + > + case PR_GET_MAP_SYNC_ENABLE: > + if (arg2 || arg3 || arg4 || arg5) > + return -EINVAL; > + error = !test_bit(MMF_DISABLE_MAP_SYNC, 
&me->mm->flags); > + break; > + case PR_SET_MAP_SYNC_ENABLE: > + if (arg3 || arg4 || arg5) > + return -EINVAL; > + if (down_write_killable(&me->mm->mmap_sem)) > + return -EINTR; > + if (arg2) > + clear_bit(MMF_DISABLE_MAP_SYNC, &me->mm->flags); > + else > + set_bit(MMF_DISABLE_MAP_SYNC, &me->mm->flags); > + up_write(&me->mm->mmap_sem); > + break; > + > case PR_MPX_ENABLE_MANAGEMENT: > case PR_MPX_DISABLE_MANAGEMENT: > /* No longer implemented: */ > diff --git a/mm/Kconfig b/mm/Kconfig > index c1acc34c1c35..38fd7cfbfca8 100644 > --- a/mm/Kconfig > +++ b/mm/Kconfig > @@ -867,4 +867,7 @@ config ARCH_HAS_HUGEPD > config MAPPING_DIRTY_HELPERS > bool > > +config ARCH_MAP_SYNC_DISABLE > + bool > + > endmenu > diff --git a/mm/mmap.c b/mm/mmap.c > index f609e9ec4a25..613e5894f178 100644 > --- a/mm/mmap.c > +++ b/mm/mmap.c > @@ -1464,6 +1464,10 @@ unsigned long do_mmap(struct file *file, unsigned long addr, > case MAP_SHARED_VALIDATE: > if (flags & ~flags_mask) > return -EOPNOTSUPP; > + > + if ((flags & MAP_SYNC) && !map_sync_enabled(mm)) > + return -EOPNOTSUPP; > + > if (prot & PROT_WRITE) { > if (!(file->f_mode & FMODE_WRITE)) > return -EACCES; > -- > 2.26.2 > ^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [RFC PATCH 1/2] libnvdimm: Add prctl control for disabling synchronous fault support. 2020-05-29 9:33 ` Michal Suchánek @ 2020-05-29 9:37 ` Aneesh Kumar K.V -1 siblings, 0 replies; 40+ messages in thread From: Aneesh Kumar K.V @ 2020-05-29 9:37 UTC (permalink / raw) To: Michal Suchánek, jack; +Cc: linuxppc-dev, mpe, linux-nvdimm Hi, Thanks Michal. I also missed Jeff in this email thread. -aneesh On 5/29/20 3:03 PM, Michal Suchánek wrote: > Adding Jan > > On Fri, May 29, 2020 at 11:11:39AM +0530, Aneesh Kumar K.V wrote: >> With POWER10, architecture is adding new pmem flush and sync instructions. >> The kernel should prevent the usage of MAP_SYNC if applications are not using >> the new instructions on newer hardware. >> >> This patch adds a prctl option MAP_SYNC_ENABLE that can be used to enable >> the usage of MAP_SYNC. The kernel config option is added to allow the user >> to control whether MAP_SYNC should be enabled by default or not. >> >> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> >> --- >> include/linux/sched/coredump.h | 13 ++++++++++--- >> include/uapi/linux/prctl.h | 3 +++ >> kernel/fork.c | 8 +++++++- >> kernel/sys.c | 18 ++++++++++++++++++ >> mm/Kconfig | 3 +++ >> mm/mmap.c | 4 ++++ >> 6 files changed, 45 insertions(+), 4 deletions(-) >> >> diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h >> index ecdc6542070f..9ba6b3d5f991 100644 >> --- a/include/linux/sched/coredump.h >> +++ b/include/linux/sched/coredump.h >> @@ -72,9 +72,16 @@ static inline int get_dumpable(struct mm_struct *mm) >> #define MMF_DISABLE_THP 24 /* disable THP for all VMAs */ >> #define MMF_OOM_VICTIM 25 /* mm is the oom victim */ >> #define MMF_OOM_REAP_QUEUED 26 /* mm was queued for oom_reaper */ >> -#define MMF_DISABLE_THP_MASK (1 << MMF_DISABLE_THP) >> +#define MMF_DISABLE_MAP_SYNC 27 /* disable THP for all VMAs */ >> +#define MMF_DISABLE_THP_MASK (1 << MMF_DISABLE_THP) >> +#define MMF_DISABLE_MAP_SYNC_MASK (1 << 
MMF_DISABLE_MAP_SYNC) >> >> -#define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\ >> - MMF_DISABLE_THP_MASK) >> +#define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK | \ >> + MMF_DISABLE_THP_MASK | MMF_DISABLE_MAP_SYNC_MASK) >> + >> +static inline bool map_sync_enabled(struct mm_struct *mm) >> +{ >> + return !(mm->flags & MMF_DISABLE_MAP_SYNC_MASK); >> +} >> >> #endif /* _LINUX_SCHED_COREDUMP_H */ >> diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h >> index 07b4f8131e36..ee4cde32d5cf 100644 >> --- a/include/uapi/linux/prctl.h >> +++ b/include/uapi/linux/prctl.h >> @@ -238,4 +238,7 @@ struct prctl_mm_map { >> #define PR_SET_IO_FLUSHER 57 >> #define PR_GET_IO_FLUSHER 58 >> >> +#define PR_SET_MAP_SYNC_ENABLE 59 >> +#define PR_GET_MAP_SYNC_ENABLE 60 >> + >> #endif /* _LINUX_PRCTL_H */ >> diff --git a/kernel/fork.c b/kernel/fork.c >> index 8c700f881d92..d5a9a363e81e 100644 >> --- a/kernel/fork.c >> +++ b/kernel/fork.c >> @@ -963,6 +963,12 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock); >> >> static unsigned long default_dump_filter = MMF_DUMP_FILTER_DEFAULT; >> >> +#ifdef CONFIG_ARCH_MAP_SYNC_DISABLE >> +unsigned long default_map_sync_mask = MMF_DISABLE_MAP_SYNC_MASK; >> +#else >> +unsigned long default_map_sync_mask = 0; >> +#endif >> + >> static int __init coredump_filter_setup(char *s) >> { >> default_dump_filter = >> @@ -1039,7 +1045,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, >> mm->flags = current->mm->flags & MMF_INIT_MASK; >> mm->def_flags = current->mm->def_flags & VM_INIT_DEF_MASK; >> } else { >> - mm->flags = default_dump_filter; >> + mm->flags = default_dump_filter | default_map_sync_mask; >> mm->def_flags = 0; >> } >> >> diff --git a/kernel/sys.c b/kernel/sys.c >> index d325f3ab624a..f6127cf4128b 100644 >> --- a/kernel/sys.c >> +++ b/kernel/sys.c >> @@ -2450,6 +2450,24 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, >> 
clear_bit(MMF_DISABLE_THP, &me->mm->flags); >> up_write(&me->mm->mmap_sem); >> break; >> + >> + case PR_GET_MAP_SYNC_ENABLE: >> + if (arg2 || arg3 || arg4 || arg5) >> + return -EINVAL; >> + error = !test_bit(MMF_DISABLE_MAP_SYNC, &me->mm->flags); >> + break; >> + case PR_SET_MAP_SYNC_ENABLE: >> + if (arg3 || arg4 || arg5) >> + return -EINVAL; >> + if (down_write_killable(&me->mm->mmap_sem)) >> + return -EINTR; >> + if (arg2) >> + clear_bit(MMF_DISABLE_MAP_SYNC, &me->mm->flags); >> + else >> + set_bit(MMF_DISABLE_MAP_SYNC, &me->mm->flags); >> + up_write(&me->mm->mmap_sem); >> + break; >> + >> case PR_MPX_ENABLE_MANAGEMENT: >> case PR_MPX_DISABLE_MANAGEMENT: >> /* No longer implemented: */ >> diff --git a/mm/Kconfig b/mm/Kconfig >> index c1acc34c1c35..38fd7cfbfca8 100644 >> --- a/mm/Kconfig >> +++ b/mm/Kconfig >> @@ -867,4 +867,7 @@ config ARCH_HAS_HUGEPD >> config MAPPING_DIRTY_HELPERS >> bool >> >> +config ARCH_MAP_SYNC_DISABLE >> + bool >> + >> endmenu >> diff --git a/mm/mmap.c b/mm/mmap.c >> index f609e9ec4a25..613e5894f178 100644 >> --- a/mm/mmap.c >> +++ b/mm/mmap.c >> @@ -1464,6 +1464,10 @@ unsigned long do_mmap(struct file *file, unsigned long addr, >> case MAP_SHARED_VALIDATE: >> if (flags & ~flags_mask) >> return -EOPNOTSUPP; >> + >> + if ((flags & MAP_SYNC) && !map_sync_enabled(mm)) >> + return -EOPNOTSUPP; >> + >> if (prot & PROT_WRITE) { >> if (!(file->f_mode & FMODE_WRITE)) >> return -EACCES; >> -- >> 2.26.2 >> _______________________________________________ Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org To unsubscribe send an email to linux-nvdimm-leave@lists.01.org ^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [RFC PATCH 1/2] libnvdimm: Add prctl control for disabling synchronous fault support. @ 2020-05-29 9:37 ` Aneesh Kumar K.V 0 siblings, 0 replies; 40+ messages in thread From: Aneesh Kumar K.V @ 2020-05-29 9:37 UTC (permalink / raw) To: Michal Suchánek, jack Cc: linux-nvdimm, Jeff Moyer, oohall, dan.j.williams, linuxppc-dev Hi, Thanks Michal. I also missed Jeff in this email thread. -aneesh On 5/29/20 3:03 PM, Michal Suchánek wrote: > Adding Jan > > On Fri, May 29, 2020 at 11:11:39AM +0530, Aneesh Kumar K.V wrote: >> With POWER10, architecture is adding new pmem flush and sync instructions. >> The kernel should prevent the usage of MAP_SYNC if applications are not using >> the new instructions on newer hardware. >> >> This patch adds a prctl option MAP_SYNC_ENABLE that can be used to enable >> the usage of MAP_SYNC. The kernel config option is added to allow the user >> to control whether MAP_SYNC should be enabled by default or not. >> >> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> >> --- >> include/linux/sched/coredump.h | 13 ++++++++++--- >> include/uapi/linux/prctl.h | 3 +++ >> kernel/fork.c | 8 +++++++- >> kernel/sys.c | 18 ++++++++++++++++++ >> mm/Kconfig | 3 +++ >> mm/mmap.c | 4 ++++ >> 6 files changed, 45 insertions(+), 4 deletions(-) >> >> diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h >> index ecdc6542070f..9ba6b3d5f991 100644 >> --- a/include/linux/sched/coredump.h >> +++ b/include/linux/sched/coredump.h >> @@ -72,9 +72,16 @@ static inline int get_dumpable(struct mm_struct *mm) >> #define MMF_DISABLE_THP 24 /* disable THP for all VMAs */ >> #define MMF_OOM_VICTIM 25 /* mm is the oom victim */ >> #define MMF_OOM_REAP_QUEUED 26 /* mm was queued for oom_reaper */ >> -#define MMF_DISABLE_THP_MASK (1 << MMF_DISABLE_THP) >> +#define MMF_DISABLE_MAP_SYNC 27 /* disable THP for all VMAs */ >> +#define MMF_DISABLE_THP_MASK (1 << MMF_DISABLE_THP) >> +#define MMF_DISABLE_MAP_SYNC_MASK (1 << MMF_DISABLE_MAP_SYNC) >> 
>> -#define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\ >> - MMF_DISABLE_THP_MASK) >> +#define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK | \ >> + MMF_DISABLE_THP_MASK | MMF_DISABLE_MAP_SYNC_MASK) >> + >> +static inline bool map_sync_enabled(struct mm_struct *mm) >> +{ >> + return !(mm->flags & MMF_DISABLE_MAP_SYNC_MASK); >> +} >> >> #endif /* _LINUX_SCHED_COREDUMP_H */ >> diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h >> index 07b4f8131e36..ee4cde32d5cf 100644 >> --- a/include/uapi/linux/prctl.h >> +++ b/include/uapi/linux/prctl.h >> @@ -238,4 +238,7 @@ struct prctl_mm_map { >> #define PR_SET_IO_FLUSHER 57 >> #define PR_GET_IO_FLUSHER 58 >> >> +#define PR_SET_MAP_SYNC_ENABLE 59 >> +#define PR_GET_MAP_SYNC_ENABLE 60 >> + >> #endif /* _LINUX_PRCTL_H */ >> diff --git a/kernel/fork.c b/kernel/fork.c >> index 8c700f881d92..d5a9a363e81e 100644 >> --- a/kernel/fork.c >> +++ b/kernel/fork.c >> @@ -963,6 +963,12 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock); >> >> static unsigned long default_dump_filter = MMF_DUMP_FILTER_DEFAULT; >> >> +#ifdef CONFIG_ARCH_MAP_SYNC_DISABLE >> +unsigned long default_map_sync_mask = MMF_DISABLE_MAP_SYNC_MASK; >> +#else >> +unsigned long default_map_sync_mask = 0; >> +#endif >> + >> static int __init coredump_filter_setup(char *s) >> { >> default_dump_filter = >> @@ -1039,7 +1045,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, >> mm->flags = current->mm->flags & MMF_INIT_MASK; >> mm->def_flags = current->mm->def_flags & VM_INIT_DEF_MASK; >> } else { >> - mm->flags = default_dump_filter; >> + mm->flags = default_dump_filter | default_map_sync_mask; >> mm->def_flags = 0; >> } >> >> diff --git a/kernel/sys.c b/kernel/sys.c >> index d325f3ab624a..f6127cf4128b 100644 >> --- a/kernel/sys.c >> +++ b/kernel/sys.c >> @@ -2450,6 +2450,24 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, >> clear_bit(MMF_DISABLE_THP, 
&me->mm->flags); >> up_write(&me->mm->mmap_sem); >> break; >> + >> + case PR_GET_MAP_SYNC_ENABLE: >> + if (arg2 || arg3 || arg4 || arg5) >> + return -EINVAL; >> + error = !test_bit(MMF_DISABLE_MAP_SYNC, &me->mm->flags); >> + break; >> + case PR_SET_MAP_SYNC_ENABLE: >> + if (arg3 || arg4 || arg5) >> + return -EINVAL; >> + if (down_write_killable(&me->mm->mmap_sem)) >> + return -EINTR; >> + if (arg2) >> + clear_bit(MMF_DISABLE_MAP_SYNC, &me->mm->flags); >> + else >> + set_bit(MMF_DISABLE_MAP_SYNC, &me->mm->flags); >> + up_write(&me->mm->mmap_sem); >> + break; >> + >> case PR_MPX_ENABLE_MANAGEMENT: >> case PR_MPX_DISABLE_MANAGEMENT: >> /* No longer implemented: */ >> diff --git a/mm/Kconfig b/mm/Kconfig >> index c1acc34c1c35..38fd7cfbfca8 100644 >> --- a/mm/Kconfig >> +++ b/mm/Kconfig >> @@ -867,4 +867,7 @@ config ARCH_HAS_HUGEPD >> config MAPPING_DIRTY_HELPERS >> bool >> >> +config ARCH_MAP_SYNC_DISABLE >> + bool >> + >> endmenu >> diff --git a/mm/mmap.c b/mm/mmap.c >> index f609e9ec4a25..613e5894f178 100644 >> --- a/mm/mmap.c >> +++ b/mm/mmap.c >> @@ -1464,6 +1464,10 @@ unsigned long do_mmap(struct file *file, unsigned long addr, >> case MAP_SHARED_VALIDATE: >> if (flags & ~flags_mask) >> return -EOPNOTSUPP; >> + >> + if ((flags & MAP_SYNC) && !map_sync_enabled(mm)) >> + return -EOPNOTSUPP; >> + >> if (prot & PROT_WRITE) { >> if (!(file->f_mode & FMODE_WRITE)) >> return -EACCES; >> -- >> 2.26.2 >> ^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [RFC PATCH 1/2] libnvdimm: Add prctl control for disabling synchronous fault support. 2020-05-29 9:37 ` Aneesh Kumar K.V @ 2020-05-29 9:52 ` Jan Kara -1 siblings, 0 replies; 40+ messages in thread From: Jan Kara @ 2020-05-29 9:52 UTC (permalink / raw) To: Aneesh Kumar K.V Cc: Michal Suchánek, jack, linuxppc-dev, mpe, linux-nvdimm Hi! On Fri 29-05-20 15:07:31, Aneesh Kumar K.V wrote: > Thanks Michal. I also missed Jeff in this email thread. And I think you'll also need some of the sched maintainers for the prctl bits... > On 5/29/20 3:03 PM, Michal Suchánek wrote: > > Adding Jan > > > > On Fri, May 29, 2020 at 11:11:39AM +0530, Aneesh Kumar K.V wrote: > > > With POWER10, architecture is adding new pmem flush and sync instructions. > > > The kernel should prevent the usage of MAP_SYNC if applications are not using > > > the new instructions on newer hardware. > > > > > > This patch adds a prctl option MAP_SYNC_ENABLE that can be used to enable > > > the usage of MAP_SYNC. The kernel config option is added to allow the user > > > to control whether MAP_SYNC should be enabled by default or not. > > > > > > Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> ... > > > diff --git a/kernel/fork.c b/kernel/fork.c > > > index 8c700f881d92..d5a9a363e81e 100644 > > > --- a/kernel/fork.c > > > +++ b/kernel/fork.c > > > @@ -963,6 +963,12 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock); > > > static unsigned long default_dump_filter = MMF_DUMP_FILTER_DEFAULT; > > > +#ifdef CONFIG_ARCH_MAP_SYNC_DISABLE > > > +unsigned long default_map_sync_mask = MMF_DISABLE_MAP_SYNC_MASK; > > > +#else > > > +unsigned long default_map_sync_mask = 0; > > > +#endif > > > + I'm not sure CONFIG is really the right approach here. For a distro that would basically mean to disable MAP_SYNC for all PPC kernels unless application explicitly uses the right prctl. 
Shouldn't we rather initialize default_map_sync_mask on boot based on whether the CPU we run on requires new flush instructions or not? Otherwise the patch looks sensible. Honza -- Jan Kara <jack@suse.com> SUSE Labs, CR _______________________________________________ Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org To unsubscribe send an email to linux-nvdimm-leave@lists.01.org ^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [RFC PATCH 1/2] libnvdimm: Add prctl control for disabling synchronous fault support. @ 2020-05-29 9:52 ` Jan Kara 0 siblings, 0 replies; 40+ messages in thread From: Jan Kara @ 2020-05-29 9:52 UTC (permalink / raw) To: Aneesh Kumar K.V Cc: linux-nvdimm, jack, Jeff Moyer, oohall, dan.j.williams, Michal Suchánek, linuxppc-dev Hi! On Fri 29-05-20 15:07:31, Aneesh Kumar K.V wrote: > Thanks Michal. I also missed Jeff in this email thread. And I think you'll also need some of the sched maintainers for the prctl bits... > On 5/29/20 3:03 PM, Michal Suchánek wrote: > > Adding Jan > > > > On Fri, May 29, 2020 at 11:11:39AM +0530, Aneesh Kumar K.V wrote: > > > With POWER10, architecture is adding new pmem flush and sync instructions. > > > The kernel should prevent the usage of MAP_SYNC if applications are not using > > > the new instructions on newer hardware. > > > > > > This patch adds a prctl option MAP_SYNC_ENABLE that can be used to enable > > > the usage of MAP_SYNC. The kernel config option is added to allow the user > > > to control whether MAP_SYNC should be enabled by default or not. > > > > > > Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> ... > > > diff --git a/kernel/fork.c b/kernel/fork.c > > > index 8c700f881d92..d5a9a363e81e 100644 > > > --- a/kernel/fork.c > > > +++ b/kernel/fork.c > > > @@ -963,6 +963,12 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock); > > > static unsigned long default_dump_filter = MMF_DUMP_FILTER_DEFAULT; > > > +#ifdef CONFIG_ARCH_MAP_SYNC_DISABLE > > > +unsigned long default_map_sync_mask = MMF_DISABLE_MAP_SYNC_MASK; > > > +#else > > > +unsigned long default_map_sync_mask = 0; > > > +#endif > > > + I'm not sure CONFIG is really the right approach here. For a distro that would basically mean to disable MAP_SYNC for all PPC kernels unless application explicitly uses the right prctl. 
Shouldn't we rather initialize default_map_sync_mask on boot based on whether the CPU we run on requires new flush instructions or not? Otherwise the patch looks sensible. Honza -- Jan Kara <jack@suse.com> SUSE Labs, CR ^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [RFC PATCH 1/2] libnvdimm: Add prctl control for disabling synchronous fault support. 2020-05-29 9:52 ` Jan Kara @ 2020-05-29 10:55 ` Aneesh Kumar K.V -1 siblings, 0 replies; 40+ messages in thread From: Aneesh Kumar K.V @ 2020-05-29 10:55 UTC (permalink / raw) To: Jan Kara; +Cc: Michal Suchánek, jack, linuxppc-dev, mpe, linux-nvdimm On 5/29/20 3:22 PM, Jan Kara wrote: > Hi! > > On Fri 29-05-20 15:07:31, Aneesh Kumar K.V wrote: >> Thanks Michal. I also missed Jeff in this email thread. > > And I think you'll also need some of the sched maintainers for the prctl > bits... > >> On 5/29/20 3:03 PM, Michal Suchánek wrote: >>> Adding Jan >>> >>> On Fri, May 29, 2020 at 11:11:39AM +0530, Aneesh Kumar K.V wrote: >>>> With POWER10, architecture is adding new pmem flush and sync instructions. >>>> The kernel should prevent the usage of MAP_SYNC if applications are not using >>>> the new instructions on newer hardware. >>>> >>>> This patch adds a prctl option MAP_SYNC_ENABLE that can be used to enable >>>> the usage of MAP_SYNC. The kernel config option is added to allow the user >>>> to control whether MAP_SYNC should be enabled by default or not. >>>> >>>> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> > ... >>>> diff --git a/kernel/fork.c b/kernel/fork.c >>>> index 8c700f881d92..d5a9a363e81e 100644 >>>> --- a/kernel/fork.c >>>> +++ b/kernel/fork.c >>>> @@ -963,6 +963,12 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock); >>>> static unsigned long default_dump_filter = MMF_DUMP_FILTER_DEFAULT; >>>> +#ifdef CONFIG_ARCH_MAP_SYNC_DISABLE >>>> +unsigned long default_map_sync_mask = MMF_DISABLE_MAP_SYNC_MASK; >>>> +#else >>>> +unsigned long default_map_sync_mask = 0; >>>> +#endif >>>> + > > I'm not sure CONFIG is really the right approach here. For a distro that would > basically mean to disable MAP_SYNC for all PPC kernels unless application > explicitly uses the right prctl. 
Shouldn't we rather initialize > default_map_sync_mask on boot based on whether the CPU we run on requires > new flush instructions or not? Otherwise the patch looks sensible. > Yes, that is correct. We ideally want to deny MAP_SYNC only w.r.t POWER10. But on a virtualized platform there is no easy way to detect that. We could ideally hook this into the nvdimm driver where we look at the new compat string ibm,persistent-memory-v2 and then disable MAP_SYNC if we find a device with the specific value. BTW with the recent changes I posted for the nvdimm driver, older kernels won't initialize the persistent memory device on newer hardware. Newer hardware will present the device to the OS with a different device tree compat string. My expectation w.r.t this patch was that a distro would want to mark CONFIG_ARCH_MAP_SYNC_DISABLE=n based on the different application certification. Otherwise applications will have to end up calling prctl(PR_SET_MAP_SYNC_ENABLE, 1) anyway. If that is the case, should this be dependent on P10? With that I am wondering should we even have this patch? Can we expect userspace to get updated to use the new instructions? With ppc64 we never had a real persistent memory device available for end users to try. The available persistent memory stack was using vPMEM which was presented as a volatile memory region for which there is no need to use any of the flush instructions. We could safely assume that as we get applications certified/verified for working with pmem device on ppc64, they would all be using the new instructions? -aneesh _______________________________________________ Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org To unsubscribe send an email to linux-nvdimm-leave@lists.01.org ^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [RFC PATCH 1/2] libnvdimm: Add prctl control for disabling synchronous fault support. @ 2020-05-29 10:55 ` Aneesh Kumar K.V 0 siblings, 0 replies; 40+ messages in thread From: Aneesh Kumar K.V @ 2020-05-29 10:55 UTC (permalink / raw) To: Jan Kara Cc: linux-nvdimm, jack, Jeff Moyer, oohall, dan.j.williams, Michal Suchánek, linuxppc-dev On 5/29/20 3:22 PM, Jan Kara wrote: > Hi! > > On Fri 29-05-20 15:07:31, Aneesh Kumar K.V wrote: >> Thanks Michal. I also missed Jeff in this email thread. > > And I think you'll also need some of the sched maintainers for the prctl > bits... > >> On 5/29/20 3:03 PM, Michal Suchánek wrote: >>> Adding Jan >>> >>> On Fri, May 29, 2020 at 11:11:39AM +0530, Aneesh Kumar K.V wrote: >>>> With POWER10, architecture is adding new pmem flush and sync instructions. >>>> The kernel should prevent the usage of MAP_SYNC if applications are not using >>>> the new instructions on newer hardware. >>>> >>>> This patch adds a prctl option MAP_SYNC_ENABLE that can be used to enable >>>> the usage of MAP_SYNC. The kernel config option is added to allow the user >>>> to control whether MAP_SYNC should be enabled by default or not. >>>> >>>> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> > ... >>>> diff --git a/kernel/fork.c b/kernel/fork.c >>>> index 8c700f881d92..d5a9a363e81e 100644 >>>> --- a/kernel/fork.c >>>> +++ b/kernel/fork.c >>>> @@ -963,6 +963,12 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock); >>>> static unsigned long default_dump_filter = MMF_DUMP_FILTER_DEFAULT; >>>> +#ifdef CONFIG_ARCH_MAP_SYNC_DISABLE >>>> +unsigned long default_map_sync_mask = MMF_DISABLE_MAP_SYNC_MASK; >>>> +#else >>>> +unsigned long default_map_sync_mask = 0; >>>> +#endif >>>> + > > I'm not sure CONFIG is really the right approach here. For a distro that would > basically mean to disable MAP_SYNC for all PPC kernels unless application > explicitly uses the right prctl. 
Shouldn't we rather initialize > default_map_sync_mask on boot based on whether the CPU we run on requires > new flush instructions or not? Otherwise the patch looks sensible. > Yes, that is correct. We ideally want to deny MAP_SYNC only w.r.t POWER10. But on a virtualized platform there is no easy way to detect that. We could ideally hook this into the nvdimm driver where we look at the new compat string ibm,persistent-memory-v2 and then disable MAP_SYNC if we find a device with the specific value. BTW with the recent changes I posted for the nvdimm driver, older kernels won't initialize the persistent memory device on newer hardware. Newer hardware will present the device to the OS with a different device tree compat string. My expectation w.r.t this patch was that a distro would want to mark CONFIG_ARCH_MAP_SYNC_DISABLE=n based on the different application certification. Otherwise applications will have to end up calling prctl(PR_SET_MAP_SYNC_ENABLE, 1) anyway. If that is the case, should this be dependent on P10? With that I am wondering should we even have this patch? Can we expect userspace to get updated to use the new instructions? With ppc64 we never had a real persistent memory device available for end users to try. The available persistent memory stack was using vPMEM which was presented as a volatile memory region for which there is no need to use any of the flush instructions. We could safely assume that as we get applications certified/verified for working with pmem device on ppc64, they would all be using the new instructions? -aneesh ^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [RFC PATCH 1/2] libnvdimm: Add prctl control for disabling synchronous fault support. 2020-05-29 10:55 ` Aneesh Kumar K.V @ 2020-05-29 19:22 ` Dan Williams -1 siblings, 0 replies; 40+ messages in thread From: Dan Williams @ 2020-05-29 19:22 UTC (permalink / raw) To: Aneesh Kumar K.V Cc: Jan Kara, Michal Suchánek, jack, linuxppc-dev, Michael Ellerman, linux-nvdimm On Fri, May 29, 2020 at 3:55 AM Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> wrote: > > On 5/29/20 3:22 PM, Jan Kara wrote: > > Hi! > > > > On Fri 29-05-20 15:07:31, Aneesh Kumar K.V wrote: > >> Thanks Michal. I also missed Jeff in this email thread. > > > > And I think you'll also need some of the sched maintainers for the prctl > > bits... > > > >> On 5/29/20 3:03 PM, Michal Suchánek wrote: > >>> Adding Jan > >>> > >>> On Fri, May 29, 2020 at 11:11:39AM +0530, Aneesh Kumar K.V wrote: > >>>> With POWER10, architecture is adding new pmem flush and sync instructions. > >>>> The kernel should prevent the usage of MAP_SYNC if applications are not using > >>>> the new instructions on newer hardware. > >>>> > >>>> This patch adds a prctl option MAP_SYNC_ENABLE that can be used to enable > >>>> the usage of MAP_SYNC. The kernel config option is added to allow the user > >>>> to control whether MAP_SYNC should be enabled by default or not. > >>>> > >>>> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> > > ... > >>>> diff --git a/kernel/fork.c b/kernel/fork.c > >>>> index 8c700f881d92..d5a9a363e81e 100644 > >>>> --- a/kernel/fork.c > >>>> +++ b/kernel/fork.c > >>>> @@ -963,6 +963,12 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock); > >>>> static unsigned long default_dump_filter = MMF_DUMP_FILTER_DEFAULT; > >>>> +#ifdef CONFIG_ARCH_MAP_SYNC_DISABLE > >>>> +unsigned long default_map_sync_mask = MMF_DISABLE_MAP_SYNC_MASK; > >>>> +#else > >>>> +unsigned long default_map_sync_mask = 0; > >>>> +#endif > >>>> + > > > > I'm not sure CONFIG is really the right approach here. 
For a distro that would > > basically mean to disable MAP_SYNC for all PPC kernels unless application > > explicitly uses the right prctl. Shouldn't we rather initialize > > default_map_sync_mask on boot based on whether the CPU we run on requires > > new flush instructions or not? Otherwise the patch looks sensible. > > > > yes that is correct. We ideally want to deny MAP_SYNC only w.r.t > POWER10. But on a virtualized platform there is no easy way to detect > that. We could ideally hook this into the nvdimm driver where we look at > the new compat string ibm,persistent-memory-v2 and then disable MAP_SYNC > if we find a device with the specific value. > > BTW with the recent changes I posted for the nvdimm driver, older kernel > won't initialize persistent memory device on newer hardware. Newer > hardware will present the device to OS with a different device tree > compat string. > > My expectation w.r.t this patch was, Distro would want to mark > CONFIG_ARCH_MAP_SYNC_DISABLE=n based on the different application > certification. Otherwise application will have to end up calling the > prctl(MMF_DISABLE_MAP_SYNC, 0) any way. If that is the case, should this > be dependent on P10? > > With that I am wondering should we even have this patch? Can we expect > userspace get updated to use new instruction?. > > With ppc64 we never had a real persistent memory device available for > end user to try. The available persistent memory stack was using vPMEM > which was presented as a volatile memory region for which there is no > need to use any of the flush instructions. We could safely assume that > as we get applications certified/verified for working with pmem device > on ppc64, they would all be using the new instructions? I think prctl is the wrong interface for this. I was thinking a sysfs interface along the same lines as /sys/block/pmemX/dax/write_cache. 
That attribute is toggling DAXDEV_WRITE_CACHE for the determination of whether the platform or the kernel needs to handle cache flushing relative to power loss. A similar attribute can be established for DAXDEV_SYNC, it would simply default to off based on a configuration time policy, but be dynamically changeable at runtime via sysfs. These flags are device properties that affect the kernel and userspace's handling of persistence. _______________________________________________ Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org To unsubscribe send an email to linux-nvdimm-leave@lists.01.org ^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [RFC PATCH 1/2] libnvdimm: Add prctl control for disabling synchronous fault support. @ 2020-05-29 19:22 ` Dan Williams 0 siblings, 0 replies; 40+ messages in thread From: Dan Williams @ 2020-05-29 19:22 UTC (permalink / raw) To: Aneesh Kumar K.V Cc: Jan Kara, linux-nvdimm, jack, Jeff Moyer, Oliver O'Halloran, Michal Suchánek, linuxppc-dev On Fri, May 29, 2020 at 3:55 AM Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> wrote: > > On 5/29/20 3:22 PM, Jan Kara wrote: > > Hi! > > > > On Fri 29-05-20 15:07:31, Aneesh Kumar K.V wrote: > >> Thanks Michal. I also missed Jeff in this email thread. > > > > And I think you'll also need some of the sched maintainers for the prctl > > bits... > > > >> On 5/29/20 3:03 PM, Michal Suchánek wrote: > >>> Adding Jan > >>> > >>> On Fri, May 29, 2020 at 11:11:39AM +0530, Aneesh Kumar K.V wrote: > >>>> With POWER10, architecture is adding new pmem flush and sync instructions. > >>>> The kernel should prevent the usage of MAP_SYNC if applications are not using > >>>> the new instructions on newer hardware. > >>>> > >>>> This patch adds a prctl option MAP_SYNC_ENABLE that can be used to enable > >>>> the usage of MAP_SYNC. The kernel config option is added to allow the user > >>>> to control whether MAP_SYNC should be enabled by default or not. > >>>> > >>>> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> > > ... > >>>> diff --git a/kernel/fork.c b/kernel/fork.c > >>>> index 8c700f881d92..d5a9a363e81e 100644 > >>>> --- a/kernel/fork.c > >>>> +++ b/kernel/fork.c > >>>> @@ -963,6 +963,12 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock); > >>>> static unsigned long default_dump_filter = MMF_DUMP_FILTER_DEFAULT; > >>>> +#ifdef CONFIG_ARCH_MAP_SYNC_DISABLE > >>>> +unsigned long default_map_sync_mask = MMF_DISABLE_MAP_SYNC_MASK; > >>>> +#else > >>>> +unsigned long default_map_sync_mask = 0; > >>>> +#endif > >>>> + > > > > I'm not sure CONFIG is really the right approach here. 
For a distro that would > > basically mean to disable MAP_SYNC for all PPC kernels unless application > > explicitly uses the right prctl. Shouldn't we rather initialize > > default_map_sync_mask on boot based on whether the CPU we run on requires > > new flush instructions or not? Otherwise the patch looks sensible. > > > > yes that is correct. We ideally want to deny MAP_SYNC only w.r.t > POWER10. But on a virtualized platform there is no easy way to detect > that. We could ideally hook this into the nvdimm driver where we look at > the new compat string ibm,persistent-memory-v2 and then disable MAP_SYNC > if we find a device with the specific value. > > BTW with the recent changes I posted for the nvdimm driver, older kernel > won't initialize persistent memory device on newer hardware. Newer > hardware will present the device to OS with a different device tree > compat string. > > My expectation w.r.t this patch was, Distro would want to mark > CONFIG_ARCH_MAP_SYNC_DISABLE=n based on the different application > certification. Otherwise application will have to end up calling the > prctl(MMF_DISABLE_MAP_SYNC, 0) any way. If that is the case, should this > be dependent on P10? > > With that I am wondering should we even have this patch? Can we expect > userspace get updated to use new instruction?. > > With ppc64 we never had a real persistent memory device available for > end user to try. The available persistent memory stack was using vPMEM > which was presented as a volatile memory region for which there is no > need to use any of the flush instructions. We could safely assume that > as we get applications certified/verified for working with pmem device > on ppc64, they would all be using the new instructions? I think prctl is the wrong interface for this. I was thinking a sysfs interface along the same lines as /sys/block/pmemX/dax/write_cache. 
That attribute is toggling DAXDEV_WRITE_CACHE for the determination of whether the platform or the kernel needs to handle cache flushing relative to power loss. A similar attribute can be established for DAXDEV_SYNC, it would simply default to off based on a configuration time policy, but be dynamically changeable at runtime via sysfs. These flags are device properties that affect the kernel and userspace's handling of persistence. ^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [RFC PATCH 1/2] libnvdimm: Add prctl control for disabling synchronous fault support. 2020-05-29 19:22 ` Dan Williams @ 2020-05-30 7:18 ` Aneesh Kumar K.V -1 siblings, 0 replies; 40+ messages in thread From: Aneesh Kumar K.V @ 2020-05-30 7:18 UTC (permalink / raw) To: Dan Williams Cc: Jan Kara, Michal Suchánek, jack, linuxppc-dev, Michael Ellerman, linux-nvdimm On 5/30/20 12:52 AM, Dan Williams wrote: > On Fri, May 29, 2020 at 3:55 AM Aneesh Kumar K.V > <aneesh.kumar@linux.ibm.com> wrote: >> >> On 5/29/20 3:22 PM, Jan Kara wrote: >>> Hi! >>> >>> On Fri 29-05-20 15:07:31, Aneesh Kumar K.V wrote: >>>> Thanks Michal. I also missed Jeff in this email thread. >>> >>> And I think you'll also need some of the sched maintainers for the prctl >>> bits... >>> >>>> On 5/29/20 3:03 PM, Michal Suchánek wrote: >>>>> Adding Jan >>>>> >>>>> On Fri, May 29, 2020 at 11:11:39AM +0530, Aneesh Kumar K.V wrote: >>>>>> With POWER10, architecture is adding new pmem flush and sync instructions. >>>>>> The kernel should prevent the usage of MAP_SYNC if applications are not using >>>>>> the new instructions on newer hardware. >>>>>> >>>>>> This patch adds a prctl option MAP_SYNC_ENABLE that can be used to enable >>>>>> the usage of MAP_SYNC. The kernel config option is added to allow the user >>>>>> to control whether MAP_SYNC should be enabled by default or not. >>>>>> >>>>>> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> >>> ... 
>>>>>> diff --git a/kernel/fork.c b/kernel/fork.c >>>>>> index 8c700f881d92..d5a9a363e81e 100644 >>>>>> --- a/kernel/fork.c >>>>>> +++ b/kernel/fork.c >>>>>> @@ -963,6 +963,12 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock); >>>>>> static unsigned long default_dump_filter = MMF_DUMP_FILTER_DEFAULT; >>>>>> +#ifdef CONFIG_ARCH_MAP_SYNC_DISABLE >>>>>> +unsigned long default_map_sync_mask = MMF_DISABLE_MAP_SYNC_MASK; >>>>>> +#else >>>>>> +unsigned long default_map_sync_mask = 0; >>>>>> +#endif >>>>>> + >>> >>> I'm not sure CONFIG is really the right approach here. For a distro that would >>> basically mean to disable MAP_SYNC for all PPC kernels unless application >>> explicitly uses the right prctl. Shouldn't we rather initialize >>> default_map_sync_mask on boot based on whether the CPU we run on requires >>> new flush instructions or not? Otherwise the patch looks sensible. >>> >> >> yes that is correct. We ideally want to deny MAP_SYNC only w.r.t >> POWER10. But on a virtualized platform there is no easy way to detect >> that. We could ideally hook this into the nvdimm driver where we look at >> the new compat string ibm,persistent-memory-v2 and then disable MAP_SYNC >> if we find a device with the specific value. >> >> BTW with the recent changes I posted for the nvdimm driver, older kernel >> won't initialize persistent memory device on newer hardware. Newer >> hardware will present the device to OS with a different device tree >> compat string. >> >> My expectation w.r.t this patch was, Distro would want to mark >> CONFIG_ARCH_MAP_SYNC_DISABLE=n based on the different application >> certification. Otherwise application will have to end up calling the >> prctl(MMF_DISABLE_MAP_SYNC, 0) any way. If that is the case, should this >> be dependent on P10? >> >> With that I am wondering should we even have this patch? Can we expect >> userspace get updated to use new instruction?. 
>> >> With ppc64 we never had a real persistent memory device available for >> end user to try. The available persistent memory stack was using vPMEM >> which was presented as a volatile memory region for which there is no >> need to use any of the flush instructions. We could safely assume that >> as we get applications certified/verified for working with pmem device >> on ppc64, they would all be using the new instructions? > > I think prctl is the wrong interface for this. I was thinking a sysfs > interface along the same lines as /sys/block/pmemX/dax/write_cache. > That attribute is toggling DAXDEV_WRITE_CACHE for the determination of > whether the platform or the kernel needs to handle cache flushing > relative to power loss. A similar attribute can be established for > DAXDEV_SYNC, it would simply default to off based on a configuration > time policy, but be dynamically changeable at runtime via sysfs. > > These flags are device properties that affect the kernel and > userspace's handling of persistence. > That will not handle the scenario with multiple applications using the same fsdax mount point where one is updated to use the new instructions and the other is not. -aneesh _______________________________________________ Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org To unsubscribe send an email to linux-nvdimm-leave@lists.01.org ^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [RFC PATCH 1/2] libnvdimm: Add prctl control for disabling synchronous fault support. @ 2020-05-30 7:18 ` Aneesh Kumar K.V 0 siblings, 0 replies; 40+ messages in thread From: Aneesh Kumar K.V @ 2020-05-30 7:18 UTC (permalink / raw) To: Dan Williams Cc: Jan Kara, linux-nvdimm, jack, Jeff Moyer, Oliver O'Halloran, Michal Suchánek, linuxppc-dev On 5/30/20 12:52 AM, Dan Williams wrote: > On Fri, May 29, 2020 at 3:55 AM Aneesh Kumar K.V > <aneesh.kumar@linux.ibm.com> wrote: >> >> On 5/29/20 3:22 PM, Jan Kara wrote: >>> Hi! >>> >>> On Fri 29-05-20 15:07:31, Aneesh Kumar K.V wrote: >>>> Thanks Michal. I also missed Jeff in this email thread. >>> >>> And I think you'll also need some of the sched maintainers for the prctl >>> bits... >>> >>>> On 5/29/20 3:03 PM, Michal Suchánek wrote: >>>>> Adding Jan >>>>> >>>>> On Fri, May 29, 2020 at 11:11:39AM +0530, Aneesh Kumar K.V wrote: >>>>>> With POWER10, architecture is adding new pmem flush and sync instructions. >>>>>> The kernel should prevent the usage of MAP_SYNC if applications are not using >>>>>> the new instructions on newer hardware. >>>>>> >>>>>> This patch adds a prctl option MAP_SYNC_ENABLE that can be used to enable >>>>>> the usage of MAP_SYNC. The kernel config option is added to allow the user >>>>>> to control whether MAP_SYNC should be enabled by default or not. >>>>>> >>>>>> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> >>> ... 
>>>>>> diff --git a/kernel/fork.c b/kernel/fork.c >>>>>> index 8c700f881d92..d5a9a363e81e 100644 >>>>>> --- a/kernel/fork.c >>>>>> +++ b/kernel/fork.c >>>>>> @@ -963,6 +963,12 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock); >>>>>> static unsigned long default_dump_filter = MMF_DUMP_FILTER_DEFAULT; >>>>>> +#ifdef CONFIG_ARCH_MAP_SYNC_DISABLE >>>>>> +unsigned long default_map_sync_mask = MMF_DISABLE_MAP_SYNC_MASK; >>>>>> +#else >>>>>> +unsigned long default_map_sync_mask = 0; >>>>>> +#endif >>>>>> + >>> >>> I'm not sure CONFIG is really the right approach here. For a distro that would >>> basically mean to disable MAP_SYNC for all PPC kernels unless application >>> explicitly uses the right prctl. Shouldn't we rather initialize >>> default_map_sync_mask on boot based on whether the CPU we run on requires >>> new flush instructions or not? Otherwise the patch looks sensible. >>> >> >> yes that is correct. We ideally want to deny MAP_SYNC only w.r.t >> POWER10. But on a virtualized platform there is no easy way to detect >> that. We could ideally hook this into the nvdimm driver where we look at >> the new compat string ibm,persistent-memory-v2 and then disable MAP_SYNC >> if we find a device with the specific value. >> >> BTW with the recent changes I posted for the nvdimm driver, older kernel >> won't initialize persistent memory device on newer hardware. Newer >> hardware will present the device to OS with a different device tree >> compat string. >> >> My expectation w.r.t this patch was, Distro would want to mark >> CONFIG_ARCH_MAP_SYNC_DISABLE=n based on the different application >> certification. Otherwise application will have to end up calling the >> prctl(MMF_DISABLE_MAP_SYNC, 0) any way. If that is the case, should this >> be dependent on P10? >> >> With that I am wondering should we even have this patch? Can we expect >> userspace get updated to use new instruction?. 
>> >> With ppc64 we never had a real persistent memory device available for >> end user to try. The available persistent memory stack was using vPMEM >> which was presented as a volatile memory region for which there is no >> need to use any of the flush instructions. We could safely assume that >> as we get applications certified/verified for working with pmem device >> on ppc64, they would all be using the new instructions? > > I think prctl is the wrong interface for this. I was thinking a sysfs > interface along the same lines as /sys/block/pmemX/dax/write_cache. > That attribute is toggling DAXDEV_WRITE_CACHE for the determination of > whether the platform or the kernel needs to handle cache flushing > relative to power loss. A similar attribute can be established for > DAXDEV_SYNC, it would simply default to off based on a configuration > time policy, but be dynamically changeable at runtime via sysfs. > > These flags are device properties that affect the kernel and > userspace's handling of persistence. > That will not handle the scenario with multiple applications using the same fsdax mount point where one is updated to use the new instructions and the other is not. -aneesh ^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [RFC PATCH 1/2] libnvdimm: Add prctl control for disabling synchronous fault support. 2020-05-30 7:18 ` Aneesh Kumar K.V @ 2020-05-30 16:35 ` Dan Williams -1 siblings, 0 replies; 40+ messages in thread From: Dan Williams @ 2020-05-30 16:35 UTC (permalink / raw) To: Aneesh Kumar K.V Cc: Jan Kara, Michal Suchánek, jack, linuxppc-dev, Michael Ellerman, linux-nvdimm On Sat, May 30, 2020 at 12:18 AM Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> wrote: > > On 5/30/20 12:52 AM, Dan Williams wrote: > > On Fri, May 29, 2020 at 3:55 AM Aneesh Kumar K.V > > <aneesh.kumar@linux.ibm.com> wrote: > >> > >> On 5/29/20 3:22 PM, Jan Kara wrote: > >>> Hi! > >>> > >>> On Fri 29-05-20 15:07:31, Aneesh Kumar K.V wrote: > >>>> Thanks Michal. I also missed Jeff in this email thread. > >>> > >>> And I think you'll also need some of the sched maintainers for the prctl > >>> bits... > >>> > >>>> On 5/29/20 3:03 PM, Michal Suchánek wrote: > >>>>> Adding Jan > >>>>> > >>>>> On Fri, May 29, 2020 at 11:11:39AM +0530, Aneesh Kumar K.V wrote: > >>>>>> With POWER10, architecture is adding new pmem flush and sync instructions. > >>>>>> The kernel should prevent the usage of MAP_SYNC if applications are not using > >>>>>> the new instructions on newer hardware. > >>>>>> > >>>>>> This patch adds a prctl option MAP_SYNC_ENABLE that can be used to enable > >>>>>> the usage of MAP_SYNC. The kernel config option is added to allow the user > >>>>>> to control whether MAP_SYNC should be enabled by default or not. > >>>>>> > >>>>>> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> > >>> ... 
> >>>>>> diff --git a/kernel/fork.c b/kernel/fork.c > >>>>>> index 8c700f881d92..d5a9a363e81e 100644 > >>>>>> --- a/kernel/fork.c > >>>>>> +++ b/kernel/fork.c > >>>>>> @@ -963,6 +963,12 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock); > >>>>>> static unsigned long default_dump_filter = MMF_DUMP_FILTER_DEFAULT; > >>>>>> +#ifdef CONFIG_ARCH_MAP_SYNC_DISABLE > >>>>>> +unsigned long default_map_sync_mask = MMF_DISABLE_MAP_SYNC_MASK; > >>>>>> +#else > >>>>>> +unsigned long default_map_sync_mask = 0; > >>>>>> +#endif > >>>>>> + > >>> > >>> I'm not sure CONFIG is really the right approach here. For a distro that would > >>> basically mean to disable MAP_SYNC for all PPC kernels unless application > >>> explicitly uses the right prctl. Shouldn't we rather initialize > >>> default_map_sync_mask on boot based on whether the CPU we run on requires > >>> new flush instructions or not? Otherwise the patch looks sensible. > >>> > >> > >> yes that is correct. We ideally want to deny MAP_SYNC only w.r.t > >> POWER10. But on a virtualized platform there is no easy way to detect > >> that. We could ideally hook this into the nvdimm driver where we look at > >> the new compat string ibm,persistent-memory-v2 and then disable MAP_SYNC > >> if we find a device with the specific value. > >> > >> BTW with the recent changes I posted for the nvdimm driver, older kernel > >> won't initialize persistent memory device on newer hardware. Newer > >> hardware will present the device to OS with a different device tree > >> compat string. > >> > >> My expectation w.r.t this patch was, Distro would want to mark > >> CONFIG_ARCH_MAP_SYNC_DISABLE=n based on the different application > >> certification. Otherwise application will have to end up calling the > >> prctl(MMF_DISABLE_MAP_SYNC, 0) any way. If that is the case, should this > >> be dependent on P10? > >> > >> With that I am wondering should we even have this patch? Can we expect > >> userspace get updated to use new instruction?. 
> >> > >> With ppc64 we never had a real persistent memory device available for > >> end user to try. The available persistent memory stack was using vPMEM > >> which was presented as a volatile memory region for which there is no > >> need to use any of the flush instructions. We could safely assume that > >> as we get applications certified/verified for working with pmem device > >> on ppc64, they would all be using the new instructions? > > > > I think prctl is the wrong interface for this. I was thinking a sysfs > > interface along the same lines as /sys/block/pmemX/dax/write_cache. > > That attribute is toggling DAXDEV_WRITE_CACHE for the determination of > > whether the platform or the kernel needs to handle cache flushing > > relative to power loss. A similar attribute can be established for > > DAXDEV_SYNC, it would simply default to off based on a configuration > > time policy, but be dynamically changeable at runtime via sysfs. > > > > These flags are device properties that affect the kernel and > > userspace's handling of persistence. > > > > That will not handle the scenario with multiple applications using the > same fsdax mount point where one is updated to use the new instruction > and the other is not. Right, it needs to be a global setting / flag day to switch from one regime to another. Per-process control is a recipe for disaster. _______________________________________________ Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org To unsubscribe send an email to linux-nvdimm-leave@lists.01.org ^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [RFC PATCH 1/2] libnvdimm: Add prctl control for disabling synchronous fault support. @ 2020-05-30 16:35 ` Dan Williams 0 siblings, 0 replies; 40+ messages in thread From: Dan Williams @ 2020-05-30 16:35 UTC (permalink / raw) To: Aneesh Kumar K.V Cc: Jan Kara, linux-nvdimm, jack, Jeff Moyer, Oliver O'Halloran, Michal Suchánek, linuxppc-dev On Sat, May 30, 2020 at 12:18 AM Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> wrote: > > On 5/30/20 12:52 AM, Dan Williams wrote: > > On Fri, May 29, 2020 at 3:55 AM Aneesh Kumar K.V > > <aneesh.kumar@linux.ibm.com> wrote: > >> > >> On 5/29/20 3:22 PM, Jan Kara wrote: > >>> Hi! > >>> > >>> On Fri 29-05-20 15:07:31, Aneesh Kumar K.V wrote: > >>>> Thanks Michal. I also missed Jeff in this email thread. > >>> > >>> And I think you'll also need some of the sched maintainers for the prctl > >>> bits... > >>> > >>>> On 5/29/20 3:03 PM, Michal Suchánek wrote: > >>>>> Adding Jan > >>>>> > >>>>> On Fri, May 29, 2020 at 11:11:39AM +0530, Aneesh Kumar K.V wrote: > >>>>>> With POWER10, architecture is adding new pmem flush and sync instructions. > >>>>>> The kernel should prevent the usage of MAP_SYNC if applications are not using > >>>>>> the new instructions on newer hardware. > >>>>>> > >>>>>> This patch adds a prctl option MAP_SYNC_ENABLE that can be used to enable > >>>>>> the usage of MAP_SYNC. The kernel config option is added to allow the user > >>>>>> to control whether MAP_SYNC should be enabled by default or not. > >>>>>> > >>>>>> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> > >>> ... 
> >>>>>> diff --git a/kernel/fork.c b/kernel/fork.c > >>>>>> index 8c700f881d92..d5a9a363e81e 100644 > >>>>>> --- a/kernel/fork.c > >>>>>> +++ b/kernel/fork.c > >>>>>> @@ -963,6 +963,12 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock); > >>>>>> static unsigned long default_dump_filter = MMF_DUMP_FILTER_DEFAULT; > >>>>>> +#ifdef CONFIG_ARCH_MAP_SYNC_DISABLE > >>>>>> +unsigned long default_map_sync_mask = MMF_DISABLE_MAP_SYNC_MASK; > >>>>>> +#else > >>>>>> +unsigned long default_map_sync_mask = 0; > >>>>>> +#endif > >>>>>> + > >>> > >>> I'm not sure CONFIG is really the right approach here. For a distro that would > >>> basically mean to disable MAP_SYNC for all PPC kernels unless application > >>> explicitly uses the right prctl. Shouldn't we rather initialize > >>> default_map_sync_mask on boot based on whether the CPU we run on requires > >>> new flush instructions or not? Otherwise the patch looks sensible. > >>> > >> > >> yes that is correct. We ideally want to deny MAP_SYNC only w.r.t > >> POWER10. But on a virtualized platform there is no easy way to detect > >> that. We could ideally hook this into the nvdimm driver where we look at > >> the new compat string ibm,persistent-memory-v2 and then disable MAP_SYNC > >> if we find a device with the specific value. > >> > >> BTW with the recent changes I posted for the nvdimm driver, older kernel > >> won't initialize persistent memory device on newer hardware. Newer > >> hardware will present the device to OS with a different device tree > >> compat string. > >> > >> My expectation w.r.t this patch was, Distro would want to mark > >> CONFIG_ARCH_MAP_SYNC_DISABLE=n based on the different application > >> certification. Otherwise application will have to end up calling the > >> prctl(MMF_DISABLE_MAP_SYNC, 0) any way. If that is the case, should this > >> be dependent on P10? > >> > >> With that I am wondering should we even have this patch? Can we expect > >> userspace get updated to use new instruction?. 
> >> > >> With ppc64 we never had a real persistent memory device available for > >> end user to try. The available persistent memory stack was using vPMEM > >> which was presented as a volatile memory region for which there is no > >> need to use any of the flush instructions. We could safely assume that > >> as we get applications certified/verified for working with pmem device > >> on ppc64, they would all be using the new instructions? > > > > I think prctl is the wrong interface for this. I was thinking a sysfs > > interface along the same lines as /sys/block/pmemX/dax/write_cache. > > That attribute is toggling DAXDEV_WRITE_CACHE for the determination of > > whether the platform or the kernel needs to handle cache flushing > > relative to power loss. A similar attribute can be established for > > DAXDEV_SYNC, it would simply default to off based on a configuration > > time policy, but be dynamically changeable at runtime via sysfs. > > > > These flags are device properties that affect the kernel and > > userspace's handling of persistence. > > > > That will not handle the scenario with multiple applications using the > same fsdax mount point where one is updated to use the new instruction > and the other is not. Right, it needs to be a global setting / flag day to switch from one regime to another. Per-process control is a recipe for disaster. ^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [RFC PATCH 1/2] libnvdimm: Add prctl control for disabling synchronous fault support. 2020-05-30 16:35 ` Dan Williams @ 2020-06-01 9:50 ` Jan Kara -1 siblings, 0 replies; 40+ messages in thread From: Jan Kara @ 2020-06-01 9:50 UTC (permalink / raw) To: Dan Williams Cc: Aneesh Kumar K.V, Jan Kara, Michal Suchánek, jack, linuxppc-dev, Michael Ellerman, linux-nvdimm On Sat 30-05-20 09:35:19, Dan Williams wrote: > On Sat, May 30, 2020 at 12:18 AM Aneesh Kumar K.V > <aneesh.kumar@linux.ibm.com> wrote: > > > > On 5/30/20 12:52 AM, Dan Williams wrote: > > > On Fri, May 29, 2020 at 3:55 AM Aneesh Kumar K.V > > > <aneesh.kumar@linux.ibm.com> wrote: > > >> > > >> On 5/29/20 3:22 PM, Jan Kara wrote: > > >>> Hi! > > >>> > > >>> On Fri 29-05-20 15:07:31, Aneesh Kumar K.V wrote: > > >>>> Thanks Michal. I also missed Jeff in this email thread. > > >>> > > >>> And I think you'll also need some of the sched maintainers for the prctl > > >>> bits... > > >>> > > >>>> On 5/29/20 3:03 PM, Michal Suchánek wrote: > > >>>>> Adding Jan > > >>>>> > > >>>>> On Fri, May 29, 2020 at 11:11:39AM +0530, Aneesh Kumar K.V wrote: > > >>>>>> With POWER10, architecture is adding new pmem flush and sync instructions. > > >>>>>> The kernel should prevent the usage of MAP_SYNC if applications are not using > > >>>>>> the new instructions on newer hardware. > > >>>>>> > > >>>>>> This patch adds a prctl option MAP_SYNC_ENABLE that can be used to enable > > >>>>>> the usage of MAP_SYNC. The kernel config option is added to allow the user > > >>>>>> to control whether MAP_SYNC should be enabled by default or not. > > >>>>>> > > >>>>>> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> > > >>> ... 
> > >>>>>> diff --git a/kernel/fork.c b/kernel/fork.c > > >>>>>> index 8c700f881d92..d5a9a363e81e 100644 > > >>>>>> --- a/kernel/fork.c > > >>>>>> +++ b/kernel/fork.c > > >>>>>> @@ -963,6 +963,12 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock); > > >>>>>> static unsigned long default_dump_filter = MMF_DUMP_FILTER_DEFAULT; > > >>>>>> +#ifdef CONFIG_ARCH_MAP_SYNC_DISABLE > > >>>>>> +unsigned long default_map_sync_mask = MMF_DISABLE_MAP_SYNC_MASK; > > >>>>>> +#else > > >>>>>> +unsigned long default_map_sync_mask = 0; > > >>>>>> +#endif > > >>>>>> + > > >>> > > >>> I'm not sure CONFIG is really the right approach here. For a distro that would > > >>> basically mean to disable MAP_SYNC for all PPC kernels unless application > > >>> explicitly uses the right prctl. Shouldn't we rather initialize > > >>> default_map_sync_mask on boot based on whether the CPU we run on requires > > >>> new flush instructions or not? Otherwise the patch looks sensible. > > >>> > > >> > > >> yes that is correct. We ideally want to deny MAP_SYNC only w.r.t > > >> POWER10. But on a virtualized platform there is no easy way to detect > > >> that. We could ideally hook this into the nvdimm driver where we look at > > >> the new compat string ibm,persistent-memory-v2 and then disable MAP_SYNC > > >> if we find a device with the specific value. > > >> > > >> BTW with the recent changes I posted for the nvdimm driver, older kernel > > >> won't initialize persistent memory device on newer hardware. Newer > > >> hardware will present the device to OS with a different device tree > > >> compat string. > > >> > > >> My expectation w.r.t this patch was, Distro would want to mark > > >> CONFIG_ARCH_MAP_SYNC_DISABLE=n based on the different application > > >> certification. Otherwise application will have to end up calling the > > >> prctl(MMF_DISABLE_MAP_SYNC, 0) any way. If that is the case, should this > > >> be dependent on P10? 
> > >> > > >> With that I am wondering should we even have this patch? Can we expect > > >> userspace get updated to use new instruction?. > > >> > > >> With ppc64 we never had a real persistent memory device available for > > >> end user to try. The available persistent memory stack was using vPMEM > > >> which was presented as a volatile memory region for which there is no > > >> need to use any of the flush instructions. We could safely assume that > > >> as we get applications certified/verified for working with pmem device > > >> on ppc64, they would all be using the new instructions? > > > > > > I think prctl is the wrong interface for this. I was thinking a sysfs > > > interface along the same lines as /sys/block/pmemX/dax/write_cache. > > > That attribute is toggling DAXDEV_WRITE_CACHE for the determination of > > > whether the platform or the kernel needs to handle cache flushing > > > relative to power loss. A similar attribute can be established for > > > DAXDEV_SYNC, it would simply default to off based on a configuration > > > time policy, but be dynamically changeable at runtime via sysfs. > > > > > > These flags are device properties that affect the kernel and > > > userspace's handling of persistence. > > > > > > > That will not handle the scenario with multiple applications using the > > same fsdax mount point where one is updated to use the new instruction > > and the other is not. > > Right, it needs to be a global setting / flag day to switch from one > regime to another. Per-process control is a recipe for disaster. First I'd like to mention that hopefully the concern is mostly theoretical since as Aneesh wrote above, real persistent memory never shipped for PPC and so there are very few apps (if any) using the old way to ensure cache flushing. But I'd like to understand why do you think per-process control is a recipe for disaster? Because from my POV the sysfs interface you propose is actually difficult to use in practice. 
As a distributor, you have hard time picking the default because you have a choice between picking safe option which is going to confuse users because of failing MAP_SYNC and unsafe option where everyone will be happy until someone loses data because of some ancient application using wrong instructions to persist data. Poor experience for users in either way. And when distro defaults to "safe option", then the burden is on the sysadmin to toggle the switch but how is he supposed to decide when that is safe? First he has to understand what the problem actually is, then he has to audit all the applications using pmem whether they use the new instruction - which is IMO a lot of effort if you have a couple of applications and practically infeasible if you have more of them. So IMO the burden should be *on the application* to declare that it is aware of the new instructions to flush pmem on the platform and only to such application the kernel should give the trust to use MAP_SYNC mappings. Honza -- Jan Kara <jack@suse.com> SUSE Labs, CR _______________________________________________ Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org To unsubscribe send an email to linux-nvdimm-leave@lists.01.org ^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [RFC PATCH 1/2] libnvdimm: Add prctl control for disabling synchronous fault support. @ 2020-06-01 9:50 ` Jan Kara 0 siblings, 0 replies; 40+ messages in thread From: Jan Kara @ 2020-06-01 9:50 UTC (permalink / raw) To: Dan Williams Cc: Jan Kara, linux-nvdimm, Aneesh Kumar K.V, jack, Jeff Moyer, Oliver O'Halloran, Michal Suchánek, linuxppc-dev On Sat 30-05-20 09:35:19, Dan Williams wrote: > On Sat, May 30, 2020 at 12:18 AM Aneesh Kumar K.V > <aneesh.kumar@linux.ibm.com> wrote: > > > > On 5/30/20 12:52 AM, Dan Williams wrote: > > > On Fri, May 29, 2020 at 3:55 AM Aneesh Kumar K.V > > > <aneesh.kumar@linux.ibm.com> wrote: > > >> > > >> On 5/29/20 3:22 PM, Jan Kara wrote: > > >>> Hi! > > >>> > > >>> On Fri 29-05-20 15:07:31, Aneesh Kumar K.V wrote: > > >>>> Thanks Michal. I also missed Jeff in this email thread. > > >>> > > >>> And I think you'll also need some of the sched maintainers for the prctl > > >>> bits... > > >>> > > >>>> On 5/29/20 3:03 PM, Michal Suchánek wrote: > > >>>>> Adding Jan > > >>>>> > > >>>>> On Fri, May 29, 2020 at 11:11:39AM +0530, Aneesh Kumar K.V wrote: > > >>>>>> With POWER10, architecture is adding new pmem flush and sync instructions. > > >>>>>> The kernel should prevent the usage of MAP_SYNC if applications are not using > > >>>>>> the new instructions on newer hardware. > > >>>>>> > > >>>>>> This patch adds a prctl option MAP_SYNC_ENABLE that can be used to enable > > >>>>>> the usage of MAP_SYNC. The kernel config option is added to allow the user > > >>>>>> to control whether MAP_SYNC should be enabled by default or not. > > >>>>>> > > >>>>>> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> > > >>> ... 
> > >>>>>> diff --git a/kernel/fork.c b/kernel/fork.c > > >>>>>> index 8c700f881d92..d5a9a363e81e 100644 > > >>>>>> --- a/kernel/fork.c > > >>>>>> +++ b/kernel/fork.c > > >>>>>> @@ -963,6 +963,12 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock); > > >>>>>> static unsigned long default_dump_filter = MMF_DUMP_FILTER_DEFAULT; > > >>>>>> +#ifdef CONFIG_ARCH_MAP_SYNC_DISABLE > > >>>>>> +unsigned long default_map_sync_mask = MMF_DISABLE_MAP_SYNC_MASK; > > >>>>>> +#else > > >>>>>> +unsigned long default_map_sync_mask = 0; > > >>>>>> +#endif > > >>>>>> + > > >>> > > >>> I'm not sure CONFIG is really the right approach here. For a distro that would > > >>> basically mean to disable MAP_SYNC for all PPC kernels unless application > > >>> explicitly uses the right prctl. Shouldn't we rather initialize > > >>> default_map_sync_mask on boot based on whether the CPU we run on requires > > >>> new flush instructions or not? Otherwise the patch looks sensible. > > >>> > > >> > > >> yes that is correct. We ideally want to deny MAP_SYNC only w.r.t > > >> POWER10. But on a virtualized platform there is no easy way to detect > > >> that. We could ideally hook this into the nvdimm driver where we look at > > >> the new compat string ibm,persistent-memory-v2 and then disable MAP_SYNC > > >> if we find a device with the specific value. > > >> > > >> BTW with the recent changes I posted for the nvdimm driver, older kernel > > >> won't initialize persistent memory device on newer hardware. Newer > > >> hardware will present the device to OS with a different device tree > > >> compat string. > > >> > > >> My expectation w.r.t this patch was, Distro would want to mark > > >> CONFIG_ARCH_MAP_SYNC_DISABLE=n based on the different application > > >> certification. Otherwise application will have to end up calling the > > >> prctl(MMF_DISABLE_MAP_SYNC, 0) any way. If that is the case, should this > > >> be dependent on P10? 
> > >> > > >> With that I am wondering should we even have this patch? Can we expect > > >> userspace get updated to use new instruction?. > > >> > > >> With ppc64 we never had a real persistent memory device available for > > >> end user to try. The available persistent memory stack was using vPMEM > > >> which was presented as a volatile memory region for which there is no > > >> need to use any of the flush instructions. We could safely assume that > > >> as we get applications certified/verified for working with pmem device > > >> on ppc64, they would all be using the new instructions? > > > > > > I think prctl is the wrong interface for this. I was thinking a sysfs > > > interface along the same lines as /sys/block/pmemX/dax/write_cache. > > > That attribute is toggling DAXDEV_WRITE_CACHE for the determination of > > > whether the platform or the kernel needs to handle cache flushing > > > relative to power loss. A similar attribute can be established for > > > DAXDEV_SYNC, it would simply default to off based on a configuration > > > time policy, but be dynamically changeable at runtime via sysfs. > > > > > > These flags are device properties that affect the kernel and > > > userspace's handling of persistence. > > > > > > > That will not handle the scenario with multiple applications using the > > same fsdax mount point where one is updated to use the new instruction > > and the other is not. > > Right, it needs to be a global setting / flag day to switch from one > regime to another. Per-process control is a recipe for disaster. First I'd like to mention that hopefully the concern is mostly theoretical since as Aneesh wrote above, real persistent memory never shipped for PPC and so there are very few apps (if any) using the old way to ensure cache flushing. But I'd like to understand why do you think per-process control is a recipe for disaster? Because from my POV the sysfs interface you propose is actually difficult to use in practice. 
As a distributor, you have hard time picking the default because you have a choice between picking safe option which is going to confuse users because of failing MAP_SYNC and unsafe option where everyone will be happy until someone loses data because of some ancient application using wrong instructions to persist data. Poor experience for users in either way. And when distro defaults to "safe option", then the burden is on the sysadmin to toggle the switch but how is he supposed to decide when that is safe? First he has to understand what the problem actually is, then he has to audit all the applications using pmem whether they use the new instruction - which is IMO a lot of effort if you have a couple of applications and practically infeasible if you have more of them. So IMO the burden should be *on the application* to declare that it is aware of the new instructions to flush pmem on the platform and only to such application the kernel should give the trust to use MAP_SYNC mappings. Honza -- Jan Kara <jack@suse.com> SUSE Labs, CR ^ permalink raw reply [flat|nested] 40+ messages in thread
* RE: [RFC PATCH 1/2] libnvdimm: Add prctl control for disabling synchronous fault support. 2020-06-01 9:50 ` Jan Kara @ 2020-06-02 17:59 ` Williams, Dan J -1 siblings, 0 replies; 40+ messages in thread From: Williams, Dan J @ 2020-06-02 17:59 UTC (permalink / raw) To: Jan Kara; +Cc: Aneesh Kumar K.V, Michal Suchánek, jack, linuxppc-dev [ forgive formatting, a series of unfortunate events has me using Outlook for the moment ] > From: Jan Kara <jack@suse.cz> > > > > These flags are device properties that affect the kernel and > > > > userspace's handling of persistence. > > > > > > > > > > That will not handle the scenario with multiple applications using > > > the same fsdax mount point where one is updated to use the new > > > instruction and the other is not. > > > > Right, it needs to be a global setting / flag day to switch from one > > regime to another. Per-process control is a recipe for disaster. > > First I'd like to mention that hopefully the concern is mostly theoretical since > as Aneesh wrote above, real persistent memory never shipped for PPC and > so there are very few apps (if any) using the old way to ensure cache > flushing. > > But I'd like to understand why do you think per-process control is a recipe for > disaster? Because from my POV the sysfs interface you propose is actually > difficult to use in practice. As a distributor, you have hard time picking the > default because you have a choice between picking safe option which is > going to confuse users because of failing MAP_SYNC and unsafe option > where everyone will be happy until someone looses data because of some > ancient application using wrong instructions to persist data. Poor experience > for users in either way. And when distro defaults to "safe option", then the > burden is on the sysadmin to toggle the switch but how is he supposed to > decide when that is safe? 
First he has to understand what the problem > actually is, then he has to audit all the applications using pmem whether they > use the new instruction - which is IMO a lot of effort if you have a couple of > applications and practically infeasible if you have more of them. > So IMO the burden should be *on the application* to declare that it is aware > of the new instructions to flush pmem on the platform and only to such > application the kernel should give the trust to use MAP_SYNC mappings. The "disaster" in my mind is this need to globally change the ABI for persistence semantics for all of Linux because one CPU wants a do over. What does a generic "MAP_SYNC_ENABLE" knob even mean to the existing deployed base of persistent memory applications? Yes, sysfs is awkward, but it's trying to provide some relief without imposing unexplainable semantics on everyone else. I think a comprehensive (overengineered) solution would involve not introducing another "I know what I'm doing" flag to the interface, but maybe requiring applications to call a pmem sync API in something like a vsyscall. Or, also overengineered, some binary translation / interpretation to actively detect and kill applications that deploy the old instructions. Something horrid like on first write fault to a MAP_SYNC try to look ahead in the binary for the correct sync sequence and kill the application otherwise. That would at least provide some enforcement and safety without requiring other architectures to consider what MAP_SYNC_ENABLE means to them. _______________________________________________ Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org To unsubscribe send an email to linux-nvdimm-leave@lists.01.org ^ permalink raw reply [flat|nested] 40+ messages in thread
* RE: [RFC PATCH 1/2] libnvdimm: Add prctl control for disabling synchronous fault support. @ 2020-06-02 17:59 ` Williams, Dan J 0 siblings, 0 replies; 40+ messages in thread From: Williams, Dan J @ 2020-06-02 17:59 UTC (permalink / raw) To: Jan Kara Cc: linux-nvdimm, Aneesh Kumar K.V, jack, Jeff Moyer, Oliver O'Halloran, Michal Suchánek, linuxppc-dev [ forgive formatting, a series of unfortunate events has me using Outlook for the moment ] > From: Jan Kara <jack@suse.cz> > > > > These flags are device properties that affect the kernel and > > > > userspace's handling of persistence. > > > > > > > > > > That will not handle the scenario with multiple applications using > > > the same fsdax mount point where one is updated to use the new > > > instruction and the other is not. > > > > Right, it needs to be a global setting / flag day to switch from one > > regime to another. Per-process control is a recipe for disaster. > > First I'd like to mention that hopefully the concern is mostly theoretical since > as Aneesh wrote above, real persistent memory never shipped for PPC and > so there are very few apps (if any) using the old way to ensure cache > flushing. > > But I'd like to understand why do you think per-process control is a recipe for > disaster? Because from my POV the sysfs interface you propose is actually > difficult to use in practice. As a distributor, you have hard time picking the > default because you have a choice between picking safe option which is > going to confuse users because of failing MAP_SYNC and unsafe option > where everyone will be happy until someone looses data because of some > ancient application using wrong instructions to persist data. Poor experience > for users in either way. And when distro defaults to "safe option", then the > burden is on the sysadmin to toggle the switch but how is he supposed to > decide when that is safe? 
First he has to understand what the problem > actually is, then he has to audit all the applications using pmem whether they > use the new instruction - which is IMO a lot of effort if you have a couple of > applications and practically infeasible if you have more of them. > So IMO the burden should be *on the application* to declare that it is aware > of the new instructions to flush pmem on the platform and only to such > application the kernel should give the trust to use MAP_SYNC mappings. The "disaster" in my mind is this need to globally change the ABI for persistence semantics for all of Linux because one CPU wants a do over. What does a generic "MAP_SYNC_ENABLE" knob even mean to the existing deployed base of persistent memory applications? Yes, sysfs is awkward, but it's trying to provide some relief without imposing unexplainable semantics on everyone else. I think a comprehensive (overengineered) solution would involve not introducing another "I know what I'm doing" flag to the interface, but maybe requiring applications to call a pmem sync API in something like a vsyscall. Or, also overengineered, some binary translation / interpretation to actively detect and kill applications that deploy the old instructions. Something horrid like on first write fault to a MAP_SYNC try to look ahead in the binary for the correct sync sequence and kill the application otherwise. That would at least provide some enforcement and safety without requiring other architectures to consider what MAP_SYNC_ENABLE means to them. ^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [RFC PATCH 1/2] libnvdimm: Add prctl control for disabling synchronous fault support. 2020-06-02 17:59 ` Williams, Dan J @ 2020-06-03 8:26 ` Jan Kara -1 siblings, 0 replies; 40+ messages in thread From: Jan Kara @ 2020-06-03 8:26 UTC (permalink / raw) To: Williams, Dan J Cc: Jan Kara, Aneesh Kumar K.V, Michal Suchánek, jack, linuxppc-dev, Michael Ellerman, linux-nvdimm On Tue 02-06-20 17:59:08, Williams, Dan J wrote: > [ forgive formatting, a series of unfortunate events has me using Outlook for the moment ] > > > From: Jan Kara <jack@suse.cz> > > > > > These flags are device properties that affect the kernel and > > > > > userspace's handling of persistence. > > > > > > > > > > > > > That will not handle the scenario with multiple applications using > > > > the same fsdax mount point where one is updated to use the new > > > > instruction and the other is not. > > > > > > Right, it needs to be a global setting / flag day to switch from one > > > regime to another. Per-process control is a recipe for disaster. > > > > First I'd like to mention that hopefully the concern is mostly theoretical since > > as Aneesh wrote above, real persistent memory never shipped for PPC and > > so there are very few apps (if any) using the old way to ensure cache > > flushing. > > > > But I'd like to understand why do you think per-process control is a recipe for > > disaster? Because from my POV the sysfs interface you propose is actually > > difficult to use in practice. As a distributor, you have hard time picking the > > default because you have a choice between picking safe option which is > > going to confuse users because of failing MAP_SYNC and unsafe option > > where everyone will be happy until someone looses data because of some > > ancient application using wrong instructions to persist data. Poor experience > > for users in either way. 
And when distro defaults to "safe option", then the > > burden is on the sysadmin to toggle the switch but how is he supposed to > > decide when that is safe? First he has to understand what the problem > > actually is, then he has to audit all the applications using pmem whether they > > use the new instruction - which is IMO a lot of effort if you have a couple of > > applications and practically infeasible if you have more of them. > > So IMO the burden should be *on the application* to declare that it is aware > > of the new instructions to flush pmem on the platform and only to such > > application the kernel should give the trust to use MAP_SYNC mappings. > > The "disaster" in my mind is this need to globally change the ABI for > persistence semantics for all of Linux because one CPU wants a do over. > What does a generic "MAP_SYNC_ENABLE" knob even mean to the existing > deployed base of persistent memory applications? Yes, sysfs is awkward, > but it's trying to provide some relief without imposing unexplainable > semantics on everyone else. I think a comprehensive (overengineered) > solution would involve not introducing another "I know what I'm doing" > flag to the interface, but maybe requiring applications to call a pmem > sync API in something like a vsyscall. Or, also overengineered, some > binary translation / interpretation to actively detect and kill > applications that deploy the old instructions. Something horrid like on > first write fault to a MAP_SYNC try to look ahead in the binary for the > correct sync sequence and kill the application otherwise. That would at > least provide some enforcement and safety without requiring other > architectures to consider what MAP_SYNC_ENABLE means to them. Thanks for explanation. So I absolutely agree that other architectures (and even older versions of POWER architecture) must not be influenced by the new tunable. 
That's why I wrote in my reply to Aneesh that I'd be for checking during mmap(2) with MAP_SYNC, whether we are in a situation where new PPC flush instructions are required and *only in that case* decide based on the prctl value whether MAP_SYNC should be allowed or not. Whether this solution is overengineering or not depends on how likely you think it is that there will be applications trying to use old flush instructions with MAP_SYNC on POWER10 platforms... Honza -- Jan Kara <jack@suse.com> SUSE Labs, CR _______________________________________________ Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org To unsubscribe send an email to linux-nvdimm-leave@lists.01.org ^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [RFC PATCH 1/2] libnvdimm: Add prctl control for disabling synchronous fault support. @ 2020-06-03 8:26 ` Jan Kara 0 siblings, 0 replies; 40+ messages in thread From: Jan Kara @ 2020-06-03 8:26 UTC (permalink / raw) To: Williams, Dan J Cc: Jan Kara, linux-nvdimm, Aneesh Kumar K.V, jack, Jeff Moyer, Oliver O'Halloran, Michal Suchánek, linuxppc-dev On Tue 02-06-20 17:59:08, Williams, Dan J wrote: > [ forgive formatting, a series of unfortunate events has me using Outlook for the moment ] > > > From: Jan Kara <jack@suse.cz> > > > > > These flags are device properties that affect the kernel and > > > > > userspace's handling of persistence. > > > > > > > > > > > > > That will not handle the scenario with multiple applications using > > > > the same fsdax mount point where one is updated to use the new > > > > instruction and the other is not. > > > > > > Right, it needs to be a global setting / flag day to switch from one > > > regime to another. Per-process control is a recipe for disaster. > > > > First I'd like to mention that hopefully the concern is mostly theoretical since > > as Aneesh wrote above, real persistent memory never shipped for PPC and > > so there are very few apps (if any) using the old way to ensure cache > > flushing. > > > > But I'd like to understand why do you think per-process control is a recipe for > > disaster? Because from my POV the sysfs interface you propose is actually > > difficult to use in practice. As a distributor, you have hard time picking the > > default because you have a choice between picking safe option which is > > going to confuse users because of failing MAP_SYNC and unsafe option > > where everyone will be happy until someone looses data because of some > > ancient application using wrong instructions to persist data. Poor experience > > for users in either way. 
And when distro defaults to "safe option", then the > > burden is on the sysadmin to toggle the switch but how is he supposed to > > decide when that is safe? First he has to understand what the problem > > actually is, then he has to audit all the applications using pmem whether they > > use the new instruction - which is IMO a lot of effort if you have a couple of > > applications and practically infeasible if you have more of them. > > So IMO the burden should be *on the application* to declare that it is aware > > of the new instructions to flush pmem on the platform and only to such > > application the kernel should give the trust to use MAP_SYNC mappings. > > The "disaster" in my mind is this need to globally change the ABI for > persistence semantics for all of Linux because one CPU wants a do over. > What does a generic "MAP_SYNC_ENABLE" knob even mean to the existing > deployed base of persistent memory applications? Yes, sysfs is awkward, > but it's trying to provide some relief without imposing unexplainable > semantics on everyone else. I think a comprehensive (overengineered) > solution would involve not introducing another "I know what I'm doing" > flag to the interface, but maybe requiring applications to call a pmem > sync API in something like a vsyscall. Or, also overengineered, some > binary translation / interpretation to actively detect and kill > applications that deploy the old instructions. Something horrid like on > first write fault to a MAP_SYNC try to look ahead in the binary for the > correct sync sequence and kill the application otherwise. That would at > least provide some enforcement and safety without requiring other > architectures to consider what MAP_SYNC_ENABLE means to them. Thanks for explanation. So I absolutely agree that other architectures (and even older versions of POWER architecture) must not be influenced by the new tunable. 
That's why I wrote in my reply to Aneesh that I'd be for checking during mmap(2) with MAP_SYNC, whether we are in a situation where new PPC flush instructions are required and *only in that case* decide based on the prctl value whether MAP_SYNC should be allowed or not. Whether this solution is overengineering or not depends on how likely you think it is that there will be applications trying to use old flush instructions with MAP_SYNC on POWER10 platforms... Honza -- Jan Kara <jack@suse.com> SUSE Labs, CR ^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [RFC PATCH 1/2] libnvdimm: Add prctl control for disabling synchronous fault support. 2020-06-03 8:26 ` Jan Kara @ 2020-06-03 9:09 ` Aneesh Kumar K.V -1 siblings, 0 replies; 40+ messages in thread From: Aneesh Kumar K.V @ 2020-06-03 9:09 UTC (permalink / raw) To: Jan Kara, Williams, Dan J Cc: Michal Suchánek, jack, linuxppc-dev, Michael Ellerman, linux-nvdimm On 6/3/20 1:56 PM, Jan Kara wrote: > On Tue 02-06-20 17:59:08, Williams, Dan J wrote: >> [ forgive formatting, a series of unfortunate events has me using Outlook for the moment ] >> >>> From: Jan Kara <jack@suse.cz> >>>>>> These flags are device properties that affect the kernel and >>>>>> userspace's handling of persistence. >>>>>> >>>>> >>>>> That will not handle the scenario with multiple applications using >>>>> the same fsdax mount point where one is updated to use the new >>>>> instruction and the other is not. >>>> >>>> Right, it needs to be a global setting / flag day to switch from one >>>> regime to another. Per-process control is a recipe for disaster. >>> >>> First I'd like to mention that hopefully the concern is mostly theoretical since >>> as Aneesh wrote above, real persistent memory never shipped for PPC and >>> so there are very few apps (if any) using the old way to ensure cache >>> flushing. >>> >>> But I'd like to understand why do you think per-process control is a recipe for >>> disaster? Because from my POV the sysfs interface you propose is actually >>> difficult to use in practice. As a distributor, you have hard time picking the >>> default because you have a choice between picking safe option which is >>> going to confuse users because of failing MAP_SYNC and unsafe option >>> where everyone will be happy until someone looses data because of some >>> ancient application using wrong instructions to persist data. Poor experience >>> for users in either way. 
And when distro defaults to "safe option", then the >>> burden is on the sysadmin to toggle the switch but how is he supposed to >>> decide when that is safe? First he has to understand what the problem >>> actually is, then he has to audit all the applications using pmem whether they >>> use the new instruction - which is IMO a lot of effort if you have a couple of >>> applications and practically infeasible if you have more of them. >>> So IMO the burden should be *on the application* to declare that it is aware >>> of the new instructions to flush pmem on the platform and only to such >>> application the kernel should give the trust to use MAP_SYNC mappings. >> >> The "disaster" in my mind is this need to globally change the ABI for >> persistence semantics for all of Linux because one CPU wants a do over. >> What does a generic "MAP_SYNC_ENABLE" knob even mean to the existing >> deployed base of persistent memory applications? Yes, sysfs is awkward, >> but it's trying to provide some relief without imposing unexplainable >> semantics on everyone else. I think a comprehensive (overengineered) >> solution would involve not introducing another "I know what I'm doing" >> flag to the interface, but maybe requiring applications to call a pmem >> sync API in something like a vsyscall. Or, also overengineered, some >> binary translation / interpretation to actively detect and kill >> applications that deploy the old instructions. Something horrid like on >> first write fault to a MAP_SYNC try to look ahead in the binary for the >> correct sync sequence and kill the application otherwise. That would at >> least provide some enforcement and safety without requiring other >> architectures to consider what MAP_SYNC_ENABLE means to them. > > Thanks for explanation. So I absolutely agree that other architectures (and > even older versions of POWER architecture) must not be influenced by the new > tunable. 
That's why I wrote in my reply to Aneesh that I'd be for checking > during mmap(2) with MAP_SYNC, whether we are in a situation where new PPC > flush instructions are required and *only in that case* decide based on the > prctl value whether MAP_SYNC should be allowed or not. > v2 version of the patch series does that https://lore.kernel.org/linuxppc-dev/20200602074909.36738-1-aneesh.kumar@linux.ibm.com/ > Whether this solution is overengineering or not depends on how you think > it's likely there will be applications trying to use old flush instructions > with MAP_SYNC on POWER10 platforms... > Now considering that with ppc64 we never had a real persistent memory device available for the end-user to try and the new instructions are only needed on newer hardware, can we assume we have enough time to get the userspace to use new instructions? As a safety net, we can keep the dax device-specific sysfs control. But in reality, by the time newer hardware gets released, we can get the distributions updated to flip the CONFIG_ARCH_MAP_SYNC_DISABLE=n? With this: 1) vPMEM continues to work and since it is a volatile region. That doesn't need any flush instructions. 2) We get pmdk and other user applications updated to use new instructions and make sure updated packages are made available to all distributions 3) On newer hardware, the device will appear with a new compat string. Hence older distributions won't initialize pmem on newer hardware. 4) If we have a newer kernel with an older distro, we use the per namespace sysfs knob that prevents the usage of MAP_SYNC. 5) After a year or so we mark the CONFIG_ARCH_MAP_SYNC_DISABLE=n on ppc64 when we are confident that everybody is using the new flush instruction. -aneesh _______________________________________________ Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org To unsubscribe send an email to linux-nvdimm-leave@lists.01.org ^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [RFC PATCH 1/2] libnvdimm: Add prctl control for disabling synchronous fault support. @ 2020-06-03 9:09 ` Aneesh Kumar K.V 0 siblings, 0 replies; 40+ messages in thread From: Aneesh Kumar K.V @ 2020-06-03 9:09 UTC (permalink / raw) To: Jan Kara, Williams, Dan J Cc: linux-nvdimm, jack, Jeff Moyer, Oliver O'Halloran, Michal Suchánek, linuxppc-dev On 6/3/20 1:56 PM, Jan Kara wrote: > On Tue 02-06-20 17:59:08, Williams, Dan J wrote: >> [ forgive formatting, a series of unfortunate events has me using Outlook for the moment ] >> >>> From: Jan Kara <jack@suse.cz> >>>>>> These flags are device properties that affect the kernel and >>>>>> userspace's handling of persistence. >>>>>> >>>>> >>>>> That will not handle the scenario with multiple applications using >>>>> the same fsdax mount point where one is updated to use the new >>>>> instruction and the other is not. >>>> >>>> Right, it needs to be a global setting / flag day to switch from one >>>> regime to another. Per-process control is a recipe for disaster. >>> >>> First I'd like to mention that hopefully the concern is mostly theoretical since >>> as Aneesh wrote above, real persistent memory never shipped for PPC and >>> so there are very few apps (if any) using the old way to ensure cache >>> flushing. >>> >>> But I'd like to understand why do you think per-process control is a recipe for >>> disaster? Because from my POV the sysfs interface you propose is actually >>> difficult to use in practice. As a distributor, you have hard time picking the >>> default because you have a choice between picking safe option which is >>> going to confuse users because of failing MAP_SYNC and unsafe option >>> where everyone will be happy until someone looses data because of some >>> ancient application using wrong instructions to persist data. Poor experience >>> for users in either way. 
And when distro defaults to "safe option", then the >>> burden is on the sysadmin to toggle the switch but how is he supposed to >>> decide when that is safe? First he has to understand what the problem >>> actually is, then he has to audit all the applications using pmem whether they >>> use the new instruction - which is IMO a lot of effort if you have a couple of >>> applications and practically infeasible if you have more of them. >>> So IMO the burden should be *on the application* to declare that it is aware >>> of the new instructions to flush pmem on the platform and only to such >>> application the kernel should give the trust to use MAP_SYNC mappings. >> >> The "disaster" in my mind is this need to globally change the ABI for >> persistence semantics for all of Linux because one CPU wants a do over. >> What does a generic "MAP_SYNC_ENABLE" knob even mean to the existing >> deployed base of persistent memory applications? Yes, sysfs is awkward, >> but it's trying to provide some relief without imposing unexplainable >> semantics on everyone else. I think a comprehensive (overengineered) >> solution would involve not introducing another "I know what I'm doing" >> flag to the interface, but maybe requiring applications to call a pmem >> sync API in something like a vsyscall. Or, also overengineered, some >> binary translation / interpretation to actively detect and kill >> applications that deploy the old instructions. Something horrid like on >> first write fault to a MAP_SYNC try to look ahead in the binary for the >> correct sync sequence and kill the application otherwise. That would at >> least provide some enforcement and safety without requiring other >> architectures to consider what MAP_SYNC_ENABLE means to them. > > Thanks for explanation. So I absolutely agree that other architectures (and > even older versions of POWER architecture) must not be influenced by the new > tunable. 
That's why I wrote in my reply to Aneesh that I'd be for checking > during mmap(2) with MAP_SYNC, whether we are in a situation where new PPC > flush instructions are required and *only in that case* decide based on the > prctl value whether MAP_SYNC should be allowed or not. > v2 version of the patch series does that https://lore.kernel.org/linuxppc-dev/20200602074909.36738-1-aneesh.kumar@linux.ibm.com/ > Whether this solution is overengineering or not depends on how you think > it's likely there will be applications trying to use old flush instructions > with MAP_SYNC on POWER10 platforms... > Now considering that with ppc64 we never had a real persistent memory device available for the end-user to try and the new instructions are only needed on newer hardware, can we assume we have enough time to get the userspace to use new instructions? As a safety net, we can keep the dax device-specific sysfs control. But in reality, by the time newer hardware gets released, we can get the distributions updated to flip the CONFIG_ARCH_MAP_SYNC_DISABLE=n? With this: 1) vPMEM continues to work and since it is a volatile region. That doesn't need any flush instructions. 2) We get pmdk and other user applications updated to use new instructions and make sure updated packages are made available to all distributions 3) On newer hardware, the device will appear with a new compat string. Hence older distributions won't initialize pmem on newer hardware. 4) If we have a newer kernel with an older distro, we use the per namespace sysfs knob that prevents the usage of MAP_SYNC. 5) After a year or so we mark the CONFIG_ARCH_MAP_SYNC_DISABLE=n on ppc64 when we are confident that everybody is using the new flush instruction. -aneesh ^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [RFC PATCH 1/2] libnvdimm: Add prctl control for disabling synchronous fault support. 2020-06-03 9:09 ` Aneesh Kumar K.V @ 2020-06-08 7:42 ` Aneesh Kumar K.V -1 siblings, 0 replies; 40+ messages in thread From: Aneesh Kumar K.V @ 2020-06-08 7:42 UTC (permalink / raw) To: Jan Kara, Williams, Dan J Cc: Michal Suchánek, jack, linuxppc-dev, Michael Ellerman, linux-nvdimm "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com> writes: > On 6/3/20 1:56 PM, Jan Kara wrote: >> On Tue 02-06-20 17:59:08, Williams, Dan J wrote: >>> [ forgive formatting, a series of unfortunate events has me using Outlook for the moment ] >>> >>>> From: Jan Kara <jack@suse.cz> >>>>>>> These flags are device properties that affect the kernel and >>>>>>> userspace's handling of persistence. >>>>>>> >>>>>> >>>>>> That will not handle the scenario with multiple applications using >>>>>> the same fsdax mount point where one is updated to use the new >>>>>> instruction and the other is not. >>>>> >>>>> Right, it needs to be a global setting / flag day to switch from one >>>>> regime to another. Per-process control is a recipe for disaster. >>>> >>>> First I'd like to mention that hopefully the concern is mostly theoretical since >>>> as Aneesh wrote above, real persistent memory never shipped for PPC and >>>> so there are very few apps (if any) using the old way to ensure cache >>>> flushing. >>>> >>>> But I'd like to understand why do you think per-process control is a recipe for >>>> disaster? Because from my POV the sysfs interface you propose is actually >>>> difficult to use in practice. As a distributor, you have hard time picking the >>>> default because you have a choice between picking safe option which is >>>> going to confuse users because of failing MAP_SYNC and unsafe option >>>> where everyone will be happy until someone looses data because of some >>>> ancient application using wrong instructions to persist data. Poor experience >>>> for users in either way. 
And when distro defaults to "safe option", then the >>>> burden is on the sysadmin to toggle the switch but how is he supposed to >>>> decide when that is safe? First he has to understand what the problem >>>> actually is, then he has to audit all the applications using pmem whether they >>>> use the new instruction - which is IMO a lot of effort if you have a couple of >>>> applications and practically infeasible if you have more of them. >>>> So IMO the burden should be *on the application* to declare that it is aware >>>> of the new instructions to flush pmem on the platform and only to such >>>> application the kernel should give the trust to use MAP_SYNC mappings. >>> >>> The "disaster" in my mind is this need to globally change the ABI for >>> persistence semantics for all of Linux because one CPU wants a do over. >>> What does a generic "MAP_SYNC_ENABLE" knob even mean to the existing >>> deployed base of persistent memory applications? Yes, sysfs is awkward, >>> but it's trying to provide some relief without imposing unexplainable >>> semantics on everyone else. I think a comprehensive (overengineered) >>> solution would involve not introducing another "I know what I'm doing" >>> flag to the interface, but maybe requiring applications to call a pmem >>> sync API in something like a vsyscall. Or, also overengineered, some >>> binary translation / interpretation to actively detect and kill >>> applications that deploy the old instructions. Something horrid like on >>> first write fault to a MAP_SYNC try to look ahead in the binary for the >>> correct sync sequence and kill the application otherwise. That would at >>> least provide some enforcement and safety without requiring other >>> architectures to consider what MAP_SYNC_ENABLE means to them. >> >> Thanks for explanation. So I absolutely agree that other architectures (and >> even older versions of POWER architecture) must not be influenced by the new >> tunable. 
That's why I wrote in my reply to Aneesh that I'd be for checking >> during mmap(2) with MAP_SYNC, whether we are in a situation where new PPC >> flush instructions are required and *only in that case* decide based on the >> prctl value whether MAP_SYNC should be allowed or not. >> > > v2 version of the patch series does that > > https://lore.kernel.org/linuxppc-dev/20200602074909.36738-1-aneesh.kumar@linux.ibm.com/ > >> Whether this solution is overengineering or not depends on how you think >> it's likely there will be applications trying to use old flush instructions >> with MAP_SYNC on POWER10 platforms... >> > > Now considering that with ppc64 we never had a real persistent memory > device available for the end-user to try and the new instructions are > only needed on newer hardware, can we assume we have enough time to get > the userspace to use new instructions? > > As a safety net, we can keep the dax device-specific sysfs control. But > in reality, by the time newer hardware gets released, we can get the > distributions updated to flip the CONFIG_ARCH_MAP_SYNC_DISABLE=n? > > With this: > 1) vPMEM continues to work and since it is a volatile region. That > doesn't need any flush instructions. > > 2) We get pmdk and other user applications updated to use new > instructions and make sure updated packages are made available to all > distributions > > 3) On newer hardware, the device will appear with a new compat string. > Hence older distributions won't initialize pmem on newer hardware. > > 4) If we have a newer kernel with an older distro, we use the per > namespace sysfs knob that prevents the usage of MAP_SYNC. > > 5) After a year or so we mark the CONFIG_ARCH_MAP_SYNC_DISABLE=n > on ppc64 when we are confident that everybody is using the new flush > instruction. > Is this approach ok for distributions? If so I can repost the series dropping the prctl change. 
-aneesh _______________________________________________ Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org To unsubscribe send an email to linux-nvdimm-leave@lists.01.org ^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [RFC PATCH 1/2] libnvdimm: Add prctl control for disabling synchronous fault support. @ 2020-06-08 7:42 ` Aneesh Kumar K.V 0 siblings, 0 replies; 40+ messages in thread From: Aneesh Kumar K.V @ 2020-06-08 7:42 UTC (permalink / raw) To: Jan Kara, Williams, Dan J Cc: linux-nvdimm, jack, Jeff Moyer, Michal Suchánek, linuxppc-dev "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com> writes: > On 6/3/20 1:56 PM, Jan Kara wrote: >> On Tue 02-06-20 17:59:08, Williams, Dan J wrote: >>> [ forgive formatting, a series of unfortunate events has me using Outlook for the moment ] >>> >>>> From: Jan Kara <jack@suse.cz> >>>>>>> These flags are device properties that affect the kernel and >>>>>>> userspace's handling of persistence. >>>>>>> >>>>>> >>>>>> That will not handle the scenario with multiple applications using >>>>>> the same fsdax mount point where one is updated to use the new >>>>>> instruction and the other is not. >>>>> >>>>> Right, it needs to be a global setting / flag day to switch from one >>>>> regime to another. Per-process control is a recipe for disaster. >>>> >>>> First I'd like to mention that hopefully the concern is mostly theoretical since >>>> as Aneesh wrote above, real persistent memory never shipped for PPC and >>>> so there are very few apps (if any) using the old way to ensure cache >>>> flushing. >>>> >>>> But I'd like to understand why do you think per-process control is a recipe for >>>> disaster? Because from my POV the sysfs interface you propose is actually >>>> difficult to use in practice. As a distributor, you have hard time picking the >>>> default because you have a choice between picking safe option which is >>>> going to confuse users because of failing MAP_SYNC and unsafe option >>>> where everyone will be happy until someone looses data because of some >>>> ancient application using wrong instructions to persist data. Poor experience >>>> for users in either way. 
And when distro defaults to "safe option", then the >>>> burden is on the sysadmin to toggle the switch but how is he supposed to >>>> decide when that is safe? First he has to understand what the problem >>>> actually is, then he has to audit all the applications using pmem whether they >>>> use the new instruction - which is IMO a lot of effort if you have a couple of >>>> applications and practically infeasible if you have more of them. >>>> So IMO the burden should be *on the application* to declare that it is aware >>>> of the new instructions to flush pmem on the platform and only to such >>>> application the kernel should give the trust to use MAP_SYNC mappings. >>> >>> The "disaster" in my mind is this need to globally change the ABI for >>> persistence semantics for all of Linux because one CPU wants a do over. >>> What does a generic "MAP_SYNC_ENABLE" knob even mean to the existing >>> deployed base of persistent memory applications? Yes, sysfs is awkward, >>> but it's trying to provide some relief without imposing unexplainable >>> semantics on everyone else. I think a comprehensive (overengineered) >>> solution would involve not introducing another "I know what I'm doing" >>> flag to the interface, but maybe requiring applications to call a pmem >>> sync API in something like a vsyscall. Or, also overengineered, some >>> binary translation / interpretation to actively detect and kill >>> applications that deploy the old instructions. Something horrid like on >>> first write fault to a MAP_SYNC try to look ahead in the binary for the >>> correct sync sequence and kill the application otherwise. That would at >>> least provide some enforcement and safety without requiring other >>> architectures to consider what MAP_SYNC_ENABLE means to them. >> >> Thanks for explanation. So I absolutely agree that other architectures (and >> even older versions of POWER architecture) must not be influenced by the new >> tunable. 
That's why I wrote in my reply to Aneesh that I'd be for checking >> during mmap(2) with MAP_SYNC, whether we are in a situation where new PPC >> flush instructions are required and *only in that case* decide based on the >> prctl value whether MAP_SYNC should be allowed or not. >> > > v2 version of the patch series does that > > https://lore.kernel.org/linuxppc-dev/20200602074909.36738-1-aneesh.kumar@linux.ibm.com/ > >> Whether this solution is overengineering or not depends on how you think >> it's likely there will be applications trying to use old flush instructions >> with MAP_SYNC on POWER10 platforms... >> > > Now considering that with ppc64 we never had a real persistent memory > device available for the end-user to try and the new instructions are > only needed on newer hardware, can we assume we have enough time to get > the userspace to use new instructions? > > As a safety net, we can keep the dax device-specific sysfs control. But > in reality, by the time newer hardware gets released, we can get the > distributions updated to flip the CONFIG_ARCH_MAP_SYNC_DISABLE=n? > > With this: > 1) vPMEM continues to work and since it is a volatile region. That > doesn't need any flush instructions. > > 2) We get pmdk and other user applications updated to use new > instructions and make sure updated packages are made available to all > distributions > > 3) On newer hardware, the device will appear with a new compat string. > Hence older distributions won't initialize pmem on newer hardware. > > 4) If we have a newer kernel with an older distro, we use the per > namespace sysfs knob that prevents the usage of MAP_SYNC. > > 5) After a year or so we mark the CONFIG_ARCH_MAP_SYNC_DISABLE=n > on ppc64 when we are confident that everybody is using the new flush > instruction. > Is this approach ok for distributions? If so I can repost the series dropping the prctl change. -aneesh ^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [RFC PATCH 1/2] libnvdimm: Add prctl control for disabling synchronous fault support. 2020-05-29 10:55 ` Aneesh Kumar K.V @ 2020-06-01 10:09 ` Jan Kara -1 siblings, 0 replies; 40+ messages in thread From: Jan Kara @ 2020-06-01 10:09 UTC (permalink / raw) To: Aneesh Kumar K.V Cc: Jan Kara, Michal Suchánek, jack, linuxppc-dev, mpe, linux-nvdimm On Fri 29-05-20 16:25:35, Aneesh Kumar K.V wrote: > On 5/29/20 3:22 PM, Jan Kara wrote: > > On Fri 29-05-20 15:07:31, Aneesh Kumar K.V wrote: > > > Thanks Michal. I also missed Jeff in this email thread. > > > > And I think you'll also need some of the sched maintainers for the prctl > > bits... > > > > > On 5/29/20 3:03 PM, Michal Suchánek wrote: > > > > Adding Jan > > > > > > > > On Fri, May 29, 2020 at 11:11:39AM +0530, Aneesh Kumar K.V wrote: > > > > > With POWER10, architecture is adding new pmem flush and sync instructions. > > > > > The kernel should prevent the usage of MAP_SYNC if applications are not using > > > > > the new instructions on newer hardware. > > > > > > > > > > This patch adds a prctl option MAP_SYNC_ENABLE that can be used to enable > > > > > the usage of MAP_SYNC. The kernel config option is added to allow the user > > > > > to control whether MAP_SYNC should be enabled by default or not. > > > > > > > > > > Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> > > ... > > > > > diff --git a/kernel/fork.c b/kernel/fork.c > > > > > index 8c700f881d92..d5a9a363e81e 100644 > > > > > --- a/kernel/fork.c > > > > > +++ b/kernel/fork.c > > > > > @@ -963,6 +963,12 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock); > > > > > static unsigned long default_dump_filter = MMF_DUMP_FILTER_DEFAULT; > > > > > +#ifdef CONFIG_ARCH_MAP_SYNC_DISABLE > > > > > +unsigned long default_map_sync_mask = MMF_DISABLE_MAP_SYNC_MASK; > > > > > +#else > > > > > +unsigned long default_map_sync_mask = 0; > > > > > +#endif > > > > > + > > > > I'm not sure CONFIG is really the right approach here. 
For a distro that would > > basically mean to disable MAP_SYNC for all PPC kernels unless application > > explicitly uses the right prctl. Shouldn't we rather initialize > > default_map_sync_mask on boot based on whether the CPU we run on requires > > new flush instructions or not? Otherwise the patch looks sensible. > > > > yes that is correct. We ideally want to deny MAP_SYNC only w.r.t POWER10. > But on a virtualized platform there is no easy way to detect that. We could > ideally hook this into the nvdimm driver where we look at the new compat > string ibm,persistent-memory-v2 and then disable MAP_SYNC > if we find a device with the specific value. Hum, couldn't we set some flag for nvdimm devices with "ibm,persistent-memory-v2" property and then check it during mmap(2) time and when the device has this property and the mmap(2) caller doesn't have the prctl set, we'd disallow MAP_SYNC? That should make things mostly seamless, shouldn't it? Only apps that want to use MAP_SYNC on these devices would need to use prctl(MMF_DISABLE_MAP_SYNC, 0) but then these applications need to be aware of new instructions so this isn't that much additional burden... > With that I am wondering should we even have this patch? Can we expect > userspace get updated to use new instruction?. > > With ppc64 we never had a real persistent memory device available for end > user to try. The available persistent memory stack was using vPMEM which was > presented as a volatile memory region for which there is no need to use any > of the flush instructions. We could safely assume that as we get > applications certified/verified for working with pmem device on ppc64, they > would all be using the new instructions? This is a bit of a gamble... I don't have too much trust in certification / verification because only the "big players" may do powerfail testing thoroughly enough that they'd uncover these problems. So the question really is: How many apps are out there using MAP_SYNC on ppc64?
Hopefully not many given the HW didn't ship yet as you wrote but I have no real clue. Similarly there's a question: How many app writers will read manual for older ppc64 architecture and write apps that won't work reliably on POWER10? Again, I have no idea. So the prctl would be IMHO a nice safety belt but I'm not 100% certain it will be needed... Honza -- Jan Kara <jack@suse.com> SUSE Labs, CR _______________________________________________ Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org To unsubscribe send an email to linux-nvdimm-leave@lists.01.org ^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [RFC PATCH 1/2] libnvdimm: Add prctl control for disabling synchronous fault support. @ 2020-06-01 10:09 ` Jan Kara 0 siblings, 0 replies; 40+ messages in thread From: Jan Kara @ 2020-06-01 10:09 UTC (permalink / raw) To: Aneesh Kumar K.V Cc: Jan Kara, linux-nvdimm, jack, Jeff Moyer, oohall, dan.j.williams, Michal Suchánek, linuxppc-dev On Fri 29-05-20 16:25:35, Aneesh Kumar K.V wrote: > On 5/29/20 3:22 PM, Jan Kara wrote: > > On Fri 29-05-20 15:07:31, Aneesh Kumar K.V wrote: > > > Thanks Michal. I also missed Jeff in this email thread. > > > > And I think you'll also need some of the sched maintainers for the prctl > > bits... > > > > > On 5/29/20 3:03 PM, Michal Suchánek wrote: > > > > Adding Jan > > > > > > > > On Fri, May 29, 2020 at 11:11:39AM +0530, Aneesh Kumar K.V wrote: > > > > > With POWER10, architecture is adding new pmem flush and sync instructions. > > > > > The kernel should prevent the usage of MAP_SYNC if applications are not using > > > > > the new instructions on newer hardware. > > > > > > > > > > This patch adds a prctl option MAP_SYNC_ENABLE that can be used to enable > > > > > the usage of MAP_SYNC. The kernel config option is added to allow the user > > > > > to control whether MAP_SYNC should be enabled by default or not. > > > > > > > > > > Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> > > ... > > > > > diff --git a/kernel/fork.c b/kernel/fork.c > > > > > index 8c700f881d92..d5a9a363e81e 100644 > > > > > --- a/kernel/fork.c > > > > > +++ b/kernel/fork.c > > > > > @@ -963,6 +963,12 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock); > > > > > static unsigned long default_dump_filter = MMF_DUMP_FILTER_DEFAULT; > > > > > +#ifdef CONFIG_ARCH_MAP_SYNC_DISABLE > > > > > +unsigned long default_map_sync_mask = MMF_DISABLE_MAP_SYNC_MASK; > > > > > +#else > > > > > +unsigned long default_map_sync_mask = 0; > > > > > +#endif > > > > > + > > > > I'm not sure CONFIG is really the right approach here. 
For a distro that would > > basically mean to disable MAP_SYNC for all PPC kernels unless application > > explicitly uses the right prctl. Shouldn't we rather initialize > > default_map_sync_mask on boot based on whether the CPU we run on requires > > new flush instructions or not? Otherwise the patch looks sensible. > > > > yes that is correct. We ideally want to deny MAP_SYNC only w.r.t POWER10. > But on a virtualized platform there is no easy way to detect that. We could > ideally hook this into the nvdimm driver where we look at the new compat > string ibm,persistent-memory-v2 and then disable MAP_SYNC > if we find a device with the specific value. Hum, couldn't we set some flag for nvdimm devices with "ibm,persistent-memory-v2" property and then check it during mmap(2) time and when the device has this property and the mmap(2) caller doesn't have the prctl set, we'd disallow MAP_SYNC? That should make things mostly seamless, shouldn't it? Only apps that want to use MAP_SYNC on these devices would need to use prctl(MMF_DISABLE_MAP_SYNC, 0) but then these applications need to be aware of new instructions so this isn't that much additional burden... > With that I am wondering should we even have this patch? Can we expect > userspace get updated to use new instruction?. > > With ppc64 we never had a real persistent memory device available for end > user to try. The available persistent memory stack was using vPMEM which was > presented as a volatile memory region for which there is no need to use any > of the flush instructions. We could safely assume that as we get > applications certified/verified for working with pmem device on ppc64, they > would all be using the new instructions? This is a bit of a gamble... I don't have too much trust in certification / verification because only the "big players" may do powerfail testing thoroughly enough that they'd uncover these problems. So the question really is: How many apps are out there using MAP_SYNC on ppc64?
Hopefully not many given the HW didn't ship yet as you wrote but I have no real clue. Similarly there's a question: How many app writers will read manual for older ppc64 architecture and write apps that won't work reliably on POWER10? Again, I have no idea. So the prctl would be IMHO a nice safety belt but I'm not 100% certain it will be needed... Honza -- Jan Kara <jack@suse.com> SUSE Labs, CR ^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [RFC PATCH 1/2] libnvdimm: Add prctl control for disabling synchronous fault support. 2020-06-01 10:09 ` Jan Kara @ 2020-06-01 12:01 ` Aneesh Kumar K.V -1 siblings, 0 replies; 40+ messages in thread From: Aneesh Kumar K.V @ 2020-06-01 12:01 UTC (permalink / raw) To: Jan Kara; +Cc: Michal Suchánek, jack, linuxppc-dev, mpe, linux-nvdimm On 6/1/20 3:39 PM, Jan Kara wrote: > On Fri 29-05-20 16:25:35, Aneesh Kumar K.V wrote: >> On 5/29/20 3:22 PM, Jan Kara wrote: >>> On Fri 29-05-20 15:07:31, Aneesh Kumar K.V wrote: >>>> Thanks Michal. I also missed Jeff in this email thread. >>> >>> And I think you'll also need some of the sched maintainers for the prctl >>> bits... >>> >>>> On 5/29/20 3:03 PM, Michal Suchánek wrote: >>>>> Adding Jan >>>>> >>>>> On Fri, May 29, 2020 at 11:11:39AM +0530, Aneesh Kumar K.V wrote: >>>>>> With POWER10, architecture is adding new pmem flush and sync instructions. >>>>>> The kernel should prevent the usage of MAP_SYNC if applications are not using >>>>>> the new instructions on newer hardware. >>>>>> >>>>>> This patch adds a prctl option MAP_SYNC_ENABLE that can be used to enable >>>>>> the usage of MAP_SYNC. The kernel config option is added to allow the user >>>>>> to control whether MAP_SYNC should be enabled by default or not. >>>>>> >>>>>> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> >>> ... >>>>>> diff --git a/kernel/fork.c b/kernel/fork.c >>>>>> index 8c700f881d92..d5a9a363e81e 100644 >>>>>> --- a/kernel/fork.c >>>>>> +++ b/kernel/fork.c >>>>>> @@ -963,6 +963,12 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock); >>>>>> static unsigned long default_dump_filter = MMF_DUMP_FILTER_DEFAULT; >>>>>> +#ifdef CONFIG_ARCH_MAP_SYNC_DISABLE >>>>>> +unsigned long default_map_sync_mask = MMF_DISABLE_MAP_SYNC_MASK; >>>>>> +#else >>>>>> +unsigned long default_map_sync_mask = 0; >>>>>> +#endif >>>>>> + >>> >>> I'm not sure CONFIG is really the right approach here. 
For a distro that would >>> basically mean to disable MAP_SYNC for all PPC kernels unless application >>> explicitly uses the right prctl. Shouldn't we rather initialize >>> default_map_sync_mask on boot based on whether the CPU we run on requires >>> new flush instructions or not? Otherwise the patch looks sensible. >>> >> >> yes that is correct. We ideally want to deny MAP_SYNC only w.r.t POWER10. >> But on a virtualized platform there is no easy way to detect that. We could >> ideally hook this into the nvdimm driver where we look at the new compat >> string ibm,persistent-memory-v2 and then disable MAP_SYNC >> if we find a device with the specific value. > > Hum, couldn't we set some flag for nvdimm devices with > "ibm,persistent-memory-v2" property and then check it during mmap(2) time > and when the device has this propery and the mmap(2) caller doesn't have > the prctl set, we'd disallow MAP_SYNC? That should make things mostly > seamless, shouldn't it? Only apps that want to use MAP_SYNC on these > devices would need to use prctl(MMF_DISABLE_MAP_SYNC, 0) but then these > applications need to be aware of new instructions so this isn't that much > additional burden... I am not sure application would want to add that much details/knowledge about a platform in their code. I was expecting application to do #ifdef __ppc64__ prctl(MAP_SYNC_ENABLE, 1, 0, 0, 0); #endif a = mmap(NULL, PAGE_SIZE, PROT_READ|PROT_WRITE, MAP_SHARED_VALIDATE | MAP_SYNC, fd, 0); For that code all the complexity that we add w.r.t ibm,persistent-memory-v2 is not useful. Do you see a value in making all these device specific rather than a conditional on __ppc64__? > >> With that I am wondering should we even have this patch? Can we expect >> userspace get updated to use new instruction?. >> >> With ppc64 we never had a real persistent memory device available for end >> user to try.
The available persistent memory stack was using vPMEM which was >> presented as a volatile memory region for which there is no need to use any >> of the flush instructions. We could safely assume that as we get >> applications certified/verified for working with pmem device on ppc64, they >> would all be using the new instructions? > > This is a bit of a gamble... I don't have too much trust in certification / > verification because only the "big players" may do powerfail testing > throughout enough that they'd uncover these problems. So the question > really is: How many apps are out there using MAP_SYNC on ppc64? Hopefully > not many given the HW didn't ship yet as you wrote but I have no real clue. > Similarly there's a question: How many app writers will read manual for > older ppc64 architecture and write apps that won't work reliably on > POWER10? Again, I have no idea. > > So the prctl would be IMHO a nice safety belt but I'm not 100% certain it > will be needed... > > -aneesh _______________________________________________ Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org To unsubscribe send an email to linux-nvdimm-leave@lists.01.org ^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [RFC PATCH 1/2] libnvdimm: Add prctl control for disabling synchronous fault support. @ 2020-06-01 12:01 ` Aneesh Kumar K.V 0 siblings, 0 replies; 40+ messages in thread From: Aneesh Kumar K.V @ 2020-06-01 12:01 UTC (permalink / raw) To: Jan Kara Cc: linux-nvdimm, jack, Jeff Moyer, oohall, dan.j.williams, Michal Suchánek, linuxppc-dev On 6/1/20 3:39 PM, Jan Kara wrote: > On Fri 29-05-20 16:25:35, Aneesh Kumar K.V wrote: >> On 5/29/20 3:22 PM, Jan Kara wrote: >>> On Fri 29-05-20 15:07:31, Aneesh Kumar K.V wrote: >>>> Thanks Michal. I also missed Jeff in this email thread. >>> >>> And I think you'll also need some of the sched maintainers for the prctl >>> bits... >>> >>>> On 5/29/20 3:03 PM, Michal Suchánek wrote: >>>>> Adding Jan >>>>> >>>>> On Fri, May 29, 2020 at 11:11:39AM +0530, Aneesh Kumar K.V wrote: >>>>>> With POWER10, architecture is adding new pmem flush and sync instructions. >>>>>> The kernel should prevent the usage of MAP_SYNC if applications are not using >>>>>> the new instructions on newer hardware. >>>>>> >>>>>> This patch adds a prctl option MAP_SYNC_ENABLE that can be used to enable >>>>>> the usage of MAP_SYNC. The kernel config option is added to allow the user >>>>>> to control whether MAP_SYNC should be enabled by default or not. >>>>>> >>>>>> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> >>> ... >>>>>> diff --git a/kernel/fork.c b/kernel/fork.c >>>>>> index 8c700f881d92..d5a9a363e81e 100644 >>>>>> --- a/kernel/fork.c >>>>>> +++ b/kernel/fork.c >>>>>> @@ -963,6 +963,12 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock); >>>>>> static unsigned long default_dump_filter = MMF_DUMP_FILTER_DEFAULT; >>>>>> +#ifdef CONFIG_ARCH_MAP_SYNC_DISABLE >>>>>> +unsigned long default_map_sync_mask = MMF_DISABLE_MAP_SYNC_MASK; >>>>>> +#else >>>>>> +unsigned long default_map_sync_mask = 0; >>>>>> +#endif >>>>>> + >>> >>> I'm not sure CONFIG is really the right approach here. 
For a distro that would >>> basically mean to disable MAP_SYNC for all PPC kernels unless application >>> explicitly uses the right prctl. Shouldn't we rather initialize >>> default_map_sync_mask on boot based on whether the CPU we run on requires >>> new flush instructions or not? Otherwise the patch looks sensible. >>> >> >> yes that is correct. We ideally want to deny MAP_SYNC only w.r.t POWER10. >> But on a virtualized platform there is no easy way to detect that. We could >> ideally hook this into the nvdimm driver where we look at the new compat >> string ibm,persistent-memory-v2 and then disable MAP_SYNC >> if we find a device with the specific value. > > Hum, couldn't we set some flag for nvdimm devices with > "ibm,persistent-memory-v2" property and then check it during mmap(2) time > and when the device has this propery and the mmap(2) caller doesn't have > the prctl set, we'd disallow MAP_SYNC? That should make things mostly > seamless, shouldn't it? Only apps that want to use MAP_SYNC on these > devices would need to use prctl(MMF_DISABLE_MAP_SYNC, 0) but then these > applications need to be aware of new instructions so this isn't that much > additional burden... I am not sure application would want to add that much details/knowledge about a platform in their code. I was expecting application to do #ifdef __ppc64__ prctl(MAP_SYNC_ENABLE, 1, 0, 0, 0); #endif a = mmap(NULL, PAGE_SIZE, PROT_READ|PROT_WRITE, MAP_SHARED_VALIDATE | MAP_SYNC, fd, 0); For that code all the complexity that we add w.r.t ibm,persistent-memory-v2 is not useful. Do you see a value in making all these device specific rather than a conditional on __ppc64__? > >> With that I am wondering should we even have this patch? Can we expect >> userspace get updated to use new instruction?. >> >> With ppc64 we never had a real persistent memory device available for end >> user to try.
The available persistent memory stack was using vPMEM which was >> presented as a volatile memory region for which there is no need to use any >> of the flush instructions. We could safely assume that as we get >> applications certified/verified for working with pmem device on ppc64, they >> would all be using the new instructions? > > This is a bit of a gamble... I don't have too much trust in certification / > verification because only the "big players" may do powerfail testing > throughout enough that they'd uncover these problems. So the question > really is: How many apps are out there using MAP_SYNC on ppc64? Hopefully > not many given the HW didn't ship yet as you wrote but I have no real clue. > Similarly there's a question: How many app writers will read manual for > older ppc64 architecture and write apps that won't work reliably on > POWER10? Again, I have no idea. > > So the prctl would be IMHO a nice safety belt but I'm not 100% certain it > will be needed... > > -aneesh ^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [RFC PATCH 1/2] libnvdimm: Add prctl control for disabling synchronous fault support. 2020-06-01 12:01 ` Aneesh Kumar K.V @ 2020-06-01 12:07 ` Michal Suchánek -1 siblings, 0 replies; 40+ messages in thread From: Michal Suchánek @ 2020-06-01 12:07 UTC (permalink / raw) To: Aneesh Kumar K.V; +Cc: Jan Kara, jack, linuxppc-dev, mpe, linux-nvdimm On Mon, Jun 01, 2020 at 05:31:50PM +0530, Aneesh Kumar K.V wrote: > On 6/1/20 3:39 PM, Jan Kara wrote: > > On Fri 29-05-20 16:25:35, Aneesh Kumar K.V wrote: > > > On 5/29/20 3:22 PM, Jan Kara wrote: > > > > On Fri 29-05-20 15:07:31, Aneesh Kumar K.V wrote: > > > > > Thanks Michal. I also missed Jeff in this email thread. > > > > > > > > And I think you'll also need some of the sched maintainers for the prctl > > > > bits... > > > > > > > > > On 5/29/20 3:03 PM, Michal Suchánek wrote: > > > > > > Adding Jan > > > > > > > > > > > > On Fri, May 29, 2020 at 11:11:39AM +0530, Aneesh Kumar K.V wrote: > > > > > > > With POWER10, architecture is adding new pmem flush and sync instructions. > > > > > > > The kernel should prevent the usage of MAP_SYNC if applications are not using > > > > > > > the new instructions on newer hardware. > > > > > > > > > > > > > > This patch adds a prctl option MAP_SYNC_ENABLE that can be used to enable > > > > > > > the usage of MAP_SYNC. The kernel config option is added to allow the user > > > > > > > to control whether MAP_SYNC should be enabled by default or not. > > > > > > > > > > > > > > Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> > > > > ... 
> > > > > > > diff --git a/kernel/fork.c b/kernel/fork.c > > > > > > > index 8c700f881d92..d5a9a363e81e 100644 > > > > > > > --- a/kernel/fork.c > > > > > > > +++ b/kernel/fork.c > > > > > > > @@ -963,6 +963,12 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock); > > > > > > > static unsigned long default_dump_filter = MMF_DUMP_FILTER_DEFAULT; > > > > > > > +#ifdef CONFIG_ARCH_MAP_SYNC_DISABLE > > > > > > > +unsigned long default_map_sync_mask = MMF_DISABLE_MAP_SYNC_MASK; > > > > > > > +#else > > > > > > > +unsigned long default_map_sync_mask = 0; > > > > > > > +#endif > > > > > > > + > > > > > > > > I'm not sure CONFIG is really the right approach here. For a distro that would > > > > basically mean to disable MAP_SYNC for all PPC kernels unless application > > > > explicitly uses the right prctl. Shouldn't we rather initialize > > > > default_map_sync_mask on boot based on whether the CPU we run on requires > > > > new flush instructions or not? Otherwise the patch looks sensible. > > > > > > > > > > yes that is correct. We ideally want to deny MAP_SYNC only w.r.t POWER10. > > > But on a virtualized platform there is no easy way to detect that. We could > > > ideally hook this into the nvdimm driver where we look at the new compat > > > string ibm,persistent-memory-v2 and then disable MAP_SYNC > > > if we find a device with the specific value. > > > > Hum, couldn't we set some flag for nvdimm devices with > > "ibm,persistent-memory-v2" property and then check it during mmap(2) time > > and when the device has this propery and the mmap(2) caller doesn't have > > the prctl set, we'd disallow MAP_SYNC? That should make things mostly > > seamless, shouldn't it? Only apps that want to use MAP_SYNC on these > > devices would need to use prctl(MMF_DISABLE_MAP_SYNC, 0) but then these > > applications need to be aware of new instructions so this isn't that much > > additional burden... 
> > I am not sure application would want to add that much details/knowledge > about a platform in their code. I was expecting application to do > > #ifdef __ppc64__ > prctl(MAP_SYNC_ENABLE, 1, 0, 0, 0)); > #endif > a = mmap(NULL, PAGE_SIZE, PROT_READ|PROT_WRITE, > MAP_SHARED_VALIDATE | MAP_SYNC, fd, 0); > > > For that code all the complexity that we add w.r.t ibm,persistent-memory-v2 > is not useful. Do you see a value in making all these device specific rather > than a conditional on __ppc64__? If the vpmem devices continue to work with the old instruction on POWER10 then it makes sense to make this per-device. Also adding a message to kernel log in case the application does not do the prctl would be helpful for people migrating old code to POWER10. Thanks Michal _______________________________________________ Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org To unsubscribe send an email to linux-nvdimm-leave@lists.01.org ^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [RFC PATCH 1/2] libnvdimm: Add prctl control for disabling synchronous fault support. @ 2020-06-01 12:07 ` Michal Suchánek 0 siblings, 0 replies; 40+ messages in thread From: Michal Suchánek @ 2020-06-01 12:07 UTC (permalink / raw) To: Aneesh Kumar K.V Cc: Jan Kara, linux-nvdimm, jack, Jeff Moyer, oohall, dan.j.williams, linuxppc-dev On Mon, Jun 01, 2020 at 05:31:50PM +0530, Aneesh Kumar K.V wrote: > On 6/1/20 3:39 PM, Jan Kara wrote: > > On Fri 29-05-20 16:25:35, Aneesh Kumar K.V wrote: > > > On 5/29/20 3:22 PM, Jan Kara wrote: > > > > On Fri 29-05-20 15:07:31, Aneesh Kumar K.V wrote: > > > > > Thanks Michal. I also missed Jeff in this email thread. > > > > > > > > And I think you'll also need some of the sched maintainers for the prctl > > > > bits... > > > > > > > > > On 5/29/20 3:03 PM, Michal Suchánek wrote: > > > > > > Adding Jan > > > > > > > > > > > > On Fri, May 29, 2020 at 11:11:39AM +0530, Aneesh Kumar K.V wrote: > > > > > > > With POWER10, architecture is adding new pmem flush and sync instructions. > > > > > > > The kernel should prevent the usage of MAP_SYNC if applications are not using > > > > > > > the new instructions on newer hardware. > > > > > > > > > > > > > > This patch adds a prctl option MAP_SYNC_ENABLE that can be used to enable > > > > > > > the usage of MAP_SYNC. The kernel config option is added to allow the user > > > > > > > to control whether MAP_SYNC should be enabled by default or not. > > > > > > > > > > > > > > Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> > > > > ... 
> > > > > > > diff --git a/kernel/fork.c b/kernel/fork.c > > > > > > > index 8c700f881d92..d5a9a363e81e 100644 > > > > > > > --- a/kernel/fork.c > > > > > > > +++ b/kernel/fork.c > > > > > > > @@ -963,6 +963,12 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock); > > > > > > > static unsigned long default_dump_filter = MMF_DUMP_FILTER_DEFAULT; > > > > > > > +#ifdef CONFIG_ARCH_MAP_SYNC_DISABLE > > > > > > > +unsigned long default_map_sync_mask = MMF_DISABLE_MAP_SYNC_MASK; > > > > > > > +#else > > > > > > > +unsigned long default_map_sync_mask = 0; > > > > > > > +#endif > > > > > > > + > > > > > > > > I'm not sure CONFIG is really the right approach here. For a distro that would > > > > basically mean to disable MAP_SYNC for all PPC kernels unless application > > > > explicitly uses the right prctl. Shouldn't we rather initialize > > > > default_map_sync_mask on boot based on whether the CPU we run on requires > > > > new flush instructions or not? Otherwise the patch looks sensible. > > > > > > > > > > yes that is correct. We ideally want to deny MAP_SYNC only w.r.t POWER10. > > > But on a virtualized platform there is no easy way to detect that. We could > > > ideally hook this into the nvdimm driver where we look at the new compat > > > string ibm,persistent-memory-v2 and then disable MAP_SYNC > > > if we find a device with the specific value. > > > > Hum, couldn't we set some flag for nvdimm devices with > > "ibm,persistent-memory-v2" property and then check it during mmap(2) time > > and when the device has this propery and the mmap(2) caller doesn't have > > the prctl set, we'd disallow MAP_SYNC? That should make things mostly > > seamless, shouldn't it? Only apps that want to use MAP_SYNC on these > > devices would need to use prctl(MMF_DISABLE_MAP_SYNC, 0) but then these > > applications need to be aware of new instructions so this isn't that much > > additional burden... 
> > I am not sure application would want to add that much details/knowledge > about a platform in their code. I was expecting application to do > > #ifdef __ppc64__ > prctl(MAP_SYNC_ENABLE, 1, 0, 0, 0)); > #endif > a = mmap(NULL, PAGE_SIZE, PROT_READ|PROT_WRITE, > MAP_SHARED_VALIDATE | MAP_SYNC, fd, 0); > > > For that code all the complexity that we add w.r.t ibm,persistent-memory-v2 > is not useful. Do you see a value in making all these device specific rather > than a conditional on __ppc64__? If the vpmem devices continue to work with the old instruction on POWER10 then it makes sense to make this per-device. Also adding a message to kernel log in case the application does not do the prctl would be helpful for people migrating old code to POWER10. Thanks Michal ^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [RFC PATCH 1/2] libnvdimm: Add prctl control for disabling synchronous fault support. 2020-06-01 12:07 ` Michal Suchánek @ 2020-06-01 12:20 ` Aneesh Kumar K.V -1 siblings, 0 replies; 40+ messages in thread From: Aneesh Kumar K.V @ 2020-06-01 12:20 UTC (permalink / raw) To: Michal Suchánek; +Cc: Jan Kara, jack, linuxppc-dev, mpe, linux-nvdimm On 6/1/20 5:37 PM, Michal Suchánek wrote: > On Mon, Jun 01, 2020 at 05:31:50PM +0530, Aneesh Kumar K.V wrote: >> On 6/1/20 3:39 PM, Jan Kara wrote: >>> On Fri 29-05-20 16:25:35, Aneesh Kumar K.V wrote: >>>> On 5/29/20 3:22 PM, Jan Kara wrote: >>>>> On Fri 29-05-20 15:07:31, Aneesh Kumar K.V wrote: >>>>>> Thanks Michal. I also missed Jeff in this email thread. >>>>> >>>>> And I think you'll also need some of the sched maintainers for the prctl >>>>> bits... >>>>> >>>>>> On 5/29/20 3:03 PM, Michal Suchánek wrote: >>>>>>> Adding Jan >>>>>>> >>>>>>> On Fri, May 29, 2020 at 11:11:39AM +0530, Aneesh Kumar K.V wrote: >>>>>>>> With POWER10, architecture is adding new pmem flush and sync instructions. >>>>>>>> The kernel should prevent the usage of MAP_SYNC if applications are not using >>>>>>>> the new instructions on newer hardware. >>>>>>>> >>>>>>>> This patch adds a prctl option MAP_SYNC_ENABLE that can be used to enable >>>>>>>> the usage of MAP_SYNC. The kernel config option is added to allow the user >>>>>>>> to control whether MAP_SYNC should be enabled by default or not. >>>>>>>> >>>>>>>> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> >>>>> ... 
>>>>>>>> diff --git a/kernel/fork.c b/kernel/fork.c >>>>>>>> index 8c700f881d92..d5a9a363e81e 100644 >>>>>>>> --- a/kernel/fork.c >>>>>>>> +++ b/kernel/fork.c >>>>>>>> @@ -963,6 +963,12 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock); >>>>>>>> static unsigned long default_dump_filter = MMF_DUMP_FILTER_DEFAULT; >>>>>>>> +#ifdef CONFIG_ARCH_MAP_SYNC_DISABLE >>>>>>>> +unsigned long default_map_sync_mask = MMF_DISABLE_MAP_SYNC_MASK; >>>>>>>> +#else >>>>>>>> +unsigned long default_map_sync_mask = 0; >>>>>>>> +#endif >>>>>>>> + >>>>> >>>>> I'm not sure CONFIG is really the right approach here. For a distro that would >>>>> basically mean to disable MAP_SYNC for all PPC kernels unless application >>>>> explicitly uses the right prctl. Shouldn't we rather initialize >>>>> default_map_sync_mask on boot based on whether the CPU we run on requires >>>>> new flush instructions or not? Otherwise the patch looks sensible. >>>>> >>>> >>>> yes that is correct. We ideally want to deny MAP_SYNC only w.r.t POWER10. >>>> But on a virtualized platform there is no easy way to detect that. We could >>>> ideally hook this into the nvdimm driver where we look at the new compat >>>> string ibm,persistent-memory-v2 and then disable MAP_SYNC >>>> if we find a device with the specific value. >>> >>> Hum, couldn't we set some flag for nvdimm devices with >>> "ibm,persistent-memory-v2" property and then check it during mmap(2) time >>> and when the device has this propery and the mmap(2) caller doesn't have >>> the prctl set, we'd disallow MAP_SYNC? That should make things mostly >>> seamless, shouldn't it? Only apps that want to use MAP_SYNC on these >>> devices would need to use prctl(MMF_DISABLE_MAP_SYNC, 0) but then these >>> applications need to be aware of new instructions so this isn't that much >>> additional burden... >> >> I am not sure application would want to add that much details/knowledge >> about a platform in their code. 
I was expecting application to do >> >> #ifdef __ppc64__ >> prctl(MAP_SYNC_ENABLE, 1, 0, 0, 0)); >> #endif >> a = mmap(NULL, PAGE_SIZE, PROT_READ|PROT_WRITE, >> MAP_SHARED_VALIDATE | MAP_SYNC, fd, 0); >> >> >> For that code all the complexity that we add w.r.t ibm,persistent-memory-v2 >> is not useful. Do you see a value in making all these device specific rather >> than a conditional on __ppc64__? > If the vpmem devices continue to work with the old instruction on > POWER10 then it makes sense to make this per-device. vPMEM doesn't have write_cache and hence it is synchronous even without using any specific flush instruction. The question is do we want to have different programming steps when running on vPMEM vs a persistent PMEM device on ppc64. I will work on the device specific ENABLE flag and then we can compare the kernel complexity against the added benefit. -aneesh _______________________________________________ Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org To unsubscribe send an email to linux-nvdimm-leave@lists.01.org ^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [RFC PATCH 1/2] libnvdimm: Add prctl control for disabling synchronous fault support. @ 2020-06-01 12:20 ` Aneesh Kumar K.V 0 siblings, 0 replies; 40+ messages in thread From: Aneesh Kumar K.V @ 2020-06-01 12:20 UTC (permalink / raw) To: Michal Suchánek Cc: Jan Kara, linux-nvdimm, jack, Jeff Moyer, oohall, dan.j.williams, linuxppc-dev On 6/1/20 5:37 PM, Michal Suchánek wrote: > On Mon, Jun 01, 2020 at 05:31:50PM +0530, Aneesh Kumar K.V wrote: >> On 6/1/20 3:39 PM, Jan Kara wrote: >>> On Fri 29-05-20 16:25:35, Aneesh Kumar K.V wrote: >>>> On 5/29/20 3:22 PM, Jan Kara wrote: >>>>> On Fri 29-05-20 15:07:31, Aneesh Kumar K.V wrote: >>>>>> Thanks Michal. I also missed Jeff in this email thread. >>>>> >>>>> And I think you'll also need some of the sched maintainers for the prctl >>>>> bits... >>>>> >>>>>> On 5/29/20 3:03 PM, Michal Suchánek wrote: >>>>>>> Adding Jan >>>>>>> >>>>>>> On Fri, May 29, 2020 at 11:11:39AM +0530, Aneesh Kumar K.V wrote: >>>>>>>> With POWER10, architecture is adding new pmem flush and sync instructions. >>>>>>>> The kernel should prevent the usage of MAP_SYNC if applications are not using >>>>>>>> the new instructions on newer hardware. >>>>>>>> >>>>>>>> This patch adds a prctl option MAP_SYNC_ENABLE that can be used to enable >>>>>>>> the usage of MAP_SYNC. The kernel config option is added to allow the user >>>>>>>> to control whether MAP_SYNC should be enabled by default or not. >>>>>>>> >>>>>>>> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> >>>>> ... 
>>>>>>>> diff --git a/kernel/fork.c b/kernel/fork.c >>>>>>>> index 8c700f881d92..d5a9a363e81e 100644 >>>>>>>> --- a/kernel/fork.c >>>>>>>> +++ b/kernel/fork.c >>>>>>>> @@ -963,6 +963,12 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock); >>>>>>>> static unsigned long default_dump_filter = MMF_DUMP_FILTER_DEFAULT; >>>>>>>> +#ifdef CONFIG_ARCH_MAP_SYNC_DISABLE >>>>>>>> +unsigned long default_map_sync_mask = MMF_DISABLE_MAP_SYNC_MASK; >>>>>>>> +#else >>>>>>>> +unsigned long default_map_sync_mask = 0; >>>>>>>> +#endif >>>>>>>> + >>>>> >>>>> I'm not sure CONFIG is really the right approach here. For a distro that would >>>>> basically mean to disable MAP_SYNC for all PPC kernels unless application >>>>> explicitly uses the right prctl. Shouldn't we rather initialize >>>>> default_map_sync_mask on boot based on whether the CPU we run on requires >>>>> new flush instructions or not? Otherwise the patch looks sensible. >>>>> >>>> >>>> yes that is correct. We ideally want to deny MAP_SYNC only w.r.t POWER10. >>>> But on a virtualized platform there is no easy way to detect that. We could >>>> ideally hook this into the nvdimm driver where we look at the new compat >>>> string ibm,persistent-memory-v2 and then disable MAP_SYNC >>>> if we find a device with the specific value. >>> >>> Hum, couldn't we set some flag for nvdimm devices with >>> "ibm,persistent-memory-v2" property and then check it during mmap(2) time >>> and when the device has this propery and the mmap(2) caller doesn't have >>> the prctl set, we'd disallow MAP_SYNC? That should make things mostly >>> seamless, shouldn't it? Only apps that want to use MAP_SYNC on these >>> devices would need to use prctl(MMF_DISABLE_MAP_SYNC, 0) but then these >>> applications need to be aware of new instructions so this isn't that much >>> additional burden... >> >> I am not sure application would want to add that much details/knowledge >> about a platform in their code. 
I was expecting application to do >> >> #ifdef __ppc64__ >> prctl(MAP_SYNC_ENABLE, 1, 0, 0, 0)); >> #endif >> a = mmap(NULL, PAGE_SIZE, PROT_READ|PROT_WRITE, >> MAP_SHARED_VALIDATE | MAP_SYNC, fd, 0); >> >> >> For that code all the complexity that we add w.r.t ibm,persistent-memory-v2 >> is not useful. Do you see a value in making all these device specific rather >> than a conditional on __ppc64__? > If the vpmem devices continue to work with the old instruction on > POWER10 then it makes sense to make this per-device. vPMEM doesn't have write_cache and hence it is synchronous even without using any specific flush instruction. The question is do we want to have different programming steps when running on vPMEM vs a persistent PMEM device on ppc64. I will work on the device specific ENABLE flag and then we can compare the kernel complexity against the added benefit. -aneesh ^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [RFC PATCH 1/2] libnvdimm: Add prctl control for disabling synchronous fault support. 2020-06-01 12:20 ` Aneesh Kumar K.V @ 2020-06-02 7:57 ` Aneesh Kumar K.V -1 siblings, 0 replies; 40+ messages in thread From: Aneesh Kumar K.V @ 2020-06-02 7:57 UTC (permalink / raw) To: Michal Suchánek; +Cc: Jan Kara, jack, linuxppc-dev, mpe, linux-nvdimm "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com> writes: > On 6/1/20 5:37 PM, Michal Suchánek wrote: >> On Mon, Jun 01, 2020 at 05:31:50PM +0530, Aneesh Kumar K.V wrote: >>> On 6/1/20 3:39 PM, Jan Kara wrote: >>>> On Fri 29-05-20 16:25:35, Aneesh Kumar K.V wrote: >>>>> On 5/29/20 3:22 PM, Jan Kara wrote: >>>>>> On Fri 29-05-20 15:07:31, Aneesh Kumar K.V wrote: >>>>>>> Thanks Michal. I also missed Jeff in this email thread. >>>>>> >>>>>> And I think you'll also need some of the sched maintainers for the prctl >>>>>> bits... >>>>>> >>>>>>> On 5/29/20 3:03 PM, Michal Suchánek wrote: >>>>>>>> Adding Jan >>>>>>>> >>>>>>>> On Fri, May 29, 2020 at 11:11:39AM +0530, Aneesh Kumar K.V wrote: >>>>>>>>> With POWER10, architecture is adding new pmem flush and sync instructions. >>>>>>>>> The kernel should prevent the usage of MAP_SYNC if applications are not using >>>>>>>>> the new instructions on newer hardware. >>>>>>>>> >>>>>>>>> This patch adds a prctl option MAP_SYNC_ENABLE that can be used to enable >>>>>>>>> the usage of MAP_SYNC. The kernel config option is added to allow the user >>>>>>>>> to control whether MAP_SYNC should be enabled by default or not. >>>>>>>>> >>>>>>>>> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> >>>>>> ... 
>>>>>>>>> diff --git a/kernel/fork.c b/kernel/fork.c >>>>>>>>> index 8c700f881d92..d5a9a363e81e 100644 >>>>>>>>> --- a/kernel/fork.c >>>>>>>>> +++ b/kernel/fork.c >>>>>>>>> @@ -963,6 +963,12 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock); >>>>>>>>> static unsigned long default_dump_filter = MMF_DUMP_FILTER_DEFAULT; >>>>>>>>> +#ifdef CONFIG_ARCH_MAP_SYNC_DISABLE >>>>>>>>> +unsigned long default_map_sync_mask = MMF_DISABLE_MAP_SYNC_MASK; >>>>>>>>> +#else >>>>>>>>> +unsigned long default_map_sync_mask = 0; >>>>>>>>> +#endif >>>>>>>>> + >>>>>> >>>>>> I'm not sure CONFIG is really the right approach here. For a distro that would >>>>>> basically mean to disable MAP_SYNC for all PPC kernels unless application >>>>>> explicitly uses the right prctl. Shouldn't we rather initialize >>>>>> default_map_sync_mask on boot based on whether the CPU we run on requires >>>>>> new flush instructions or not? Otherwise the patch looks sensible. >>>>>> >>>>> >>>>> yes that is correct. We ideally want to deny MAP_SYNC only w.r.t POWER10. >>>>> But on a virtualized platform there is no easy way to detect that. We could >>>>> ideally hook this into the nvdimm driver where we look at the new compat >>>>> string ibm,persistent-memory-v2 and then disable MAP_SYNC >>>>> if we find a device with the specific value. >>>> >>>> Hum, couldn't we set some flag for nvdimm devices with >>>> "ibm,persistent-memory-v2" property and then check it during mmap(2) time >>>> and when the device has this propery and the mmap(2) caller doesn't have >>>> the prctl set, we'd disallow MAP_SYNC? That should make things mostly >>>> seamless, shouldn't it? Only apps that want to use MAP_SYNC on these >>>> devices would need to use prctl(MMF_DISABLE_MAP_SYNC, 0) but then these >>>> applications need to be aware of new instructions so this isn't that much >>>> additional burden... >>> >>> I am not sure application would want to add that much details/knowledge >>> about a platform in their code. 
I was expecting application to do >>> >>> #ifdef __ppc64__ >>> prctl(MAP_SYNC_ENABLE, 1, 0, 0, 0); >>> #endif >>> a = mmap(NULL, PAGE_SIZE, PROT_READ|PROT_WRITE, >>> MAP_SHARED_VALIDATE | MAP_SYNC, fd, 0); >>> >>> >>> For that code all the complexity that we add w.r.t ibm,persistent-memory-v2 >>> is not useful. Do you see a value in making all these device specific rather >>> than a conditional on __ppc64__? > >> If the vpmem devices continue to work with the old instruction on >> POWER10 then it makes sense to make this per-device. > > vPMEM doesn't have write_cache and hence it is synchronous even without > using any specific flush instruction. The question is do we want to have > different programming steps when running on vPMEM vs a persistent PMEM > device on ppc64. > > I will work on the device specific ENABLE flag and then we can compare > the kernel complexity against the added benefit. I have posted an RFC v2 [1] that implements a device-specific MAP_SYNC enable/disable feature. The posted changes also add a dax flag suggested by Dan. With device-specific MAP_SYNC enable/disable, it was just a sysfs file export of the same flag. 1. https://lore.kernel.org/linuxppc-dev/20200602074909.36738-1-aneesh.kumar@linux.ibm.com/ -aneesh _______________________________________________ Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org To unsubscribe send an email to linux-nvdimm-leave@lists.01.org ^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [RFC PATCH 1/2] libnvdimm: Add prctl control for disabling synchronous fault support. @ 2020-06-02 7:57 ` Aneesh Kumar K.V 0 siblings, 0 replies; 40+ messages in thread From: Aneesh Kumar K.V @ 2020-06-02 7:57 UTC (permalink / raw) To: Michal Suchánek; +Cc: linuxppc-dev, jack, linux-nvdimm, Jan Kara "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com> writes: > On 6/1/20 5:37 PM, Michal Suchánek wrote: >> On Mon, Jun 01, 2020 at 05:31:50PM +0530, Aneesh Kumar K.V wrote: >>> On 6/1/20 3:39 PM, Jan Kara wrote: >>>> On Fri 29-05-20 16:25:35, Aneesh Kumar K.V wrote: >>>>> On 5/29/20 3:22 PM, Jan Kara wrote: >>>>>> On Fri 29-05-20 15:07:31, Aneesh Kumar K.V wrote: >>>>>>> Thanks Michal. I also missed Jeff in this email thread. >>>>>> >>>>>> And I think you'll also need some of the sched maintainers for the prctl >>>>>> bits... >>>>>> >>>>>>> On 5/29/20 3:03 PM, Michal Suchánek wrote: >>>>>>>> Adding Jan >>>>>>>> >>>>>>>> On Fri, May 29, 2020 at 11:11:39AM +0530, Aneesh Kumar K.V wrote: >>>>>>>>> With POWER10, architecture is adding new pmem flush and sync instructions. >>>>>>>>> The kernel should prevent the usage of MAP_SYNC if applications are not using >>>>>>>>> the new instructions on newer hardware. >>>>>>>>> >>>>>>>>> This patch adds a prctl option MAP_SYNC_ENABLE that can be used to enable >>>>>>>>> the usage of MAP_SYNC. The kernel config option is added to allow the user >>>>>>>>> to control whether MAP_SYNC should be enabled by default or not. >>>>>>>>> >>>>>>>>> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> >>>>>> ... 
>>>>>>>>> diff --git a/kernel/fork.c b/kernel/fork.c >>>>>>>>> index 8c700f881d92..d5a9a363e81e 100644 >>>>>>>>> --- a/kernel/fork.c >>>>>>>>> +++ b/kernel/fork.c >>>>>>>>> @@ -963,6 +963,12 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock); >>>>>>>>> static unsigned long default_dump_filter = MMF_DUMP_FILTER_DEFAULT; >>>>>>>>> +#ifdef CONFIG_ARCH_MAP_SYNC_DISABLE >>>>>>>>> +unsigned long default_map_sync_mask = MMF_DISABLE_MAP_SYNC_MASK; >>>>>>>>> +#else >>>>>>>>> +unsigned long default_map_sync_mask = 0; >>>>>>>>> +#endif >>>>>>>>> + >>>>>> >>>>>> I'm not sure CONFIG is really the right approach here. For a distro that would >>>>>> basically mean to disable MAP_SYNC for all PPC kernels unless application >>>>>> explicitly uses the right prctl. Shouldn't we rather initialize >>>>>> default_map_sync_mask on boot based on whether the CPU we run on requires >>>>>> new flush instructions or not? Otherwise the patch looks sensible. >>>>>> >>>>> >>>>> yes that is correct. We ideally want to deny MAP_SYNC only w.r.t POWER10. >>>>> But on a virtualized platform there is no easy way to detect that. We could >>>>> ideally hook this into the nvdimm driver where we look at the new compat >>>>> string ibm,persistent-memory-v2 and then disable MAP_SYNC >>>>> if we find a device with the specific value. >>>> >>>> Hum, couldn't we set some flag for nvdimm devices with >>>> "ibm,persistent-memory-v2" property and then check it during mmap(2) time >>>> and when the device has this propery and the mmap(2) caller doesn't have >>>> the prctl set, we'd disallow MAP_SYNC? That should make things mostly >>>> seamless, shouldn't it? Only apps that want to use MAP_SYNC on these >>>> devices would need to use prctl(MMF_DISABLE_MAP_SYNC, 0) but then these >>>> applications need to be aware of new instructions so this isn't that much >>>> additional burden... >>> >>> I am not sure application would want to add that much details/knowledge >>> about a platform in their code. 
I was expecting application to do >>> >>> #ifdef __ppc64__ >>> prctl(MAP_SYNC_ENABLE, 1, 0, 0, 0); >>> #endif >>> a = mmap(NULL, PAGE_SIZE, PROT_READ|PROT_WRITE, >>> MAP_SHARED_VALIDATE | MAP_SYNC, fd, 0); >>> >>> >>> For that code all the complexity that we add w.r.t ibm,persistent-memory-v2 >>> is not useful. Do you see a value in making all these device specific rather >>> than a conditional on __ppc64__? > >> If the vpmem devices continue to work with the old instruction on >> POWER10 then it makes sense to make this per-device. > > vPMEM doesn't have write_cache and hence it is synchronous even without > using any specific flush instruction. The question is do we want to have > different programming steps when running on vPMEM vs a persistent PMEM > device on ppc64. > > I will work on the device specific ENABLE flag and then we can compare > the kernel complexity against the added benefit. I have posted an RFC v2 [1] that implements a device-specific MAP_SYNC enable/disable feature. The posted changes also add a dax flag suggested by Dan. With device-specific MAP_SYNC enable/disable, it was just a sysfs file export of the same flag. 1. https://lore.kernel.org/linuxppc-dev/20200602074909.36738-1-aneesh.kumar@linux.ibm.com/ -aneesh ^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [RFC PATCH 1/2] libnvdimm: Add prctl control for disabling synchronous fault support. 2020-06-01 12:01 ` Aneesh Kumar K.V @ 2020-06-01 14:56 ` Jan Kara -1 siblings, 0 replies; 40+ messages in thread From: Jan Kara @ 2020-06-01 14:56 UTC (permalink / raw) To: Aneesh Kumar K.V Cc: Jan Kara, Michal Suchánek, jack, linuxppc-dev, mpe, linux-nvdimm On Mon 01-06-20 17:31:50, Aneesh Kumar K.V wrote: > On 6/1/20 3:39 PM, Jan Kara wrote: > > On Fri 29-05-20 16:25:35, Aneesh Kumar K.V wrote: > > > On 5/29/20 3:22 PM, Jan Kara wrote: > > > > On Fri 29-05-20 15:07:31, Aneesh Kumar K.V wrote: > > > > > Thanks Michal. I also missed Jeff in this email thread. > > > > > > > > And I think you'll also need some of the sched maintainers for the prctl > > > > bits... > > > > > > > > > On 5/29/20 3:03 PM, Michal Suchánek wrote: > > > > > > Adding Jan > > > > > > > > > > > > On Fri, May 29, 2020 at 11:11:39AM +0530, Aneesh Kumar K.V wrote: > > > > > > > With POWER10, architecture is adding new pmem flush and sync instructions. > > > > > > > The kernel should prevent the usage of MAP_SYNC if applications are not using > > > > > > > the new instructions on newer hardware. > > > > > > > > > > > > > > This patch adds a prctl option MAP_SYNC_ENABLE that can be used to enable > > > > > > > the usage of MAP_SYNC. The kernel config option is added to allow the user > > > > > > > to control whether MAP_SYNC should be enabled by default or not. > > > > > > > > > > > > > > Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> > > > > ... 
> > > > > > > diff --git a/kernel/fork.c b/kernel/fork.c > > > > > > > index 8c700f881d92..d5a9a363e81e 100644 > > > > > > > --- a/kernel/fork.c > > > > > > > +++ b/kernel/fork.c > > > > > > > @@ -963,6 +963,12 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock); > > > > > > > static unsigned long default_dump_filter = MMF_DUMP_FILTER_DEFAULT; > > > > > > > +#ifdef CONFIG_ARCH_MAP_SYNC_DISABLE > > > > > > > +unsigned long default_map_sync_mask = MMF_DISABLE_MAP_SYNC_MASK; > > > > > > > +#else > > > > > > > +unsigned long default_map_sync_mask = 0; > > > > > > > +#endif > > > > > > > + > > > > > > > > I'm not sure CONFIG is really the right approach here. For a distro that would > > > > basically mean to disable MAP_SYNC for all PPC kernels unless application > > > > explicitly uses the right prctl. Shouldn't we rather initialize > > > > default_map_sync_mask on boot based on whether the CPU we run on requires > > > > new flush instructions or not? Otherwise the patch looks sensible. > > > > > > > > > > yes that is correct. We ideally want to deny MAP_SYNC only w.r.t POWER10. > > > But on a virtualized platform there is no easy way to detect that. We could > > > ideally hook this into the nvdimm driver where we look at the new compat > > > string ibm,persistent-memory-v2 and then disable MAP_SYNC > > > if we find a device with the specific value. > > > > Hum, couldn't we set some flag for nvdimm devices with > > "ibm,persistent-memory-v2" property and then check it during mmap(2) time > > and when the device has this propery and the mmap(2) caller doesn't have > > the prctl set, we'd disallow MAP_SYNC? That should make things mostly > > seamless, shouldn't it? Only apps that want to use MAP_SYNC on these > > devices would need to use prctl(MMF_DISABLE_MAP_SYNC, 0) but then these > > applications need to be aware of new instructions so this isn't that much > > additional burden... 
> > I am not sure application would want to add that much details/knowledge > about a platform in their code. I was expecting application to do > > #ifdef __ppc64__ > prctl(MAP_SYNC_ENABLE, 1, 0, 0, 0); > #endif > a = mmap(NULL, PAGE_SIZE, PROT_READ|PROT_WRITE, > MAP_SHARED_VALIDATE | MAP_SYNC, fd, 0); > > > For that code all the complexity that we add w.r.t ibm,persistent-memory-v2 > is not useful. Do you see a value in making all these device specific rather > than a conditional on __ppc64__? Yes, from the application POV the code would look like this, plus the application would use instructions appropriate for POWER10 for flushing caches... Honza -- Jan Kara <jack@suse.com> SUSE Labs, CR _______________________________________________ Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org To unsubscribe send an email to linux-nvdimm-leave@lists.01.org ^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [RFC PATCH 1/2] libnvdimm: Add prctl control for disabling synchronous fault support. @ 2020-06-01 14:56 ` Jan Kara 0 siblings, 0 replies; 40+ messages in thread From: Jan Kara @ 2020-06-01 14:56 UTC (permalink / raw) To: Aneesh Kumar K.V Cc: Jan Kara, linux-nvdimm, jack, Jeff Moyer, oohall, dan.j.williams, Michal Suchánek, linuxppc-dev On Mon 01-06-20 17:31:50, Aneesh Kumar K.V wrote: > On 6/1/20 3:39 PM, Jan Kara wrote: > > On Fri 29-05-20 16:25:35, Aneesh Kumar K.V wrote: > > > On 5/29/20 3:22 PM, Jan Kara wrote: > > > > On Fri 29-05-20 15:07:31, Aneesh Kumar K.V wrote: > > > > > Thanks Michal. I also missed Jeff in this email thread. > > > > > > > > And I think you'll also need some of the sched maintainers for the prctl > > > > bits... > > > > > > > > > On 5/29/20 3:03 PM, Michal Suchánek wrote: > > > > > > Adding Jan > > > > > > > > > > > > On Fri, May 29, 2020 at 11:11:39AM +0530, Aneesh Kumar K.V wrote: > > > > > > > With POWER10, architecture is adding new pmem flush and sync instructions. > > > > > > > The kernel should prevent the usage of MAP_SYNC if applications are not using > > > > > > > the new instructions on newer hardware. > > > > > > > > > > > > > > This patch adds a prctl option MAP_SYNC_ENABLE that can be used to enable > > > > > > > the usage of MAP_SYNC. The kernel config option is added to allow the user > > > > > > > to control whether MAP_SYNC should be enabled by default or not. > > > > > > > > > > > > > > Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> > > > > ... 
> > > > > > > diff --git a/kernel/fork.c b/kernel/fork.c > > > > > > > index 8c700f881d92..d5a9a363e81e 100644 > > > > > > > --- a/kernel/fork.c > > > > > > > +++ b/kernel/fork.c > > > > > > > @@ -963,6 +963,12 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock); > > > > > > > static unsigned long default_dump_filter = MMF_DUMP_FILTER_DEFAULT; > > > > > > > +#ifdef CONFIG_ARCH_MAP_SYNC_DISABLE > > > > > > > +unsigned long default_map_sync_mask = MMF_DISABLE_MAP_SYNC_MASK; > > > > > > > +#else > > > > > > > +unsigned long default_map_sync_mask = 0; > > > > > > > +#endif > > > > > > > + > > > > > > > > I'm not sure CONFIG is really the right approach here. For a distro that would > > > > basically mean to disable MAP_SYNC for all PPC kernels unless application > > > > explicitly uses the right prctl. Shouldn't we rather initialize > > > > default_map_sync_mask on boot based on whether the CPU we run on requires > > > > new flush instructions or not? Otherwise the patch looks sensible. > > > > > > > > > > yes that is correct. We ideally want to deny MAP_SYNC only w.r.t POWER10. > > > But on a virtualized platform there is no easy way to detect that. We could > > > ideally hook this into the nvdimm driver where we look at the new compat > > > string ibm,persistent-memory-v2 and then disable MAP_SYNC > > > if we find a device with the specific value. > > > > Hum, couldn't we set some flag for nvdimm devices with > > "ibm,persistent-memory-v2" property and then check it during mmap(2) time > > and when the device has this propery and the mmap(2) caller doesn't have > > the prctl set, we'd disallow MAP_SYNC? That should make things mostly > > seamless, shouldn't it? Only apps that want to use MAP_SYNC on these > > devices would need to use prctl(MMF_DISABLE_MAP_SYNC, 0) but then these > > applications need to be aware of new instructions so this isn't that much > > additional burden... 
> > I am not sure application would want to add that much details/knowledge > about a platform in their code. I was expecting application to do > > #ifdef __ppc64__ > prctl(MAP_SYNC_ENABLE, 1, 0, 0, 0); > #endif > a = mmap(NULL, PAGE_SIZE, PROT_READ|PROT_WRITE, > MAP_SHARED_VALIDATE | MAP_SYNC, fd, 0); > > > For that code all the complexity that we add w.r.t ibm,persistent-memory-v2 > is not useful. Do you see a value in making all these device specific rather > than a conditional on __ppc64__? Yes, from the application POV the code would look like this, plus the application would use instructions appropriate for POWER10 for flushing caches... Honza -- Jan Kara <jack@suse.com> SUSE Labs, CR ^ permalink raw reply [flat|nested] 40+ messages in thread
end of thread, other threads:[~2020-06-08 7:44 UTC | newest] Thread overview: 40+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2020-05-29 5:41 [RFC PATCH 1/2] libnvdimm: Add prctl control for disabling synchronous fault support Aneesh Kumar K.V 2020-05-29 5:41 ` Aneesh Kumar K.V 2020-05-29 5:41 ` [RFC PATCH 2/2] powerpc/pmem: Disable synchronous fault by default Aneesh Kumar K.V 2020-05-29 5:41 ` Aneesh Kumar K.V 2020-05-29 9:33 ` [RFC PATCH 1/2] libnvdimm: Add prctl control for disabling synchronous fault support Michal Suchánek 2020-05-29 9:33 ` Michal Suchánek 2020-05-29 9:37 ` Aneesh Kumar K.V 2020-05-29 9:37 ` Aneesh Kumar K.V 2020-05-29 9:52 ` Jan Kara 2020-05-29 9:52 ` Jan Kara 2020-05-29 10:55 ` Aneesh Kumar K.V 2020-05-29 10:55 ` Aneesh Kumar K.V 2020-05-29 19:22 ` Dan Williams 2020-05-29 19:22 ` Dan Williams 2020-05-30 7:18 ` Aneesh Kumar K.V 2020-05-30 7:18 ` Aneesh Kumar K.V 2020-05-30 16:35 ` Dan Williams 2020-05-30 16:35 ` Dan Williams 2020-06-01 9:50 ` Jan Kara 2020-06-01 9:50 ` Jan Kara 2020-06-02 17:59 ` Williams, Dan J 2020-06-02 17:59 ` Williams, Dan J 2020-06-03 8:26 ` Jan Kara 2020-06-03 8:26 ` Jan Kara 2020-06-03 9:09 ` Aneesh Kumar K.V 2020-06-03 9:09 ` Aneesh Kumar K.V 2020-06-08 7:42 ` Aneesh Kumar K.V 2020-06-08 7:42 ` Aneesh Kumar K.V 2020-06-01 10:09 ` Jan Kara 2020-06-01 10:09 ` Jan Kara 2020-06-01 12:01 ` Aneesh Kumar K.V 2020-06-01 12:01 ` Aneesh Kumar K.V 2020-06-01 12:07 ` Michal Suchánek 2020-06-01 12:07 ` Michal Suchánek 2020-06-01 12:20 ` Aneesh Kumar K.V 2020-06-01 12:20 ` Aneesh Kumar K.V 2020-06-02 7:57 ` Aneesh Kumar K.V 2020-06-02 7:57 ` Aneesh Kumar K.V 2020-06-01 14:56 ` Jan Kara 2020-06-01 14:56 ` Jan Kara
This is an external index of several public inboxes; see the mirroring instructions for how to clone and mirror all data and code used by this external index.