All of lore.kernel.org
 help / color / mirror / Atom feed
* [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-20 18:30 ` Larry H.
  0 siblings, 0 replies; 220+ messages in thread
From: Larry H. @ 2009-05-20 18:30 UTC (permalink / raw)
  To: linux-kernel; +Cc: Linus Torvalds, linux-mm, Ingo Molnar

This patch adds support for the SENSITIVE flag to the low-level page
allocator. An additional GFP flag is added for use with higher-level
allocators (GFP_SENSITIVE, which implies __GFP_ZERO).

The code is largely based on the memory sanitization feature in the
PaX project (licensed under the GPL v2 terms), and allows fine-grained
marking of pages for sanitization at allocation and release time, as an
opt-in feature (instead of its opt-all counterpart in PaX).

This avoids leaking sensitive information when memory is released to
the system after use, for example in cryptographic subsystems.

The next patches in this set deploy this flag in different
subsystems that could potentially leak cryptographic secrets or other
confidential information by means of an information leak or other kinds
of security bugs (e.g. use of uninitialized variables or use-after-free),
besides extending the remanence of this data in memory (making
Iceman/cold boot attacks possible).

The "Shredding Your Garbage: Reducing Data Lifetime Through Secure
Deallocation" paper by Jim Chow et al. from the Stanford University
Department of Computer Science explains the security implications of
insecure deallocation, and provides extensive information with figures
and applications thoroughly analyzed for this behavior [1]. More recently,
this issue came to widespread attention when the "Lest We Remember:
Cold Boot Attacks on Encryption Keys" paper (by Halderman et al.) was
published [2].

This patch has been tested on x86 and amd64, with and without HIGHMEM.

	[1] http://www.stanford.edu/~blp/papers/shredding.html
	[2] http://citp.princeton.edu/memory/

Signed-off-by: Larry H. <research@subreption.com>

---
 arch/alpha/include/asm/kmap_types.h    |    3 ++-
 arch/arm/include/asm/kmap_types.h      |    1 +
 arch/avr32/include/asm/kmap_types.h    |    3 ++-
 arch/blackfin/include/asm/kmap_types.h |    1 +
 arch/cris/include/asm/kmap_types.h     |    1 +
 arch/h8300/include/asm/kmap_types.h    |    1 +
 arch/ia64/include/asm/kmap_types.h     |    3 ++-
 arch/m68k/include/asm/kmap_types_mm.h  |    1 +
 arch/m68k/include/asm/kmap_types_no.h  |    1 +
 arch/mips/include/asm/kmap_types.h     |    3 ++-
 arch/parisc/include/asm/kmap_types.h   |    3 ++-
 arch/powerpc/include/asm/kmap_types.h  |    1 +
 arch/s390/include/asm/kmap_types.h     |    1 +
 arch/sh/include/asm/kmap_types.h       |    3 ++-
 arch/sparc/include/asm/kmap_types.h    |    1 +
 arch/um/include/asm/kmap_types.h       |    1 +
 arch/x86/include/asm/kmap_types.h      |    3 ++-
 arch/xtensa/include/asm/kmap_types.h   |    1 +
 include/asm-frv/kmap_types.h           |    1 +
 include/asm-m32r/kmap_types.h          |    3 ++-
 include/asm-mn10300/kmap_types.h       |    1 +
 include/linux/gfp.h                    |   17 +++++++++++++++++
 include/linux/highmem.h                |   12 ++++++++++++
 include/linux/page-flags.h             |    2 ++
 include/linux/slab.h                   |    1 +
 mm/Kconfig                             |   20 ++++++++++++++++++++
 mm/page_alloc.c                        |   22 ++++++++++++++++++++++
 mm/slab.c                              |    8 +++++++-
 mm/slub.c                              |    3 +++
 29 files changed, 113 insertions(+), 9 deletions(-)

Index: linux-2.6/arch/alpha/include/asm/kmap_types.h
===================================================================
--- linux-2.6.orig/arch/alpha/include/asm/kmap_types.h
+++ linux-2.6/arch/alpha/include/asm/kmap_types.h
@@ -24,7 +24,8 @@ D(9)	KM_IRQ0,
 D(10)	KM_IRQ1,
 D(11)	KM_SOFTIRQ0,
 D(12)	KM_SOFTIRQ1,
-D(13)	KM_TYPE_NR
+D(13)  KM_CLEARPAGE,
+D(14)  KM_TYPE_NR
 };
 
 #undef D
Index: linux-2.6/arch/arm/include/asm/kmap_types.h
===================================================================
--- linux-2.6.orig/arch/arm/include/asm/kmap_types.h
+++ linux-2.6/arch/arm/include/asm/kmap_types.h
@@ -18,6 +18,7 @@ enum km_type {
 	KM_IRQ1,
 	KM_SOFTIRQ0,
 	KM_SOFTIRQ1,
+	KM_CLEARPAGE,
 	KM_TYPE_NR
 };
 
Index: linux-2.6/arch/avr32/include/asm/kmap_types.h
===================================================================
--- linux-2.6.orig/arch/avr32/include/asm/kmap_types.h
+++ linux-2.6/arch/avr32/include/asm/kmap_types.h
@@ -22,7 +22,8 @@ D(10)	KM_IRQ0,
 D(11)	KM_IRQ1,
 D(12)	KM_SOFTIRQ0,
 D(13)	KM_SOFTIRQ1,
-D(14)	KM_TYPE_NR
+D(14)	KM_CLEARPAGE,
+D(15)	KM_TYPE_NR
 };
 
 #undef D
Index: linux-2.6/arch/blackfin/include/asm/kmap_types.h
===================================================================
--- linux-2.6.orig/arch/blackfin/include/asm/kmap_types.h
+++ linux-2.6/arch/blackfin/include/asm/kmap_types.h
@@ -15,6 +15,7 @@ enum km_type {
 	KM_IRQ1,
 	KM_SOFTIRQ0,
 	KM_SOFTIRQ1,
+	KM_CLEARPAGE,
 	KM_TYPE_NR
 };
 
Index: linux-2.6/arch/cris/include/asm/kmap_types.h
===================================================================
--- linux-2.6.orig/arch/cris/include/asm/kmap_types.h
+++ linux-2.6/arch/cris/include/asm/kmap_types.h
@@ -19,6 +19,7 @@ enum km_type {
 	KM_IRQ1,
 	KM_SOFTIRQ0,
 	KM_SOFTIRQ1,
+	KM_CLEARPAGE,
 	KM_TYPE_NR
 };
 
Index: linux-2.6/arch/h8300/include/asm/kmap_types.h
===================================================================
--- linux-2.6.orig/arch/h8300/include/asm/kmap_types.h
+++ linux-2.6/arch/h8300/include/asm/kmap_types.h
@@ -15,6 +15,7 @@ enum km_type {
 	KM_IRQ1,
 	KM_SOFTIRQ0,
 	KM_SOFTIRQ1,
+	KM_CLEARPAGE,
 	KM_TYPE_NR
 };
 
Index: linux-2.6/arch/ia64/include/asm/kmap_types.h
===================================================================
--- linux-2.6.orig/arch/ia64/include/asm/kmap_types.h
+++ linux-2.6/arch/ia64/include/asm/kmap_types.h
@@ -22,7 +22,8 @@ D(9)	KM_IRQ0,
 D(10)	KM_IRQ1,
 D(11)	KM_SOFTIRQ0,
 D(12)	KM_SOFTIRQ1,
-D(13)	KM_TYPE_NR
+D(13)	KM_CLEARPAGE,
+D(14)	KM_TYPE_NR
 };
 
 #undef D
Index: linux-2.6/arch/m68k/include/asm/kmap_types_mm.h
===================================================================
--- linux-2.6.orig/arch/m68k/include/asm/kmap_types_mm.h
+++ linux-2.6/arch/m68k/include/asm/kmap_types_mm.h
@@ -15,6 +15,7 @@ enum km_type {
 	KM_IRQ1,
 	KM_SOFTIRQ0,
 	KM_SOFTIRQ1,
+	KM_CLEARPAGE,
 	KM_TYPE_NR
 };
 
Index: linux-2.6/arch/m68k/include/asm/kmap_types_no.h
===================================================================
--- linux-2.6.orig/arch/m68k/include/asm/kmap_types_no.h
+++ linux-2.6/arch/m68k/include/asm/kmap_types_no.h
@@ -15,6 +15,7 @@ enum km_type {
 	KM_IRQ1,
 	KM_SOFTIRQ0,
 	KM_SOFTIRQ1,
+	KM_CLEARPAGE,
 	KM_TYPE_NR
 };
 
Index: linux-2.6/arch/mips/include/asm/kmap_types.h
===================================================================
--- linux-2.6.orig/arch/mips/include/asm/kmap_types.h
+++ linux-2.6/arch/mips/include/asm/kmap_types.h
@@ -22,7 +22,8 @@ D(9)	KM_IRQ0,
 D(10)	KM_IRQ1,
 D(11)	KM_SOFTIRQ0,
 D(12)	KM_SOFTIRQ1,
-D(13)	KM_TYPE_NR
+D(13)	KM_CLEARPAGE,
+D(14)	KM_TYPE_NR
 };
 
 #undef D
Index: linux-2.6/arch/parisc/include/asm/kmap_types.h
===================================================================
--- linux-2.6.orig/arch/parisc/include/asm/kmap_types.h
+++ linux-2.6/arch/parisc/include/asm/kmap_types.h
@@ -22,7 +22,8 @@ D(9)	KM_IRQ0,
 D(10)	KM_IRQ1,
 D(11)	KM_SOFTIRQ0,
 D(12)	KM_SOFTIRQ1,
-D(13)	KM_TYPE_NR
+D(13)	KM_CLEARPAGE,
+D(14)	KM_TYPE_NR
 };
 
 #undef D
Index: linux-2.6/arch/powerpc/include/asm/kmap_types.h
===================================================================
--- linux-2.6.orig/arch/powerpc/include/asm/kmap_types.h
+++ linux-2.6/arch/powerpc/include/asm/kmap_types.h
@@ -26,6 +26,7 @@ enum km_type {
 	KM_SOFTIRQ1,
 	KM_PPC_SYNC_PAGE,
 	KM_PPC_SYNC_ICACHE,
+	KM_CLEARPAGE,
 	KM_TYPE_NR
 };
 
Index: linux-2.6/arch/s390/include/asm/kmap_types.h
===================================================================
--- linux-2.6.orig/arch/s390/include/asm/kmap_types.h
+++ linux-2.6/arch/s390/include/asm/kmap_types.h
@@ -16,6 +16,7 @@ enum km_type {
 	KM_IRQ1,
 	KM_SOFTIRQ0,
 	KM_SOFTIRQ1,	
+	KM_CLEARPAGE,
 	KM_TYPE_NR
 };
 
Index: linux-2.6/arch/sh/include/asm/kmap_types.h
===================================================================
--- linux-2.6.orig/arch/sh/include/asm/kmap_types.h
+++ linux-2.6/arch/sh/include/asm/kmap_types.h
@@ -24,7 +24,8 @@ D(9)	KM_IRQ0,
 D(10)	KM_IRQ1,
 D(11)	KM_SOFTIRQ0,
 D(12)	KM_SOFTIRQ1,
-D(13)	KM_TYPE_NR
+D(13)	KM_CLEARPAGE,
+D(14)	KM_TYPE_NR
 };
 
 #undef D
Index: linux-2.6/arch/sparc/include/asm/kmap_types.h
===================================================================
--- linux-2.6.orig/arch/sparc/include/asm/kmap_types.h
+++ linux-2.6/arch/sparc/include/asm/kmap_types.h
@@ -19,6 +19,7 @@ enum km_type {
 	KM_IRQ1,
 	KM_SOFTIRQ0,
 	KM_SOFTIRQ1,
+	KM_CLEARPAGE,
 	KM_TYPE_NR
 };
 
Index: linux-2.6/arch/um/include/asm/kmap_types.h
===================================================================
--- linux-2.6.orig/arch/um/include/asm/kmap_types.h
+++ linux-2.6/arch/um/include/asm/kmap_types.h
@@ -23,6 +23,7 @@ enum km_type {
 	KM_IRQ1,
 	KM_SOFTIRQ0,
 	KM_SOFTIRQ1,
+	KM_CLEARPAGE,
 	KM_TYPE_NR
 };
 
Index: linux-2.6/arch/x86/include/asm/kmap_types.h
===================================================================
--- linux-2.6.orig/arch/x86/include/asm/kmap_types.h
+++ linux-2.6/arch/x86/include/asm/kmap_types.h
@@ -21,7 +21,8 @@ D(9)	KM_IRQ0,
 D(10)	KM_IRQ1,
 D(11)	KM_SOFTIRQ0,
 D(12)	KM_SOFTIRQ1,
-D(13)	KM_TYPE_NR
+D(13)	KM_CLEARPAGE,
+D(14)	KM_TYPE_NR
 };
 
 #undef D
Index: linux-2.6/arch/xtensa/include/asm/kmap_types.h
===================================================================
--- linux-2.6.orig/arch/xtensa/include/asm/kmap_types.h
+++ linux-2.6/arch/xtensa/include/asm/kmap_types.h
@@ -25,6 +25,7 @@ enum km_type {
   KM_IRQ1,
   KM_SOFTIRQ0,
   KM_SOFTIRQ1,
+  KM_CLEARPAGE,
   KM_TYPE_NR
 };
 
Index: linux-2.6/include/asm-frv/kmap_types.h
===================================================================
--- linux-2.6.orig/include/asm-frv/kmap_types.h
+++ linux-2.6/include/asm-frv/kmap_types.h
@@ -23,6 +23,7 @@ enum km_type {
 	KM_IRQ1,
 	KM_SOFTIRQ0,
 	KM_SOFTIRQ1,
+	KM_CLEARPAGE,
 	KM_TYPE_NR
 };
 
Index: linux-2.6/include/asm-m32r/kmap_types.h
===================================================================
--- linux-2.6.orig/include/asm-m32r/kmap_types.h
+++ linux-2.6/include/asm-m32r/kmap_types.h
@@ -21,7 +21,8 @@ D(9)	KM_IRQ0,
 D(10)	KM_IRQ1,
 D(11)	KM_SOFTIRQ0,
 D(12)	KM_SOFTIRQ1,
-D(13)	KM_TYPE_NR
+D(13)	KM_CLEARPAGE,
+D(14)	KM_TYPE_NR
 };
 
 #undef D
Index: linux-2.6/include/asm-mn10300/kmap_types.h
===================================================================
--- linux-2.6.orig/include/asm-mn10300/kmap_types.h
+++ linux-2.6/include/asm-mn10300/kmap_types.h
@@ -25,6 +25,7 @@ enum km_type {
 	KM_IRQ1,
 	KM_SOFTIRQ0,
 	KM_SOFTIRQ1,
+	KM_CLEARPAGE,
 	KM_TYPE_NR
 };
 
Index: linux-2.6/include/linux/gfp.h
===================================================================
--- linux-2.6.orig/include/linux/gfp.h
+++ linux-2.6/include/linux/gfp.h
@@ -50,6 +50,7 @@ struct vm_area_struct;
 #define __GFP_THISNODE	((__force gfp_t)0x40000u)/* No fallback, no policies */
 #define __GFP_RECLAIMABLE ((__force gfp_t)0x80000u) /* Page is reclaimable */
 #define __GFP_MOVABLE	((__force gfp_t)0x100000u)  /* Page is movable */
+#define __GFP_SENSITIVE	((__force gfp_t)0x200000u)  /* Page contains sensitive information */
 
 #define __GFP_BITS_SHIFT 21	/* Room for 21 __GFP_FOO bits */
 #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
@@ -69,6 +70,7 @@ struct vm_area_struct;
 #define GFP_HIGHUSER_MOVABLE	(__GFP_WAIT | __GFP_IO | __GFP_FS | \
 				 __GFP_HARDWALL | __GFP_HIGHMEM | \
 				 __GFP_MOVABLE)
+#define GFP_SENSITIVE	(__GFP_SENSITIVE | __GFP_ZERO)
 
 #ifdef CONFIG_NUMA
 #define GFP_THISNODE	(__GFP_THISNODE | __GFP_NOWARN | __GFP_NORETRY)
@@ -131,6 +133,21 @@ static inline enum zone_type gfp_zone(gf
 	return ZONE_NORMAL;
 }
 
+#ifdef CONFIG_PAGE_SENSITIVE
+static inline int gfp_sensitive(gfp_t flags)
+{
+	if (flags & __GFP_SENSITIVE)
+		return 1;
+
+	return 0;
+}
+#else
+static inline int gfp_sensitive(gfp_t flags)
+{
+	return 0;
+}
+#endif
+
 /*
  * There is only one page-allocator function, and two main namespaces to
  * it. The alloc_page*() variants return 'struct page *' and as such
Index: linux-2.6/include/linux/highmem.h
===================================================================
--- linux-2.6.orig/include/linux/highmem.h
+++ linux-2.6/include/linux/highmem.h
@@ -124,6 +124,18 @@ static inline void clear_highpage(struct
 	kunmap_atomic(kaddr, KM_USER0);
 }
 
+static inline void sanitize_highpage(struct page *page)
+{
+	void *kaddr;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	kaddr = kmap_atomic(page, KM_CLEARPAGE);
+	clear_page(kaddr);
+	kunmap_atomic(kaddr, KM_CLEARPAGE);
+	local_irq_restore(flags);
+}
+
 static inline void zero_user_segments(struct page *page,
 	unsigned start1, unsigned end1,
 	unsigned start2, unsigned end2)
Index: linux-2.6/include/linux/page-flags.h
===================================================================
--- linux-2.6.orig/include/linux/page-flags.h
+++ linux-2.6/include/linux/page-flags.h
@@ -101,6 +101,7 @@ enum pageflags {
 #ifdef CONFIG_IA64_UNCACHED_ALLOCATOR
 	PG_uncached,		/* Page has been mapped as uncached */
 #endif
+	PG_sensitive,		/* Page holds sensitive data */
 	__NR_PAGEFLAGS,
 
 	/* Filesystems */
@@ -195,6 +196,7 @@ PAGEFLAG(Reserved, reserved) __CLEARPAGE
 PAGEFLAG(Private, private) __CLEARPAGEFLAG(Private, private)
 	__SETPAGEFLAG(Private, private)
 PAGEFLAG(SwapBacked, swapbacked) __CLEARPAGEFLAG(SwapBacked, swapbacked)
+PAGEFLAG(Sensitive, sensitive)
 
 __PAGEFLAG(SlobPage, slob_page)
 __PAGEFLAG(SlobFree, slob_free)
Index: linux-2.6/include/linux/slab.h
===================================================================
--- linux-2.6.orig/include/linux/slab.h
+++ linux-2.6/include/linux/slab.h
@@ -23,6 +23,7 @@
 #define SLAB_CACHE_DMA		0x00004000UL	/* Use GFP_DMA memory */
 #define SLAB_STORE_USER		0x00010000UL	/* DEBUG: Store the last owner for bug hunting */
 #define SLAB_PANIC		0x00040000UL	/* Panic if kmem_cache_create() fails */
+#define SLAB_SENSITIVE		0x00080000UL	/* Memory will hold sensitive information */
 /*
  * SLAB_DESTROY_BY_RCU - **WARNING** READ THIS!
  *
Index: linux-2.6/mm/Kconfig
===================================================================
--- linux-2.6.orig/mm/Kconfig
+++ linux-2.6/mm/Kconfig
@@ -155,6 +155,26 @@ config PAGEFLAGS_EXTENDED
 	def_bool y
 	depends on 64BIT || SPARSEMEM_VMEMMAP || !NUMA || !SPARSEMEM
 
+config PAGE_SENSITIVE
+	bool "Support for selective page sanitization"
+	help
+	 This option provides support for honoring the sensitive bit
+	 in the low level page allocator. This bit is used to mark
+	 pages that will contain sensitive information (such as
+	 cryptographic secrets and credentials).
+
+	 Pages marked with the sensitive bit will be sanitized upon
+	 release, to prevent information leaks and data remanence that
+	 could allow Iceman/coldboot attacks to reveal such data.
+
+	 If you are unsure, select N. This option might introduce a
+	 minimal performance impact on those subsystems that make
+	 use of the flag associated with the sensitive bit.
+
+	 If you use the cryptographic API or want to prevent tty
+	 information leaks locally, you most likely want to enable
+	 this.
+
 # Heavily threaded applications may benefit from splitting the mm-wide
 # page_table_lock, so that faults on different parts of the user address
 # space can be handled with less contention: split it at this NR_CPUS.
Index: linux-2.6/mm/page_alloc.c
===================================================================
--- linux-2.6.orig/mm/page_alloc.c
+++ linux-2.6/mm/page_alloc.c
@@ -545,6 +545,7 @@ static void free_one_page(struct zone *z
 
 static void __free_pages_ok(struct page *page, unsigned int order)
 {
+	unsigned long index = 1UL << order;
 	unsigned long flags;
 	int i;
 	int bad = 0;
@@ -559,6 +560,18 @@ static void __free_pages_ok(struct page 
 		debug_check_no_obj_freed(page_address(page),
 					   PAGE_SIZE << order);
 	}
+
+	/*
+	 * Page has the SENSITIVE flag set. We zero the memory
+	 * and clear the flag bit.
+	 */
+	if (PageSensitive(page)) {
+		for (; index; --index)
+			sanitize_highpage(page + index - 1);
+
+		ClearPageSensitive(page);
+	}
+
 	arch_free_page(page, order);
 	kernel_map_pages(page, 1 << order, 0);
 
@@ -650,6 +663,9 @@ static int prep_new_page(struct page *pa
 	if (gfp_flags & __GFP_ZERO)
 		prep_zero_page(page, order, gfp_flags);
 
+	if (gfp_sensitive(gfp_flags))
+		SetPageSensitive(page);
+
 	if (order && (gfp_flags & __GFP_COMP))
 		prep_compound_page(page, order);
 
@@ -1009,6 +1025,12 @@ static void free_hot_cold_page(struct pa
 		debug_check_no_locks_freed(page_address(page), PAGE_SIZE);
 		debug_check_no_obj_freed(page_address(page), PAGE_SIZE);
 	}
+
+	if (PageSensitive(page)) {
+		sanitize_highpage(page);
+		ClearPageSensitive(page);
+	}
+
 	arch_free_page(page, 0);
 	kernel_map_pages(page, 1, 0);
 
Index: linux-2.6/mm/slab.c
===================================================================
--- linux-2.6.orig/mm/slab.c
+++ linux-2.6/mm/slab.c
@@ -2270,7 +2270,11 @@ kmem_cache_create (const char *name, siz
 	align = ralign;
 
 	/* Get cache's description obj. */
-	cachep = kmem_cache_zalloc(&cache_cache, GFP_KERNEL);
+	if (flags & SLAB_SENSITIVE)
+		cachep = kmem_cache_zalloc(&cache_cache, GFP_KERNEL | GFP_SENSITIVE);
+	else
+		cachep = kmem_cache_zalloc(&cache_cache, GFP_KERNEL);
+
 	if (!cachep)
 		goto oops;
 
@@ -2356,6 +2360,8 @@ kmem_cache_create (const char *name, siz
 	cachep->gfpflags = 0;
 	if (CONFIG_ZONE_DMA_FLAG && (flags & SLAB_CACHE_DMA))
 		cachep->gfpflags |= GFP_DMA;
+	if (flags & SLAB_SENSITIVE)
+		cachep->gfpflags |= GFP_SENSITIVE;
 	cachep->buffer_size = size;
 	cachep->reciprocal_buffer_size = reciprocal_value(size);
 
Index: linux-2.6/mm/slub.c
===================================================================
--- linux-2.6.orig/mm/slub.c
+++ linux-2.6/mm/slub.c
@@ -2292,6 +2292,9 @@ static int calculate_sizes(struct kmem_c
 	if (s->flags & SLAB_RECLAIM_ACCOUNT)
 		s->allocflags |= __GFP_RECLAIMABLE;
 
+	if (s->flags & SLAB_SENSITIVE)
+		s->allocflags |= GFP_SENSITIVE;
+
 	/*
 	 * Determine the number of objects per slab
 	 */

^ permalink raw reply	[flat|nested] 220+ messages in thread

* [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-20 18:30 ` Larry H.
  0 siblings, 0 replies; 220+ messages in thread
From: Larry H. @ 2009-05-20 18:30 UTC (permalink / raw)
  To: linux-kernel; +Cc: Linus Torvalds, linux-mm, Ingo Molnar

This patch adds support for the SENSITIVE flag to the low-level page
allocator. An additional GFP flag is added for use with higher-level
allocators (GFP_SENSITIVE, which implies __GFP_ZERO).

The code is largely based on the memory sanitization feature in the
PaX project (licensed under the GPL v2 terms), and allows fine-grained
marking of pages for sanitization at allocation and release time, as an
opt-in feature (instead of its opt-all counterpart in PaX).

This avoids leaking sensitive information when memory is released to
the system after use, for example in cryptographic subsystems.

The next patches in this set deploy this flag in different
subsystems that could potentially leak cryptographic secrets or other
confidential information by means of an information leak or other kinds
of security bugs (e.g. use of uninitialized variables or use-after-free),
besides extending the remanence of this data in memory (making
Iceman/cold boot attacks possible).

The "Shredding Your Garbage: Reducing Data Lifetime Through Secure
Deallocation" paper by Jim Chow et al. from the Stanford University
Department of Computer Science explains the security implications of
insecure deallocation, and provides extensive information with figures
and applications thoroughly analyzed for this behavior [1]. More recently,
this issue came to widespread attention when the "Lest We Remember:
Cold Boot Attacks on Encryption Keys" paper (by Halderman et al.) was
published [2].

This patch has been tested on x86 and amd64, with and without HIGHMEM.

	[1] http://www.stanford.edu/~blp/papers/shredding.html
	[2] http://citp.princeton.edu/memory/

Signed-off-by: Larry H. <research@subreption.com>

---
 arch/alpha/include/asm/kmap_types.h    |    3 ++-
 arch/arm/include/asm/kmap_types.h      |    1 +
 arch/avr32/include/asm/kmap_types.h    |    3 ++-
 arch/blackfin/include/asm/kmap_types.h |    1 +
 arch/cris/include/asm/kmap_types.h     |    1 +
 arch/h8300/include/asm/kmap_types.h    |    1 +
 arch/ia64/include/asm/kmap_types.h     |    3 ++-
 arch/m68k/include/asm/kmap_types_mm.h  |    1 +
 arch/m68k/include/asm/kmap_types_no.h  |    1 +
 arch/mips/include/asm/kmap_types.h     |    3 ++-
 arch/parisc/include/asm/kmap_types.h   |    3 ++-
 arch/powerpc/include/asm/kmap_types.h  |    1 +
 arch/s390/include/asm/kmap_types.h     |    1 +
 arch/sh/include/asm/kmap_types.h       |    3 ++-
 arch/sparc/include/asm/kmap_types.h    |    1 +
 arch/um/include/asm/kmap_types.h       |    1 +
 arch/x86/include/asm/kmap_types.h      |    3 ++-
 arch/xtensa/include/asm/kmap_types.h   |    1 +
 include/asm-frv/kmap_types.h           |    1 +
 include/asm-m32r/kmap_types.h          |    3 ++-
 include/asm-mn10300/kmap_types.h       |    1 +
 include/linux/gfp.h                    |   17 +++++++++++++++++
 include/linux/highmem.h                |   12 ++++++++++++
 include/linux/page-flags.h             |    2 ++
 include/linux/slab.h                   |    1 +
 mm/Kconfig                             |   20 ++++++++++++++++++++
 mm/page_alloc.c                        |   22 ++++++++++++++++++++++
 mm/slab.c                              |    8 +++++++-
 mm/slub.c                              |    3 +++
 29 files changed, 113 insertions(+), 9 deletions(-)

Index: linux-2.6/arch/alpha/include/asm/kmap_types.h
===================================================================
--- linux-2.6.orig/arch/alpha/include/asm/kmap_types.h
+++ linux-2.6/arch/alpha/include/asm/kmap_types.h
@@ -24,7 +24,8 @@ D(9)	KM_IRQ0,
 D(10)	KM_IRQ1,
 D(11)	KM_SOFTIRQ0,
 D(12)	KM_SOFTIRQ1,
-D(13)	KM_TYPE_NR
+D(13)  KM_CLEARPAGE,
+D(14)  KM_TYPE_NR
 };
 
 #undef D
Index: linux-2.6/arch/arm/include/asm/kmap_types.h
===================================================================
--- linux-2.6.orig/arch/arm/include/asm/kmap_types.h
+++ linux-2.6/arch/arm/include/asm/kmap_types.h
@@ -18,6 +18,7 @@ enum km_type {
 	KM_IRQ1,
 	KM_SOFTIRQ0,
 	KM_SOFTIRQ1,
+	KM_CLEARPAGE,
 	KM_TYPE_NR
 };
 
Index: linux-2.6/arch/avr32/include/asm/kmap_types.h
===================================================================
--- linux-2.6.orig/arch/avr32/include/asm/kmap_types.h
+++ linux-2.6/arch/avr32/include/asm/kmap_types.h
@@ -22,7 +22,8 @@ D(10)	KM_IRQ0,
 D(11)	KM_IRQ1,
 D(12)	KM_SOFTIRQ0,
 D(13)	KM_SOFTIRQ1,
-D(14)	KM_TYPE_NR
+D(14)	KM_CLEARPAGE,
+D(15)	KM_TYPE_NR
 };
 
 #undef D
Index: linux-2.6/arch/blackfin/include/asm/kmap_types.h
===================================================================
--- linux-2.6.orig/arch/blackfin/include/asm/kmap_types.h
+++ linux-2.6/arch/blackfin/include/asm/kmap_types.h
@@ -15,6 +15,7 @@ enum km_type {
 	KM_IRQ1,
 	KM_SOFTIRQ0,
 	KM_SOFTIRQ1,
+	KM_CLEARPAGE,
 	KM_TYPE_NR
 };
 
Index: linux-2.6/arch/cris/include/asm/kmap_types.h
===================================================================
--- linux-2.6.orig/arch/cris/include/asm/kmap_types.h
+++ linux-2.6/arch/cris/include/asm/kmap_types.h
@@ -19,6 +19,7 @@ enum km_type {
 	KM_IRQ1,
 	KM_SOFTIRQ0,
 	KM_SOFTIRQ1,
+	KM_CLEARPAGE,
 	KM_TYPE_NR
 };
 
Index: linux-2.6/arch/h8300/include/asm/kmap_types.h
===================================================================
--- linux-2.6.orig/arch/h8300/include/asm/kmap_types.h
+++ linux-2.6/arch/h8300/include/asm/kmap_types.h
@@ -15,6 +15,7 @@ enum km_type {
 	KM_IRQ1,
 	KM_SOFTIRQ0,
 	KM_SOFTIRQ1,
+	KM_CLEARPAGE,
 	KM_TYPE_NR
 };
 
Index: linux-2.6/arch/ia64/include/asm/kmap_types.h
===================================================================
--- linux-2.6.orig/arch/ia64/include/asm/kmap_types.h
+++ linux-2.6/arch/ia64/include/asm/kmap_types.h
@@ -22,7 +22,8 @@ D(9)	KM_IRQ0,
 D(10)	KM_IRQ1,
 D(11)	KM_SOFTIRQ0,
 D(12)	KM_SOFTIRQ1,
-D(13)	KM_TYPE_NR
+D(13)	KM_CLEARPAGE,
+D(14)	KM_TYPE_NR
 };
 
 #undef D
Index: linux-2.6/arch/m68k/include/asm/kmap_types_mm.h
===================================================================
--- linux-2.6.orig/arch/m68k/include/asm/kmap_types_mm.h
+++ linux-2.6/arch/m68k/include/asm/kmap_types_mm.h
@@ -15,6 +15,7 @@ enum km_type {
 	KM_IRQ1,
 	KM_SOFTIRQ0,
 	KM_SOFTIRQ1,
+	KM_CLEARPAGE,
 	KM_TYPE_NR
 };
 
Index: linux-2.6/arch/m68k/include/asm/kmap_types_no.h
===================================================================
--- linux-2.6.orig/arch/m68k/include/asm/kmap_types_no.h
+++ linux-2.6/arch/m68k/include/asm/kmap_types_no.h
@@ -15,6 +15,7 @@ enum km_type {
 	KM_IRQ1,
 	KM_SOFTIRQ0,
 	KM_SOFTIRQ1,
+	KM_CLEARPAGE,
 	KM_TYPE_NR
 };
 
Index: linux-2.6/arch/mips/include/asm/kmap_types.h
===================================================================
--- linux-2.6.orig/arch/mips/include/asm/kmap_types.h
+++ linux-2.6/arch/mips/include/asm/kmap_types.h
@@ -22,7 +22,8 @@ D(9)	KM_IRQ0,
 D(10)	KM_IRQ1,
 D(11)	KM_SOFTIRQ0,
 D(12)	KM_SOFTIRQ1,
-D(13)	KM_TYPE_NR
+D(13)	KM_CLEARPAGE,
+D(14)	KM_TYPE_NR
 };
 
 #undef D
Index: linux-2.6/arch/parisc/include/asm/kmap_types.h
===================================================================
--- linux-2.6.orig/arch/parisc/include/asm/kmap_types.h
+++ linux-2.6/arch/parisc/include/asm/kmap_types.h
@@ -22,7 +22,8 @@ D(9)	KM_IRQ0,
 D(10)	KM_IRQ1,
 D(11)	KM_SOFTIRQ0,
 D(12)	KM_SOFTIRQ1,
-D(13)	KM_TYPE_NR
+D(13)	KM_CLEARPAGE,
+D(14)	KM_TYPE_NR
 };
 
 #undef D
Index: linux-2.6/arch/powerpc/include/asm/kmap_types.h
===================================================================
--- linux-2.6.orig/arch/powerpc/include/asm/kmap_types.h
+++ linux-2.6/arch/powerpc/include/asm/kmap_types.h
@@ -26,6 +26,7 @@ enum km_type {
 	KM_SOFTIRQ1,
 	KM_PPC_SYNC_PAGE,
 	KM_PPC_SYNC_ICACHE,
+	KM_CLEARPAGE,
 	KM_TYPE_NR
 };
 
Index: linux-2.6/arch/s390/include/asm/kmap_types.h
===================================================================
--- linux-2.6.orig/arch/s390/include/asm/kmap_types.h
+++ linux-2.6/arch/s390/include/asm/kmap_types.h
@@ -16,6 +16,7 @@ enum km_type {
 	KM_IRQ1,
 	KM_SOFTIRQ0,
 	KM_SOFTIRQ1,	
+	KM_CLEARPAGE,
 	KM_TYPE_NR
 };
 
Index: linux-2.6/arch/sh/include/asm/kmap_types.h
===================================================================
--- linux-2.6.orig/arch/sh/include/asm/kmap_types.h
+++ linux-2.6/arch/sh/include/asm/kmap_types.h
@@ -24,7 +24,8 @@ D(9)	KM_IRQ0,
 D(10)	KM_IRQ1,
 D(11)	KM_SOFTIRQ0,
 D(12)	KM_SOFTIRQ1,
-D(13)	KM_TYPE_NR
+D(13)	KM_CLEARPAGE,
+D(14)	KM_TYPE_NR
 };
 
 #undef D
Index: linux-2.6/arch/sparc/include/asm/kmap_types.h
===================================================================
--- linux-2.6.orig/arch/sparc/include/asm/kmap_types.h
+++ linux-2.6/arch/sparc/include/asm/kmap_types.h
@@ -19,6 +19,7 @@ enum km_type {
 	KM_IRQ1,
 	KM_SOFTIRQ0,
 	KM_SOFTIRQ1,
+	KM_CLEARPAGE,
 	KM_TYPE_NR
 };
 
Index: linux-2.6/arch/um/include/asm/kmap_types.h
===================================================================
--- linux-2.6.orig/arch/um/include/asm/kmap_types.h
+++ linux-2.6/arch/um/include/asm/kmap_types.h
@@ -23,6 +23,7 @@ enum km_type {
 	KM_IRQ1,
 	KM_SOFTIRQ0,
 	KM_SOFTIRQ1,
+	KM_CLEARPAGE,
 	KM_TYPE_NR
 };
 
Index: linux-2.6/arch/x86/include/asm/kmap_types.h
===================================================================
--- linux-2.6.orig/arch/x86/include/asm/kmap_types.h
+++ linux-2.6/arch/x86/include/asm/kmap_types.h
@@ -21,7 +21,8 @@ D(9)	KM_IRQ0,
 D(10)	KM_IRQ1,
 D(11)	KM_SOFTIRQ0,
 D(12)	KM_SOFTIRQ1,
-D(13)	KM_TYPE_NR
+D(13)	KM_CLEARPAGE,
+D(14)	KM_TYPE_NR
 };
 
 #undef D
Index: linux-2.6/arch/xtensa/include/asm/kmap_types.h
===================================================================
--- linux-2.6.orig/arch/xtensa/include/asm/kmap_types.h
+++ linux-2.6/arch/xtensa/include/asm/kmap_types.h
@@ -25,6 +25,7 @@ enum km_type {
   KM_IRQ1,
   KM_SOFTIRQ0,
   KM_SOFTIRQ1,
+  KM_CLEARPAGE,
   KM_TYPE_NR
 };
 
Index: linux-2.6/include/asm-frv/kmap_types.h
===================================================================
--- linux-2.6.orig/include/asm-frv/kmap_types.h
+++ linux-2.6/include/asm-frv/kmap_types.h
@@ -23,6 +23,7 @@ enum km_type {
 	KM_IRQ1,
 	KM_SOFTIRQ0,
 	KM_SOFTIRQ1,
+	KM_CLEARPAGE,
 	KM_TYPE_NR
 };
 
Index: linux-2.6/include/asm-m32r/kmap_types.h
===================================================================
--- linux-2.6.orig/include/asm-m32r/kmap_types.h
+++ linux-2.6/include/asm-m32r/kmap_types.h
@@ -21,7 +21,8 @@ D(9)	KM_IRQ0,
 D(10)	KM_IRQ1,
 D(11)	KM_SOFTIRQ0,
 D(12)	KM_SOFTIRQ1,
-D(13)	KM_TYPE_NR
+D(13)	KM_CLEARPAGE,
+D(14)	KM_TYPE_NR
 };
 
 #undef D
Index: linux-2.6/include/asm-mn10300/kmap_types.h
===================================================================
--- linux-2.6.orig/include/asm-mn10300/kmap_types.h
+++ linux-2.6/include/asm-mn10300/kmap_types.h
@@ -25,6 +25,7 @@ enum km_type {
 	KM_IRQ1,
 	KM_SOFTIRQ0,
 	KM_SOFTIRQ1,
+	KM_CLEARPAGE,
 	KM_TYPE_NR
 };
 
Index: linux-2.6/include/linux/gfp.h
===================================================================
--- linux-2.6.orig/include/linux/gfp.h
+++ linux-2.6/include/linux/gfp.h
@@ -50,6 +50,7 @@ struct vm_area_struct;
 #define __GFP_THISNODE	((__force gfp_t)0x40000u)/* No fallback, no policies */
 #define __GFP_RECLAIMABLE ((__force gfp_t)0x80000u) /* Page is reclaimable */
 #define __GFP_MOVABLE	((__force gfp_t)0x100000u)  /* Page is movable */
+#define __GFP_SENSITIVE	((__force gfp_t)0x200000u)  /* Page contains sensitive information */
 
 #define __GFP_BITS_SHIFT 21	/* Room for 21 __GFP_FOO bits */
 #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
@@ -69,6 +70,7 @@ struct vm_area_struct;
 #define GFP_HIGHUSER_MOVABLE	(__GFP_WAIT | __GFP_IO | __GFP_FS | \
 				 __GFP_HARDWALL | __GFP_HIGHMEM | \
 				 __GFP_MOVABLE)
+#define GFP_SENSITIVE	(__GFP_SENSITIVE | __GFP_ZERO)
 
 #ifdef CONFIG_NUMA
 #define GFP_THISNODE	(__GFP_THISNODE | __GFP_NOWARN | __GFP_NORETRY)
@@ -131,6 +133,21 @@ static inline enum zone_type gfp_zone(gf
 	return ZONE_NORMAL;
 }
 
+#ifdef CONFIG_PAGE_SENSITIVE
+static inline int gfp_sensitive(gfp_t flags)
+{
+	if (flags & __GFP_SENSITIVE)
+		return 1;
+
+	return 0;
+}
+#else
+static inline int gfp_sensitive(gfp_t flags)
+{
+	return 0;
+}
+#endif
+
 /*
  * There is only one page-allocator function, and two main namespaces to
  * it. The alloc_page*() variants return 'struct page *' and as such
Index: linux-2.6/include/linux/highmem.h
===================================================================
--- linux-2.6.orig/include/linux/highmem.h
+++ linux-2.6/include/linux/highmem.h
@@ -124,6 +124,18 @@ static inline void clear_highpage(struct
 	kunmap_atomic(kaddr, KM_USER0);
 }
 
+static inline void sanitize_highpage(struct page *page)
+{
+	void *kaddr;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	kaddr = kmap_atomic(page, KM_CLEARPAGE);
+	clear_page(kaddr);
+	kunmap_atomic(kaddr, KM_CLEARPAGE);
+	local_irq_restore(flags);
+}
+
 static inline void zero_user_segments(struct page *page,
 	unsigned start1, unsigned end1,
 	unsigned start2, unsigned end2)
Index: linux-2.6/include/linux/page-flags.h
===================================================================
--- linux-2.6.orig/include/linux/page-flags.h
+++ linux-2.6/include/linux/page-flags.h
@@ -101,6 +101,7 @@ enum pageflags {
 #ifdef CONFIG_IA64_UNCACHED_ALLOCATOR
 	PG_uncached,		/* Page has been mapped as uncached */
 #endif
+	PG_sensitive,		/* Page holds sensitive data */
 	__NR_PAGEFLAGS,
 
 	/* Filesystems */
@@ -195,6 +196,7 @@ PAGEFLAG(Reserved, reserved) __CLEARPAGE
 PAGEFLAG(Private, private) __CLEARPAGEFLAG(Private, private)
 	__SETPAGEFLAG(Private, private)
 PAGEFLAG(SwapBacked, swapbacked) __CLEARPAGEFLAG(SwapBacked, swapbacked)
+PAGEFLAG(Sensitive, sensitive)
 
 __PAGEFLAG(SlobPage, slob_page)
 __PAGEFLAG(SlobFree, slob_free)
Index: linux-2.6/include/linux/slab.h
===================================================================
--- linux-2.6.orig/include/linux/slab.h
+++ linux-2.6/include/linux/slab.h
@@ -23,6 +23,7 @@
 #define SLAB_CACHE_DMA		0x00004000UL	/* Use GFP_DMA memory */
 #define SLAB_STORE_USER		0x00010000UL	/* DEBUG: Store the last owner for bug hunting */
 #define SLAB_PANIC		0x00040000UL	/* Panic if kmem_cache_create() fails */
+#define SLAB_SENSITIVE		0x00080000UL	/* Memory will hold sensitive information */
 /*
  * SLAB_DESTROY_BY_RCU - **WARNING** READ THIS!
  *
Index: linux-2.6/mm/Kconfig
===================================================================
--- linux-2.6.orig/mm/Kconfig
+++ linux-2.6/mm/Kconfig
@@ -155,6 +155,26 @@ config PAGEFLAGS_EXTENDED
 	def_bool y
 	depends on 64BIT || SPARSEMEM_VMEMMAP || !NUMA || !SPARSEMEM
 
+config PAGE_SENSITIVE
+	bool "Support for selective page sanitization"
+	help
+	 This option provides support for honoring the sensitive bit
+	 in the low level page allocator. This bit is used to mark
+	 pages that will contain sensitive information (such as
+	 cryptographic secrets and credentials).
+
+	 Pages marked with the sensitive bit will be sanitized upon
+	 release, to prevent information leaks and data remanence that
+	 could allow Iceman/coldboot attacks to reveal such data.
+
+	 If you are unsure, select N. This option might introduce a
+	 minimal performance impact on those subsystems that make
+	 use of the flag associated with the sensitive bit.
+
+	 If you use the cryptographic API or want to prevent tty
+	 information leaks locally, you most likely want to enable
+	 this.
+
 # Heavily threaded applications may benefit from splitting the mm-wide
 # page_table_lock, so that faults on different parts of the user address
 # space can be handled with less contention: split it at this NR_CPUS.
Index: linux-2.6/mm/page_alloc.c
===================================================================
--- linux-2.6.orig/mm/page_alloc.c
+++ linux-2.6/mm/page_alloc.c
@@ -545,6 +545,7 @@ static void free_one_page(struct zone *z
 
 static void __free_pages_ok(struct page *page, unsigned int order)
 {
+	unsigned long index = 1UL << order;
 	unsigned long flags;
 	int i;
 	int bad = 0;
@@ -559,6 +560,18 @@ static void __free_pages_ok(struct page 
 		debug_check_no_obj_freed(page_address(page),
 					   PAGE_SIZE << order);
 	}
+
+	/*
+	 * Page has the SENSITIVE flag set. We zero the memory
+	 * and clear the flag bit.
+	 */
+	if (PageSensitive(page)) {
+		for (; index; --index)
+			sanitize_highpage(page + index - 1);
+
+		ClearPageSensitive(page);
+	}
+
 	arch_free_page(page, order);
 	kernel_map_pages(page, 1 << order, 0);
 
@@ -650,6 +663,9 @@ static int prep_new_page(struct page *pa
 	if (gfp_flags & __GFP_ZERO)
 		prep_zero_page(page, order, gfp_flags);
 
+	if (gfp_sensitive(gfp_flags))
+		SetPageSensitive(page);
+
 	if (order && (gfp_flags & __GFP_COMP))
 		prep_compound_page(page, order);
 
@@ -1009,6 +1025,12 @@ static void free_hot_cold_page(struct pa
 		debug_check_no_locks_freed(page_address(page), PAGE_SIZE);
 		debug_check_no_obj_freed(page_address(page), PAGE_SIZE);
 	}
+
+	if (PageSensitive(page)) {
+		sanitize_highpage(page);
+		ClearPageSensitive(page);
+	}
+
 	arch_free_page(page, 0);
 	kernel_map_pages(page, 1, 0);
 
Index: linux-2.6/mm/slab.c
===================================================================
--- linux-2.6.orig/mm/slab.c
+++ linux-2.6/mm/slab.c
@@ -2270,7 +2270,11 @@ kmem_cache_create (const char *name, siz
 	align = ralign;
 
 	/* Get cache's description obj. */
-	cachep = kmem_cache_zalloc(&cache_cache, GFP_KERNEL);
+	if (flags & SLAB_SENSITIVE)
+		cachep = kmem_cache_zalloc(&cache_cache, GFP_KERNEL | GFP_SENSITIVE);
+	else
+		cachep = kmem_cache_zalloc(&cache_cache, GFP_KERNEL);
+
 	if (!cachep)
 		goto oops;
 
@@ -2356,6 +2360,8 @@ kmem_cache_create (const char *name, siz
 	cachep->gfpflags = 0;
 	if (CONFIG_ZONE_DMA_FLAG && (flags & SLAB_CACHE_DMA))
 		cachep->gfpflags |= GFP_DMA;
+	if (flags & SLAB_SENSITIVE)
+		cachep->gfpflags |= GFP_SENSITIVE;
 	cachep->buffer_size = size;
 	cachep->reciprocal_buffer_size = reciprocal_value(size);
 
Index: linux-2.6/mm/slub.c
===================================================================
--- linux-2.6.orig/mm/slub.c
+++ linux-2.6/mm/slub.c
@@ -2292,6 +2292,9 @@ static int calculate_sizes(struct kmem_c
 	if (s->flags & SLAB_RECLAIM_ACCOUNT)
 		s->allocflags |= __GFP_RECLAIMABLE;
 
+	if (s->flags & SLAB_SENSITIVE)
+		s->allocflags |= GFP_SENSITIVE;
+
 	/*
 	 * Determine the number of objects per slab
 	 */

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-20 18:30 ` Larry H.
@ 2009-05-20 20:42   ` Peter Zijlstra
  -1 siblings, 0 replies; 220+ messages in thread
From: Peter Zijlstra @ 2009-05-20 20:42 UTC (permalink / raw)
  To: Larry H.; +Cc: linux-kernel, Linus Torvalds, linux-mm, Ingo Molnar

On Wed, 2009-05-20 at 11:30 -0700, Larry H. wrote:
> This patch adds support for the SENSITIVE flag to the low level page
> allocator. An additional GFP flag is added for use with higher level
> allocators (GFP_SENSITIVE, which implies GFP_ZERO).
> 
> The code is largely based off the memory sanitization feature in the
> PaX project (licensed under the GPL v2 terms), and allows fine grained
> marking of pages for sanitization on allocation and release time, as an
> opt-in feature (instead of its opt-all counterpart in PaX).
> 
> This avoids leaking sensitive information when memory is released to
> the system after use, for example in cryptographic subsystems.
> 
> The next patches in this set deploy this flag for different
> subsystems that could potentially leak cryptographic secrets or other
> confidential information by means of an information leak or other kinds
> of security bugs (ex. use of uninitialized variables or use-after-free),
> besides extending the remanence of this data on memory (allowing
> Iceman/coldboot attacks possible).
> 
> The "Shredding Your Garbage: Reducing Data Lifetime Through Secure
> Deallocation" paper by Jim Chow et. al from the Stanford University
> Department of Computer Science, explains the security implications of
> insecure deallocation, and provides extensive information with figures
> and applications thoroughly analyzed for this behavior [1]. More recently
> this issue came to widespread attention when the "Lest We Remember:
> Cold Boot Attacks on Encryption Keys" (by Halderman et. al) paper was
> published [2].

Seems like a particularly wasteful use of a pageflag. Why not simply
erase the buffer before freeing in those few places where we know it's
important (i.e. exactly those places you now put the pageflag in)?


^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-20 20:42   ` Peter Zijlstra
  0 siblings, 0 replies; 220+ messages in thread
From: Peter Zijlstra @ 2009-05-20 20:42 UTC (permalink / raw)
  To: Larry H.; +Cc: linux-kernel, Linus Torvalds, linux-mm, Ingo Molnar

On Wed, 2009-05-20 at 11:30 -0700, Larry H. wrote:
> This patch adds support for the SENSITIVE flag to the low level page
> allocator. An additional GFP flag is added for use with higher level
> allocators (GFP_SENSITIVE, which implies GFP_ZERO).
> 
> The code is largely based off the memory sanitization feature in the
> PaX project (licensed under the GPL v2 terms), and allows fine grained
> marking of pages for sanitization on allocation and release time, as an
> opt-in feature (instead of its opt-all counterpart in PaX).
> 
> This avoids leaking sensitive information when memory is released to
> the system after use, for example in cryptographic subsystems.
> 
> The next patches in this set deploy this flag for different
> subsystems that could potentially leak cryptographic secrets or other
> confidential information by means of an information leak or other kinds
> of security bugs (ex. use of uninitialized variables or use-after-free),
> besides extending the remanence of this data on memory (allowing
> Iceman/coldboot attacks possible).
> 
> The "Shredding Your Garbage: Reducing Data Lifetime Through Secure
> Deallocation" paper by Jim Chow et. al from the Stanford University
> Department of Computer Science, explains the security implications of
> insecure deallocation, and provides extensive information with figures
> and applications thoroughly analyzed for this behavior [1]. More recently
> this issue came to widespread attention when the "Lest We Remember:
> Cold Boot Attacks on Encryption Keys" (by Halderman et. al) paper was
> published [2].

Seems like a particularly wasteful use of a pageflag. Why not simply
erase the buffer before freeing in those few places where we know it's
important (i.e. exactly those places you now put the pageflag in)?

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-20 20:42   ` Peter Zijlstra
@ 2009-05-20 21:24     ` Larry H.
  -1 siblings, 0 replies; 220+ messages in thread
From: Larry H. @ 2009-05-20 21:24 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: linux-kernel, Linus Torvalds, linux-mm, Ingo Molnar, pageexec

On 22:42 Wed 20 May     , Peter Zijlstra wrote:
> On Wed, 2009-05-20 at 11:30 -0700, Larry H. wrote:
> > This patch adds support for the SENSITIVE flag to the low level page
> > allocator. An additional GFP flag is added for use with higher level
> > allocators (GFP_SENSITIVE, which implies GFP_ZERO).
> > 
> > The code is largely based off the memory sanitization feature in the
> > PaX project (licensed under the GPL v2 terms), and allows fine grained
> > marking of pages for sanitization on allocation and release time, as an
> > opt-in feature (instead of its opt-all counterpart in PaX).
> > 
> > This avoids leaking sensitive information when memory is released to
> > the system after use, for example in cryptographic subsystems.
> > 
> > The next patches in this set deploy this flag for different
> > subsystems that could potentially leak cryptographic secrets or other
> > confidential information by means of an information leak or other kinds
> > of security bugs (ex. use of uninitialized variables or use-after-free),
> > besides extending the remanence of this data on memory (allowing
> > Iceman/coldboot attacks possible).
> > 
> > The "Shredding Your Garbage: Reducing Data Lifetime Through Secure
> > Deallocation" paper by Jim Chow et. al from the Stanford University
> > Department of Computer Science, explains the security implications of
> > insecure deallocation, and provides extensive information with figures
> > and applications thoroughly analyzed for this behavior [1]. More recently
> > this issue came to widespread attention when the "Lest We Remember:
> > Cold Boot Attacks on Encryption Keys" (by Halderman et. al) paper was
> > published [2].
> 
> Seems like a particularly wasteful use of a pageflag. Why not simply
> erase the buffer before freeing in those few places where we know its
> important (ie. exactly those places you now put the pageflag in)?

What's wasteful about it? It does not conflict with anything else and
there's plenty of room for other future flags.

The idea of the patch is not merely "protecting" those few places, but
providing a clean, effective generalized method for this purpose. Your
approach means forcing all developers to remember where they have to
place this explicit clearing, and introducing unnecessary code
duplication and an ever growing list of places adding these calls.

Would you be honestly willing to oversee that job?

Point of allocation isn't the same as point of release/freeing.

Also, this lets third-party code (and other kernel interfaces)
use this feature effortlessly. Moreover, this flag allows easy
integration with MAC/security frameworks (for instance, SELinux) to mark
a process as requiring sensitive mappings, in higher level APIs. There are
plans to work on such a patch, which could be independently proposed
to the SELinux maintainers.

	Larry.


^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-20 21:24     ` Larry H.
  0 siblings, 0 replies; 220+ messages in thread
From: Larry H. @ 2009-05-20 21:24 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: linux-kernel, Linus Torvalds, linux-mm, Ingo Molnar, pageexec

On 22:42 Wed 20 May     , Peter Zijlstra wrote:
> On Wed, 2009-05-20 at 11:30 -0700, Larry H. wrote:
> > This patch adds support for the SENSITIVE flag to the low level page
> > allocator. An additional GFP flag is added for use with higher level
> > allocators (GFP_SENSITIVE, which implies GFP_ZERO).
> > 
> > The code is largely based off the memory sanitization feature in the
> > PaX project (licensed under the GPL v2 terms), and allows fine grained
> > marking of pages for sanitization on allocation and release time, as an
> > opt-in feature (instead of its opt-all counterpart in PaX).
> > 
> > This avoids leaking sensitive information when memory is released to
> > the system after use, for example in cryptographic subsystems.
> > 
> > The next patches in this set deploy this flag for different
> > subsystems that could potentially leak cryptographic secrets or other
> > confidential information by means of an information leak or other kinds
> > of security bugs (ex. use of uninitialized variables or use-after-free),
> > besides extending the remanence of this data on memory (allowing
> > Iceman/coldboot attacks possible).
> > 
> > The "Shredding Your Garbage: Reducing Data Lifetime Through Secure
> > Deallocation" paper by Jim Chow et. al from the Stanford University
> > Department of Computer Science, explains the security implications of
> > insecure deallocation, and provides extensive information with figures
> > and applications thoroughly analyzed for this behavior [1]. More recently
> > this issue came to widespread attention when the "Lest We Remember:
> > Cold Boot Attacks on Encryption Keys" (by Halderman et. al) paper was
> > published [2].
> 
> Seems like a particularly wasteful use of a pageflag. Why not simply
> erase the buffer before freeing in those few places where we know its
> important (ie. exactly those places you now put the pageflag in)?

What's wasteful about it? It does not conflict with anything else and
there's plenty of room for other future flags.

The idea of the patch is not merely "protecting" those few places, but
providing a clean, effective generalized method for this purpose. Your
approach means forcing all developers to remember where they have to
place this explicit clearing, and introducing unnecessary code
duplication and an ever growing list of places adding these calls.

Would you be honestly willing to oversee that job?

Point of allocation isn't the same as point of release/freeing.

Also, this lets third-party code (and other kernel interfaces)
use this feature effortlessly. Moreover, this flag allows easy
integration with MAC/security frameworks (for instance, SELinux) to mark
a process as requiring sensitive mappings, in higher level APIs. There are
plans to work on such a patch, which could be independently proposed
to the SELinux maintainers.

	Larry.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-20 21:24     ` Larry H.
@ 2009-05-21 15:21       ` Robin Holt
  -1 siblings, 0 replies; 220+ messages in thread
From: Robin Holt @ 2009-05-21 15:21 UTC (permalink / raw)
  To: Larry H.
  Cc: Peter Zijlstra, linux-kernel, Linus Torvalds, linux-mm,
	Ingo Molnar, pageexec

> > Seems like a particularly wasteful use of a pageflag. Why not simply
> > erase the buffer before freeing in those few places where we know its
> > important (ie. exactly those places you now put the pageflag in)?
...
> The idea of the patch is not merely "protecting" those few places, but
> providing a clean, effective generalized method for this purpose. Your
> approach means forcing all developers to remember where they have to
> place this explicit clearing, and introducing unnecessary code
> duplication and an ever growing list of places adding these calls.

I agree with the earlier point.  If you know enough to set the flag, then
you know enough to call a function which does a clear before free.
Does seem like a waste of a page flag.

> Also, this let's third-party code (and other kernel interfaces)
> use this feature effortlessly. Moreover, this flag allows easy
> integration with MAC/security frameworks (for instance, SELinux) to mark
> a process as requiring sensitive mappings, in higher level APIs. There are
> plans to work on such a patch, which could be independently proposed
> to the SELinux maintainers.

That sounds like either a thread group flag or a VMA flag, not a page
flag.  If you make it a page flag, you would still need to track it
on the vma or process to handle the event where the page gets migrated
or swapped out.  Really doesn't feel like a page flag is right, but I
reserve the right to be wrong.

Robin

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-21 15:21       ` Robin Holt
  0 siblings, 0 replies; 220+ messages in thread
From: Robin Holt @ 2009-05-21 15:21 UTC (permalink / raw)
  To: Larry H.
  Cc: Peter Zijlstra, linux-kernel, Linus Torvalds, linux-mm,
	Ingo Molnar, pageexec

> > Seems like a particularly wasteful use of a pageflag. Why not simply
> > erase the buffer before freeing in those few places where we know its
> > important (ie. exactly those places you now put the pageflag in)?
...
> The idea of the patch is not merely "protecting" those few places, but
> providing a clean, effective generalized method for this purpose. Your
> approach means forcing all developers to remember where they have to
> place this explicit clearing, and introducing unnecessary code
> duplication and an ever growing list of places adding these calls.

I agree with the earlier point.  If you know enough to set the flag, then
you know enough to call a function which does a clear before free.
Does seem like a waste of a page flag.

> Also, this let's third-party code (and other kernel interfaces)
> use this feature effortlessly. Moreover, this flag allows easy
> integration with MAC/security frameworks (for instance, SELinux) to mark
> a process as requiring sensitive mappings, in higher level APIs. There are
> plans to work on such a patch, which could be independently proposed
> to the SELinux maintainers.

That sounds like either a thread group flag or a VMA flag, not a page
flag.  If you make it a page flag, you would still need to track it
on the vma or process to handle the event where the page gets migrated
or swapped out.  Really doesn't feel like a page flag is right, but I
reserve the right to be wrong.

Robin

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-21 15:21       ` Robin Holt
@ 2009-05-21 18:43         ` Larry H.
  -1 siblings, 0 replies; 220+ messages in thread
From: Larry H. @ 2009-05-21 18:43 UTC (permalink / raw)
  To: Robin Holt
  Cc: Peter Zijlstra, linux-kernel, Linus Torvalds, linux-mm,
	Ingo Molnar, pageexec

On 10:21 Thu 21 May     , Robin Holt wrote:
> I agree with the earlier.  If you know enough to set the flag, then
> you know enough to call a function which does a clear before free.
> Does seem like a waste of a page flag.

Again, the place of allocation doesn't necessarily equal the place of
freeing, and most often it is not the same location. And those calls are
unnecessary code duplication and overhead, which is suboptimal.

Would you also be willing to oversee the job of watching where these
calls will require placement, who uses them and how? The design decision
of forcing people to write extra code for clearing seems wasteful if you
can simply tell them to use a flag, which brings the same benefits at no
extra cost. Plus any future benefits if it's developed further.

> That sounds like either a thread group flag or a VMA flag, not a page
> flag.  If you make it a page flag, you would still need to track it
> on the vma or process to handle the event where the page gets migrated
> or swapped out.  Really doesn't feel like a page flag is right, but I
> reserve the right to be wrong.

The patch adds a GFP flag and a slab flag for lookaside caches, both
tied to the lower level page flag. I've implemented a task flag for the
process sensitive marking but that one was kept out of the patchset to
keep it simple for now, and as clean and sane as possible. You are right
that this needs to be tracked down in other interfaces to prevent memory
from being swapped to disk, but I didn't want to include this in the
current patchset until I had positive feedback. Either way, those
changes are trivial.

Also, how would you track what memory is sensitive and what isn't? The
clearing only solves one of the issues here. Deterring data remanence
is far more than just zeroing a buffer.

I would like to request inclusion unless more substantial problems are
found with this patchset.

	Larry

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-21 18:43         ` Larry H.
  0 siblings, 0 replies; 220+ messages in thread
From: Larry H. @ 2009-05-21 18:43 UTC (permalink / raw)
  To: Robin Holt
  Cc: Peter Zijlstra, linux-kernel, Linus Torvalds, linux-mm,
	Ingo Molnar, pageexec

On 10:21 Thu 21 May     , Robin Holt wrote:
> I agree with the earlier.  If you know enough to set the flag, then
> you know enough to call a function which does a clear before free.
> Does seem like a waste of a page flag.

Again, the place of allocation doesn't necessarily equal the place of
freeing, and most often it is not the same location. And those calls are
unnecessary code duplication and overhead, which is suboptimal.

Would you also be willing to oversee the job of watching where these
calls will require placement, who uses them and how? The design decision
of forcing people to write extra code for clearing seems wasteful if you
can simply tell them to use a flag, which brings the same benefits at no
extra cost. Plus any future benefits if it's developed further.

> That sounds like either a thread group flag or a VMA flag, not a page
> flag.  If you make it a page flag, you would still need to track it
> on the vma or process to handle the event where the page gets migrated
> or swapped out.  Really doesn't feel like a page flag is right, but I
> reserve the right to be wrong.

The patch adds a GFP flag and a slab flag for lookaside caches, both
tied to the lower level page flag. I've implemented a task flag for the
process sensitive marking but that one was kept out of the patchset to
keep it simple for now, and as clean and sane as possible. You are right
that this needs to be tracked down in other interfaces to prevent memory
from being swapped to disk, but I didn't want to include this in the
current patchset until I had positive feedback. Either way, those
changes are trivial.

Also, how would you track what memory is sensitive and what isn't? The
clearing only solves one of the issues here. Deterring data remanence
is far more than just zeroing a buffer.

I would like to request inclusion unless more substantial problems are
found with this patchset.

	Larry

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-20 20:42   ` Peter Zijlstra
@ 2009-05-21 19:08     ` Rik van Riel
  -1 siblings, 0 replies; 220+ messages in thread
From: Rik van Riel @ 2009-05-21 19:08 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Larry H., linux-kernel, Linus Torvalds, linux-mm, Ingo Molnar

Peter Zijlstra wrote:

> Seems like a particularly wasteful use of a pageflag. Why not simply
> erase the buffer before freeing in those few places where we know its
> important (ie. exactly those places you now put the pageflag in)?

You don't always know this at page free time.

I could see the PG_sensitive flag being used from
userspace through mmap or madvise flags.  This way
the sensitive memory from a program like gpg would
be cleaned, even if gpg died in a segfault accident.

I could also imagine the suspend-to-disk code skipping
PG_sensitive pages when storing data to disk, and
replacing it with some magic signature so programs
that use special PG_sensitive buffers can know that
their crypto key disappeared after a restore.


^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-21 19:08     ` Rik van Riel
  0 siblings, 0 replies; 220+ messages in thread
From: Rik van Riel @ 2009-05-21 19:08 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Larry H., linux-kernel, Linus Torvalds, linux-mm, Ingo Molnar

Peter Zijlstra wrote:

> Seems like a particularly wasteful use of a pageflag. Why not simply
> erase the buffer before freeing in those few places where we know its
> important (ie. exactly those places you now put the pageflag in)?

You don't always know this at page free time.

I could see the PG_sensitive flag being used from
userspace through mmap or madvise flags.  This way
the sensitive memory from a program like gpg would
be cleaned, even if gpg died in a segfault accident.

I could also imagine the suspend-to-disk code skipping
PG_sensitive pages when storing data to disk, and
replacing it with some magic signature so programs
that use special PG_sensitive buffers can know that
their crypto key disappeared after a restore.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-20 18:30 ` Larry H.
  (?)
  (?)
@ 2009-05-21 19:17 ` Rik van Riel
  2009-05-21 19:30   ` Larry H.
  2009-05-22  7:34   ` Ingo Molnar
  -1 siblings, 2 replies; 220+ messages in thread
From: Rik van Riel @ 2009-05-21 19:17 UTC (permalink / raw)
  To: Larry H.; +Cc: linux-kernel, Linus Torvalds, linux-mm, Ingo Molnar

Larry H. wrote:
> This patch adds support for the SENSITIVE flag to the low level page
> allocator. An additional GFP flag is added for use with higher level
> allocators (GFP_SENSITIVE, which implies GFP_ZERO).

Sensitive to what?  Allocation failures?

Kidding, I read the rest of your emails.  However,
chances are whoever runs into the code later on
will not read everything.

Would GFP_CONFIDENTIAL & PG_confidential be a better
name, since it indicates the page stores confidential
information, which should not be leaked?

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-21 19:08     ` Rik van Riel
@ 2009-05-21 19:26       ` Alan Cox
  -1 siblings, 0 replies; 220+ messages in thread
From: Alan Cox @ 2009-05-21 19:26 UTC (permalink / raw)
  To: Rik van Riel
  Cc: Peter Zijlstra, Larry H.,
	linux-kernel, Linus Torvalds, linux-mm, Ingo Molnar

> You don't always know this at page free time.

You do at buffer free time.

> I could see the PG_sensitive flag being used from
> userspace through mmap or madvise flags.  This way
> the sensitive memory from a program like gpg would
> be cleaned, even if gpg died in a segfault accident.

Still doesn't need a page flag - that is a vma flag which is far cheaper.
Also means you can get rid of the stupid mlock() misuse by things like
GPG to work around OS weaknesses by crypting the page if it hits
disk/swap/whatever.

> I could also imagine the suspend-to-disk code skipping
> PG_sensitive pages when storing data to disk, and
> replacing it with some magic signature so programs
> that use special PG_sensitive buffers can know that
> their crypto key disappeared after a restore.

It's irrelevant in the simple S2D case. I just patch other bits of the
suspend image to mail me the new key later. The right answer is crypted
swap combined with a hard disk password and thus a crypted and locked
suspend image. Playing the "I must not miss any page which might be
sensitive even compiler stack copies and library buffers I don't know
about" game is not going to build you a secure system - it's simply
*lousy* engineering and design.

Basically though - loss of physical control means you have to assume the
recovered system is compromised. I doubt even TC is going to manage to
spot firmware compromises on your CD-ROM drive, which thanks to the film
industry creating a demand for altered firmware is a well understood
field...

The cost of doing crypto on suspend to disk relative to media speed is
basically irrelevant on a PC today. In the S2R case you might want to
crypt those pages against an electronic pure read of RAM type attack but
this is getting into serious spook territory.


^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-21 19:26       ` Alan Cox
  0 siblings, 0 replies; 220+ messages in thread
From: Alan Cox @ 2009-05-21 19:26 UTC (permalink / raw)
  To: Rik van Riel
  Cc: Peter Zijlstra, Larry H.,
	linux-kernel, Linus Torvalds, linux-mm, Ingo Molnar

> You don't always know this at page free time.

You do at buffer free time.

> I could see the PG_sensitive flag being used from
> userspace through mmap or madvise flags.  This way
> the sensitive memory from a program like gpg would
> be cleaned, even if gpg died in a segfault accident.

Still doesn't need a page flag - that is a vma flag which is far cheaper.
Also means you can get rid of the stupid mlock() misuse by things like
GPG to work around OS weaknesses by crypting the page if it hits
disk/swap/whatever.

> I could also imagine the suspend-to-disk code skipping
> PG_sensitive pages when storing data to disk, and
> replacing it with some magic signature so programs
> that use special PG_sensitive buffers can know that
> their crypto key disappeared after a restore.

It's irrelevant in the simple S2D case. I just patch other bits of the
suspend image to mail me the new key later. The right answer is crypted
swap combined with a hard disk password and thus a crypted and locked
suspend image. Playing the "I must not miss any page which might be
sensitive even compiler stack copies and library buffers I don't know
about" game is not going to build you a secure system - its simply
*lousy* engineering and design.

Basically though - loss of physical control means you have to assume the
recovered system is compromised. I doubt even TC is going to manage to
spot firmware compromises on your CD-ROM drive, which thanks to the film
industry creating a demand for altered firmware is a well understood
field...

The cost of doing crypto on suspend to disk relative to media speed is
basically irrelevant on a PC today. In the S2R case you might want to
crypt those pages against an electronic pure read of RAM type attack but
this is getting into serious spook territory.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-21 19:17 ` Rik van Riel
@ 2009-05-21 19:30   ` Larry H.
  2009-05-22  7:34   ` Ingo Molnar
  1 sibling, 0 replies; 220+ messages in thread
From: Larry H. @ 2009-05-21 19:30 UTC (permalink / raw)
  To: Rik van Riel; +Cc: linux-kernel, Linus Torvalds, linux-mm, Ingo Molnar

On 15:17 Thu 21 May     , Rik van Riel wrote:
> Sensitive to what?  Allocation failures?
>
> Kidding, I read the rest of your emails.  However,
> chances are whoever runs into the code later on
> will not read everything.
>
> Would GFP_CONFIDENTIAL & PG_confidential be a better
> name, since it indicates the page stores confidential
> information, which should not be leaked?

Definitely, I see your point here and this will be modified in the code.
GFP_CONFIDENTIAL and PG_confidential is more specific and won't raise
any confusion when people read the code or want to use the flags.

Thanks for the input.

	Larry

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-21 19:26       ` Alan Cox
@ 2009-05-21 19:56         ` Larry H.
  -1 siblings, 0 replies; 220+ messages in thread
From: Larry H. @ 2009-05-21 19:56 UTC (permalink / raw)
  To: Alan Cox
  Cc: Rik van Riel, Peter Zijlstra, linux-kernel, Linus Torvalds,
	linux-mm, Ingo Molnar

On 20:26 Thu 21 May     , Alan Cox wrote:
> > You don't always know this at page free time.
> 
> You do at buffer free time.

Alan, I think you will agree with me that forcing people to know what
they have to do exactly with their buffers when they will contain
confidential/sensitive data is suboptimal. Like it's been said before,
the clearing isn't the only issue here. We have pagination to disk,
re-allocation leaks, etc.

Rik conveniently recommended me to write a threat model for this and
that's exactly what will be done so the issues are clarified further
more. The text had been omitted from the patches to keep them reasonably
small.

> Still doesn't need a page flag - that is a vma flag which is far cheaper.
> Also means you can get rid of the stupid mlock() misuse by things like
> GPG to work around OS weaknesses by crypting the page if it hits
> disk/swap/whatever.

I had the intention to cover cases like gnupg's approach to
pseudo-secure memory (their mlock pool, the three pass memset wipe, etc)
with this implementation.

We would need to look into a sane approach for encrypting the data. That
was out of scope for my patches, so far. It adds further complexity and
might require more invasive changes (if we want to let the user select
the algorithm on runtime, etc).

Do you suggest a vma flag should be created for this as well?

> > I could also imagine the suspend-to-disk code skipping
> > PG_sensitive pages when storing data to disk, and
> > replacing it with some magic signature so programs
> > that use special PG_sensitive buffers can know that
> > their crypto key disappeared after a restore.
> 
> Its irrelevant in the simple S2D case. I just patch other bits of the
> suspend image to mail me the new key later. The right answer is crypted
> swap combined with a hard disk password and thus a crypted and locked
> suspend image. Playing the "I must not miss any page which might be
> sensitive even compiler stack copies and library buffers I don't know
> about" game is not going to build you a secure system - its simply
> *lousy* engineering and design.

The point is that the keys or sensitive marked pages should never, ever
be swapped to disk, by any means. Right now the patch only affects
kernel code, the task related flag and functionality patches haven't been
submitted yet.

Regarding retrieving the encryption keys, IVs, and so forth, why bother
reading the data remaining on disk? You can just retrieve them off
memory (ex. via rogue driver or some re-allocation bug scenario,
information leak or similar issue) and that's it.

> 
> Basically though - loss of physical control means you have to assume the
> recovered system is compromised. I doubt even TC is going to manage to
> spot firmware compromises on your CD-ROM drive, which thanks to the film
> industry creating a demand for altered firmware is a well understood
> field..

I don't see what physical compromise of the machine has to do with
anything about this patch and the issues it addresses. Although, the
real benefits from TC will be more about memory containment, untrusted
code injection prevention and so forth.

Basically things like preventing SELinux from being disabled via some
kernel vulnerability which lets an attacker abuse a write-4 primitive
(on x86_64 as well). Or patching the pagetables to make the kernel text
writable. Or injecting code in the .rodata section. Or redirecting an
IDT gate to some RWX mapping. The list goes on.

While other vendors might use this technology for locking down their
users, mutilating their rights and constrain their legitimate use of
their systems, we can use this technology for a beneficial purpose.
After all, that was the beauty of Linux since the start. We don't need
to follow a political or corporate agenda in these regards. Right?

> The cost of doing crypto on suspend to disk relative to media speed is
> basically irrelevant on a PC today. In the S2R case you might want to
> crypt those pages against an electronic pure read of RAM type attack but
> this is getting into serious spook territory.

If someone has access to an oscilloscope and the required equipment to
read data directly in that manner, well, the problem isn't that they
have access to your hardware. The problem is that you pissed off the
wrong people. And the list of things to attract such attention is
still, fortunately, short. Or so we believe.

	Larry

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-21 19:56         ` Larry H.
  0 siblings, 0 replies; 220+ messages in thread
From: Larry H. @ 2009-05-21 19:56 UTC (permalink / raw)
  To: Alan Cox
  Cc: Rik van Riel, Peter Zijlstra, linux-kernel, Linus Torvalds,
	linux-mm, Ingo Molnar

On 20:26 Thu 21 May     , Alan Cox wrote:
> > You don't always know this at page free time.
> 
> You do at buffer free time.

Alan, I think you will agree with me that forcing people to know what
they have to do exactly with their buffers when they will contain
confidential/sensitive data is suboptimal. Like it's been said before,
the clearing isn't the only issue here. We have pagination to disk,
re-allocation leaks, etc.

Rik conveniently recommended me to write a threat model for this and
that's exactly what will be done so the issues are clarified further
more. The text had been omitted from the patches to keep them reasonably
small.

> Still doesn't need a page flag - that is a vma flag which is far cheaper.
> Also means you can get rid of the stupid mlock() misuse by things like
> GPG to work around OS weaknesses by crypting the page if it hits
> disk/swap/whatever.

I had the intention to cover cases like gnupg's approach to
pseudo-secure memory (their mlock pool, the three pass memset wipe, etc)
with this implementation.

We would need to look into a sane approach for encrypting the data. That
was out of scope for my patches, so far. It adds further complexity and
might require more invasive changes (if we want to let the user select
the algorithm on runtime, etc).

Do you suggest a vma flag should be created for this as well?

> > I could also imagine the suspend-to-disk code skipping
> > PG_sensitive pages when storing data to disk, and
> > replacing it with some magic signature so programs
> > that use special PG_sensitive buffers can know that
> > their crypto key disappeared after a restore.
> 
> Its irrelevant in the simple S2D case. I just patch other bits of the
> suspend image to mail me the new key later. The right answer is crypted
> swap combined with a hard disk password and thus a crypted and locked
> suspend image. Playing the "I must not miss any page which might be
> sensitive even compiler stack copies and library buffers I don't know
> about" game is not going to build you a secure system - its simply
> *lousy* engineering and design.

The point is that the keys or sensitive marked pages should never, ever
be swapped to disk, by any means. Right now the patch only affects
kernel code, the task related flag and functionality patches haven't been
submitted yet.

Regarding retrieving the encryption keys, IVs, and so forth, why bother
reading the data remaining on disk? You can just retrieve them off
memory (ex. via rogue driver or some re-allocation bug scenario,
information leak or similar issue) and that's it.

> 
> Basically though - loss of physical control means you have to assume the
> recovered system is compromised. I doubt even TC is going to manage to
> spot firmware compromises on your CD-ROM drive, which thanks to the film
> industry creating a demand for altered firmware is a well understood
> field..

I don't see what physical compromise of the machine has to do with
anything about this patch and the issues it addresses. Although, the
real benefits from TC will be more about memory containment, untrusted
code injection prevention and so forth.

Basically things like preventing SELinux from being disabled via some
kernel vulnerability which lets an attacker abuse a write-4 primitive
(on x86_64 as well). Or patching the pagetables to make the kernel text
writable. Or injecting code in the .rodata section. Or redirecting an
IDT gate to some RWX mapping. The list goes on.

While other vendors might use this technology for locking down their
users, mutilating their rights and constrain their legitimate use of
their systems, we can use this technology for a beneficial purpose.
After all, that was the beauty of Linux since the start. We don't need
to follow a political or corporate agenda in these regards. Right?

> The cost of doing crypto on suspend to disk relative to media speed is
> basically irrelevant on a PC today. In the S2R case you might want to
> crypt those pages against an electronic pure read of RAM type attack but
> this is getting into serious spook territory.

If someone has access to an oscilloscope and the required equipment to
read data directly in that manner, well, the problem isn't that they
have access to your hardware. The problem is that you pissed off the
wrong people. And the list of things to attract such attention is
still, fortunately, short. Or so we believe.

	Larry

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-21 19:56         ` Larry H.
@ 2009-05-21 20:47           ` Alan Cox
  -1 siblings, 0 replies; 220+ messages in thread
From: Alan Cox @ 2009-05-21 20:47 UTC (permalink / raw)
  To: Larry H.
  Cc: Rik van Riel, Peter Zijlstra, linux-kernel, Linus Torvalds,
	linux-mm, Ingo Molnar

> > You do at buffer free time.
> 
> Alan, I think you will agree with me that forcing people to know what
> they have to do exactly with their buffers when they will contain
> confidential/sensitive data is suboptimal. Like it's been said before,

In the kernel no and page flags are very precious, very few and if we run
out will cost us a vast amount of extra kernel memory. If page flags were
free the question would be trivial - but they are not. Thus it is worth
asking whether its actually harder to remember to zap the buffer or set
and clear the flag correctly.

> the clearing isn't the only issue here. We have pagination to disk,
> re-allocation leaks, etc.

There is no kernel paging (except virtualised but that is an entire other
can of worms we shouldn't open), you can handle reallocation concerns
without page flags by using a SLAB type for 'secure' allocations which
clears the entry on free.

> > Still doesn't need a page flag - that is a vma flag which is far cheaper.
> > Also means you can get rid of the stupid mlock() misuse by things like
> > GPG to work around OS weaknesses by crypting the page if it hits
> > disk/swap/whatever.
> 
> Do you suggest a vma flag should be created for this as well?

You don't need a page flag, just a per vma flag and something akin to
madvise() to set the flag on the VMA (and/or split the VMA for partial
maps as we do for anything else). VMA flags are cheap.

> The point is that the keys or sensitive marked pages should never, ever
> be swapped to disk, by any means. Right now the patch only affects
> kernel code, the task related flag and functionality patches haven't been
> submitted yet.

If you are paging them to a crypted filestore they should be safe on
disk. What is your problem with that ? If your suspend image is
compromised it doesn't really matter if you wiped the data as what you
resume may then wait for the new keys and compromise those. In fact
having a page flag makes it easier for the attack code to know what to
capture and send to the bad guys...

> Regarding retrieving the encryption keys, IVs, and so forth, why bother
> reading the data remaining on disk? You can just retrieve them off
> memory (ex. via rogue driver or some re-allocation bug scenario,
> information leak or similar issue) and that's it.

I was assuming you'd wipe such data from memory on a suspend to disk.
However on a suspend to disk its basically as cheap to wipe all of memory
and safer than wiping random bits and praying you know what the compiler
did and you know what some other bit of library did.

> After all, that was the beauty of Linux since the start. We don't need
> to follow a political or corporate agenda in these regards. Right?

Indeed - but a technically sound solution that doesn't waste a page flag
is still important. It's btw not as simple as a page flag anyway - the
kernel stores some stuff in places that do not have page flags, it also
has kmaps and other things that will give you surprises.

Perhaps you should post your threat model to go with the patches. At the
moment your model doesn't seem to make sense.

Surely we can attack the problem far more directly for all but S2R by

- choosing to use encrypted swap and encrypted S2D images (already
  possible)
- wiping the in memory image on S2D if the user chooses (which would be
  smart)

That has the advantage that nobody has to label pages sensitive - which
is flawed anyway, we want to label pages "non-sensitive" in the ideal
world so we default secure.




^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-21 20:47           ` Alan Cox
  0 siblings, 0 replies; 220+ messages in thread
From: Alan Cox @ 2009-05-21 20:47 UTC (permalink / raw)
  To: Larry H.
  Cc: Rik van Riel, Peter Zijlstra, linux-kernel, Linus Torvalds,
	linux-mm, Ingo Molnar

> > You do at buffer free time.
> 
> Alan, I think you will agree with me that forcing people to know what
> they have to do exactly with their buffers when they will contain
> confidential/sensitive data is suboptimal. Like it's been said before,

In the kernel no and page flags are very precious, very few and if we run
out will cost us a vast amount of extra kernel memory. If page flags were
free the question would be trivial - but they are not. Thus it is worth
asking whether its actually harder to remember to zap the buffer or set
and clear the flag correctly.

> the clearing isn't the only issue here. We have pagination to disk,
> re-allocation leaks, etc.

There is no kernel paging (except virtualised but that is an entire other
can of worms we shouldn't open), you can handle reallocation concerns
without page flags by using a SLAB type for 'secure' allocations which
clears the entry on free.

> > Still doesn't need a page flag - that is a vma flag which is far cheaper.
> > Also means you can get rid of the stupid mlock() misuse by things like
> > GPG to work around OS weaknesses by crypting the page if it hits
> > disk/swap/whatever.
> 
> Do you suggest a vma flag should be created for this as well?

You don't need a page flag, just a per vma flag and something akin to
madvise() to set the flag on the VMA (and/or split the VMA for partial
maps as we do for anything else). VMA flags are cheap.

> The point is that the keys or sensitive marked pages should never, ever
> be swapped to disk, by any means. Right now the patch only affects
> kernel code, the task related flag and functionality patches haven't been
> submitted yet.

If you are paging them to a crypted filestore they should be safe on
disk. What is your problem with that ? If your suspend image is
compromised it doesn't really matter if you wiped the data as what you
resume may then wait for the new keys and compromise those. In fact
having a page flag makes it easier for the attack code to know what to
capture and send to the bad guys...

> Regarding retrieving the encryption keys, IVs, and so forth, why bother
> reading the data remaining on disk? You can just retrieve them off
> memory (ex. via rogue driver or some re-allocation bug scenario,
> information leak or similar issue) and that's it.

I was assuming you'd wipe such data from memory on a suspend to disk.
However on a suspend to disk its basically as cheap to wipe all of memory
and safer than wiping random bits and praying you know what the compiler
did and you know what some other bit of library did.

> After all, that was the beauty of Linux since the start. We don't need
> to follow a political or corporate agenda in these regards. Right?

Indeed - but a technically sound solution that doesn't waste a page flag
is still important. It's btw not as simple as a page flag anyway - the
kernel stores some stuff in places that do not have page flags, it also
has kmaps and other things that will give you surprises.

Perhaps you should post your threat model to go with the patches. At the
moment your model doesn't seem to make sense.

Surely we can attack the problem far more directly for all but S2R by

- choosing to use encrypted swap and encrypted S2D images (already
  possible)
- wiping the in memory image on S2D if the user chooses (which would be
  smart)

That has the advantage that nobody has to label pages sensitive - which
is flawed anyway, we want to label pages "non-sensitive" in the ideal
world so we default secure.



--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-21 20:47           ` Alan Cox
  (?)
@ 2009-05-21 21:46           ` Larry H.
  2009-05-21 22:47               ` Alan Cox
  -1 siblings, 1 reply; 220+ messages in thread
From: Larry H. @ 2009-05-21 21:46 UTC (permalink / raw)
  To: Alan Cox
  Cc: Rik van Riel, Peter Zijlstra, linux-kernel, Linus Torvalds,
	linux-mm, Ingo Molnar

On 21:47 Thu 21 May     , Alan Cox wrote:
> In the kernel no and page flags are very precious, very few and if we run
> out will cost us a vast amount of extra kernel memory. If page flags were
> free the question would be trivial - but they are not. Thus it is worth
> asking whether its actually harder to remember to zap the buffer or set
> and clear the flag correctly.
> 
> There is no kernel paging (except virtualised but that is an entire other
> can of worms we shouldn't open), you can handle reallocation concerns
> without page flags by using a SLAB type for 'secure' allocations which
> clears the entry on free.
> 
> You don't need a page flag, just a per vma flag and something akin to
> madvise() to set the flag on the VMA (and/or split the VMA for partial
> maps as we do for anything else). VMA flags are cheap.

The patch already implements a SLAB_CONFIDENTIAL flag now (I finished
renaming the flags) for this purpose. This is my proposal and summary of
the changes I'll do to the patch, based off your feedback:

	1. A page flag seems to be frowned upon by you. I can understand
	this and agree that we must keep in mind that these come in
	scarce quantities. A page flag is the only way to allow the low
	level page allocator to mark pages that contain sensitive
	information (so we support this through normal gfp flags in
	get_free_pages and so forth).

	2. We can independently make SLAB/SLUB aware of a CONFIDENTIAL
	flag that:

		a) Sanitizes objects at kfree() time when they've been
		allocated with the gfp flag or they belong to a cache
		marked with the SLAB_CONFIDENTIAL flag.

		b) Does not require changes to the low level page
		allocator.

		c) Still can prevent leaks in re-allocation scenarios
		and other cases.

	3. We can implement a vma flag for this purpose and should be no
	issue to you or other maintainers.

I'll split the SLAB/SLUB changes, which add support for the flag and the
gfp counterpart, and then have a separate one which adds the page flag.
Please read my comments on the latter at the end of this email. We can
ditch the page flag patch if we finally reject that approach and stick
to the other one. I'm fine with that.

Let me know if you are keen on this approach and I'll follow with an
updated patch.

> If you are paging them to a crypted filestore they should be safe on
> disk. What is your problem with that ? If your suspend image is
> compromised it doesn't really matter if you wiped the data as what you
> resume may then wait for the new keys and compromise those. In fact
> having a page flag makes it easier for the attack code to know what to
> capture and send to the bad guys...

I wasn't talking about disk based attacks. I'm talking about a rogue
module or just some information leak which lets a user peek at known
addresses. For instance, some operating systems implement disk
encryption with IVs and keys stored as global variables. Microsoft's
BitLocker operates that way internally if they haven't changed it. Apple
does the same for swap encryption, etc.

> I was assuming you'd wipe such data from memory on a suspend to disk.
> However on a suspend to disk its basically as cheap to wipe all of memory
> and safer than wiping random bits and praying you know what the compiler
> did and you know what some other bit of library did.

Security conscious users normally disable suspend or hibernation
altogether. It's far more difficult to get it right than it seems. You
will *always* need some static place to store your key. Apple's XNU
kernel stores the key in the image header for instance. I bet other
systems do the same.

> Indeed - but a technically sound solution that doesn't waste a page flag
> is still important. It's btw not as simple as a page flag anyway - the
> kernel stores some stuff in places that do not have page flags, it also
> has kmaps and other things that will give you surprises.

I haven't identified a single place that stored potentially sensitive
information which can be reasonably protected with a simple approach
like this, that doesn't use kmalloc or the low level page allocator
directly.

I bet there are some, but there are plenty of other, more obvious, ones
which need our attention.

> Perhaps you should post your threat model to go with the patches. At the
> moment your model doesn't seem to make sense.

The threat model is simple:

	1. The kernel has interfaces which deal with likely sensitive
	information (from tty input drivers, to crypto api and network
	stack implementations).

	2. Memory allocated by these interfaces will suffer from data
	remanence problems, even post-release. This will scatter such
	information and make coldboot/Iceman attacks possible to recover
	cryptographic secrets (ex. scanning for AES key expansion blocks
	is trivial, and this has been demonstrated for RSA as well, see
	the Princeton paper about it).

	3. LIFO allocators make re-allocation leaks possible. If an
	interface allocates a buffer, stores data in it and releases it
	without clearing, a successive allocation somewhere else can
	return this same object and let the caller access the original
	contents out of the context they were meant to. If a network
	stack implementation allocates a 64 byte buffer after some
	cryptoapi ctx initialization code got another 64 byte buffer and
	released it, you've got a problem there. If an attacker couples
	an uninitialized variable usage bug with this situation, you've
	got a possibly exploitable problem there. Worst of all, is that
	he might not need such a bug for abusing it ;)

Let me know if you need any further clarifications, please.

> Surely we can attack the problem far more directly for all but S2R by
> 
> - choosing to use encrypted swap and encrypted S2D images (already
>   possible)
> - wiping the in memory image on S2D if the user chooses (which would be
>   smart)
> 
> That has the advantage that nobody has to label pages sensitive - which
> is flawed anyway, we want to label pages "non-sensitive" in the ideal
> world so we default secure.

I agree the ideal, best approach would be to sanitize all pages. If you
are interested in a patch doing just that (as long as a Kconfig option
enables it), I can provide you with a clean one. The original code in
PaX did just that.

BTW, this can be extrapolated to .rodata and DEBUG_RODATA, as well as
the lack of mprotect restrictions for hosts with SELinux disabled (that
is, no execmem/execstack/execheap checks). We should really make .rodata
read-only by default, and disallow mprotect to produce RWX mappings by
default. Otherwise our NX is flawed. These are matters for another
patch, and a different discussion too.

	Larry

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-21 21:46           ` Larry H.
@ 2009-05-21 22:47               ` Alan Cox
  0 siblings, 0 replies; 220+ messages in thread
From: Alan Cox @ 2009-05-21 22:47 UTC (permalink / raw)
  To: Larry H.
  Cc: Rik van Riel, Peter Zijlstra, linux-kernel, Linus Torvalds,
	linux-mm, Ingo Molnar

> Let me know if you are keen on this approach and I'll follow with an
> updated patch.

Seems a good way to proceed. I think at the point you've got the SLAB/VMA
flags you'll find you don't need anything else. The VMA flag is valuable
on its own for the more relaxed security case of just wanting to crypt a
bit of swap to be safe (or to dump some stuff to a separate secure swap)
without doing all swap.

> I wasn't talking about disk based attacks. I'm talking about a rogue
> module or just some information leak which lets a user peek at known

If you've got a rogue module you already lost, except that by marking
what is sensitive you made the bad guys job easier. Bit like the way
people visually overlay maps and overhead shots from multiple sources and
the 'scrubbed' secret locations stand out for you and are easier to find
than if they were left.

If you are looking for a credit card number in 6GB of RAM you at least
have a chaffing style defence right now.

> Security conscious users normally disable suspend or hibernation
> altogether. It's far more difficult to get it right than it seems. You
> will *always* need some static place to store your key. Apple's XNU

On the systems I've used and use that key (or its passphrase) resides in
the user. That has its own compromise problems (if I can borrow your
laptop I can trojan the key input) but it does stop basic steal and
decrypt attacks.

> I haven't identified a single place that stored potentially sensitive
> information which can be reasonably protected with a simple approach
> like this, that doesn't use kmalloc or the low level page allocator
> directly.

Obvious candidates would be AGPGart, DRI buffers, DMA lowmem buffering,
pad buffers - I don't think they clear all cases and in some cases
(notably DRI) there is data that is potentially "secret" stored in the
video RAM.

You can also extract bits of data post clear out of fascinating corners
like the debug interfaces to FIFOs on I/O controllers. There are also a
large category of buffers that don't get freed/reallocated notably ring
buffers for networking, and tty ring buffers which are mostly not freed
for the lifetime of the device (ie forever). Cleaning all RAM as an
option on S2D and shutdown would be the only real way you'd fix that.

> 	1. The kernel has interfaces which deal with likely sensitive
> 	information (from tty input drivers, to crypto api and network
> 	stack implementations).

However you can't tell what is sensitive, you must assume anything is.
Even a graphic pixmap might be sensitive or an executable's presence
might reveal things.
 
> 	2. Memory allocated by these interfaces will suffer of data
> 	remanence problems, even post-release. This will scatter such
> 	information and make coldboot/Iceman attacks possible to recover
> 	cryptographic secrets (ex. scanning for AES key expansion blocks
> 	is trivial, and this has been demonstrated for RSA as well, see
> 	the Princeton paper about it).

I would argue the logical follow-on from the fact you don't know what is
secure, combined with the basic security concept that you start from the
secure position and relax rights is that the only safe way to do this is
to have an option which turns on scrubbing on free for *all* objects.
Ditto clearing all memory on S2D.
 
> 	3. LIFO allocators make re-allocation leaks possible. If an
> 	interface allocates a buffer, stores data in it and releases it

No argument.

> I agree the ideal, best approach would be to sanitize all pages. If you
> are interested on a patch doing just that (as long as a Kconfig option
> enables it), I can provide you with a clean one. The original code in
> PaX did just that.

Runtime would be even better (and I think you can argue boot one way or
the other). That way distributions can ship the feature for people
who want it but without the performance hit.

> is, no execmem/execstack/execheap checks). We should really make .rodata
> read-only by default, and disallow mprotect to produce RWX mappings by
> default. Otherwise our NX is flawed. These are matters for another
> patch, and a different discussion too.

That's what SELinux is for and with SELinux you can default that way and
relax (as you have to because there are lots of things that produce RWX
mappings).

A similar problem is the lack of kernel side true read-only, which is a
weakness in the hypervisors. Physical hardware can't do irrevocable read
only but it is perfectly doable for a guest under a hypervisor, and the
hypervisor kernel can often be configured to have vastly less external
exposure. (Rik that's a hint to remind the KVM people ;))


^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-21 22:47               ` Alan Cox
  0 siblings, 0 replies; 220+ messages in thread
From: Alan Cox @ 2009-05-21 22:47 UTC (permalink / raw)
  To: Larry H.
  Cc: Rik van Riel, Peter Zijlstra, linux-kernel, Linus Torvalds,
	linux-mm, Ingo Molnar

> Let me know if you are keen on this approach and I'll follow with an
> updated patch.

Seems a good way to proceed. I think at the point you've got the SLAB/VMA
flags you'll find you don't need anything else. The VMA flag is valuable
on its own for the more relaxed security case of just wanting to crypt a
bit of swap to be safe (or to dump some stuff to a separate secure swap)
without doing all swap.

> I wasn't talking about disk based attacks. I'm talking about a rogue
> module or just some information leak which lets a user peek at known

If you've got a rogue module you already lost, except that by marking
what is sensitive you made the bad guys job easier. Bit like the way
people visually overlay maps and overhead shots from multiple sources and
the 'scrubbed' secret locations stand out for you and are easier to find
than if they were left.

If you are looking for a credit card number in 6GB of RAM you at least
have a chaffing style defence right now.

> Security conscious users normally disable suspend or hibernation
> altogether. It's far more difficult to get it right than it seems. You
> will *always* need some static place to store your key. Apple's XNU

On the systems I've used and use that key (or its passphrase) resides in
the user. That has its own compromise problems (if I can borrow your
laptop I can trojan the key input) but it does stop basic steal and
decrypt attacks.

> I haven't identified a single place that stored potentially sensitive
> information which can be reasonably protected with a simple approach
> like this, that doesn't use kmalloc or the low level page allocator
> directly.

Obvious candidates would be AGPGart, DRI buffers, DMA lowmem buffering,
pad buffers - I dont think they clear all cases and in some cases
(notably DRI) there is data that is potentially "secret" stored in the
video RAM.

You can also extract bits of data post clear out of fascinating corners
like the debug interfaces to FIFOs on I/O controllers. There are also a
large category of buffers that don't get freed/reallocated notably ring
buffers for networking, and tty ring buffers which are mostly not freed
for the lifetime of the device (ie forever). Cleaning all RAM as an
option on S2D and shutdown would be the only real way you'd fix that.

> 	1. The kernel has interfaces which deal with likely sensitive
> 	information (from tty input drivers, to crypto api and network
> 	stack implementations).

However you can't tell what is sensitive, you must assume anything is.
Even a graphic pixmap might be sensitive or an executable's presence
might reveal things.
 
> 	2. Memory allocated by these interfaces will suffer of data
> 	remanence problems, even post-release. This will scatter such
> 	information and make coldboot/Iceman attacks possible to recover
> 	cryptographic secrets (ex. scanning for AES key expansion blocks
> 	is trivial, and this has been demonstrated for RSA as well, see
> 	the Princeton paper about it).

I would argue the logical follow-on from the fact you don't know what is
secure, combined with the basic security concept that you start from the
secure position and relax rights is that the only safe way to do this is
to have an option which turns on scrubbing on free for *all* objects.
Ditto clearing all memory on S2D.
 
> 	3. LIFO allocators make re-allocation leaks possible. If an
> 	interface allocates a buffer, stores data in it and releases it

No argument.

> I agree the ideal, best approach would be to sanitize all pages. If you
> are interested on a patch doing just that (as long as a Kconfig option
> enables it), I can provide you with a clean one. The original code in
> PaX did just that.

Runtime would be even better (and I think you can argue boot one way or
the other). That way distributions can ship the feature for people
who want it but without the performance hit.

> is, no execmem/execstack/execheap checks). We should really make .rodata
> read-only by default, and disallow mprotect to produce RWX mappings by
> default. Otherwise our NX is flawed. These are matters for another
> patch, and a different discussion too.

Thats what SELinux is for and with SELinux you can default that way and
relax (as you have to because there are lots of things that produce RWX
mappings).

A similar problem is the lack of kernel side true read-only, which is a
weakness in the hypervisors. Physical hardware can't do irrevocable read
only but it is perfectly doable for a guest under a hypervisor, and the
hypervisor kernel can often be configured to have vastly less external
exposure. (Rik that's a hint to remind the KVM people ;))

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-21 19:17 ` Rik van Riel
  2009-05-21 19:30   ` Larry H.
@ 2009-05-22  7:34   ` Ingo Molnar
  2009-05-22 11:38       ` Larry H.
  1 sibling, 1 reply; 220+ messages in thread
From: Ingo Molnar @ 2009-05-22  7:34 UTC (permalink / raw)
  To: Rik van Riel
  Cc: Larry H., linux-kernel, Linus Torvalds, linux-mm, Ingo Molnar, Alan Cox


* Rik van Riel <riel@redhat.com> wrote:

> Larry H. wrote:
>> This patch adds support for the SENSITIVE flag to the low level page
>> allocator. An additional GFP flag is added for use with higher level
>> allocators (GFP_SENSITIVE, which implies GFP_ZERO).
>
> Sensitive to what?  Allocation failures?
>
> Kidding, I read the rest of your emails.  However,
> chances are whoever runs into the code later on
> will not read everything.
>
> Would GFP_CONFIDENTIAL & PG_confidential be a better
> name, since it indicates the page stores confidential
> information, which should not be leaked?

The whole kernel contains data that 'should not be leaked'.

_If_ any of this is done, i'd _very_ strongly suggest to describe it 
by what it does, not by what its subjective security attribute is.

'PG_eyes_only' or 'PG_eagle_azf_compartmented' is silly naming. It 
is silly because it hardcodes one particular expectation/model of 
'security'.

GFP_NON_PERSISTENT & PG_non_persistent is a _lot_ better, because it 
is a technical description of how information spreads. (which is the 
underlying principle of every security model)

That name alone tells us everything about what this does: it does not allow 
this data to reach or touch persistent storage. It won't be swapped 
and it won't be saved by hibernation. It will also be cleared when 
freed, to achieve its goal of never touching persistent storage.

What (if any) security relevance this has, is left to the user of 
such facilities.

In-kernel crypto key storage using GFP_NON_PERSISTENT makes some 
sense - as long as the kernel stack itself is marked 
GFP_NON_PERSISTENT as well ... which is quite hairy from a 
performance point of view: we _dont_ want to clear the full stack 
page for every kernel thread exiting.

For user-space keys it is easier to isolate the spreading of that 
data, because the kernel never reads it. So MAP_NON_PERSISTENT makes 
some sense.

	Ingo

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-21 22:47               ` Alan Cox
@ 2009-05-22 11:22                 ` Larry H.
  -1 siblings, 0 replies; 220+ messages in thread
From: Larry H. @ 2009-05-22 11:22 UTC (permalink / raw)
  To: Alan Cox
  Cc: Rik van Riel, Peter Zijlstra, linux-kernel, Linus Torvalds,
	linux-mm, Ingo Molnar

On 23:47 Thu 21 May     , Alan Cox wrote:
> Seems a good way to proceed. I think at the point you've got the SLAB/VMA
> flags you'll find you don't need anything else. The VMA flag is valuable
> on its own for the more relaxed security case of just wanting to crypt a
> bit of swap to be safe (or to dump some stuff to a separate secure swap)
> without doing all swap.
> 
> If you've got a rogue module you already lost, except that by marking
> what is sensitive you made the bad guys job easier.

Definitely, but there's no need for this at all. If you want to target
certain sensitive data, just grep the variable names in the
world-readable System.map of your distribution of choice.

> Obvious candidates would be AGPGart, DRI buffers, DMA lowmem buffering,
> pad buffers - I dont think they clear all cases and in some cases
> (notably DRI) there is data that is potentially "secret" stored in the
> video RAM.

Overkill. Again, you really don't need to scan memory for anything. Much
less video memory. If you already have CAP_SYS_RAWIO, you have more
reliable and easier techniques to intercept information.

> You can also extract bits of data post clear out of fascinating corners
> like the debug interfaces to FIFOs on I/O controllers. There are also a
> large category of buffers that don't get freed/reallocated notably ring
> buffers for networking, and tty ring buffers which are mostly not freed
> for the lifetime of the device (ie forever). Cleaning all RAM as an
> option on S2D and shutdown would be the only real way you'd fix that.

One of the patches takes care of tty buffer management to adopt the new
flag. The only real way to solve the lengthy list of security risks
coming along suspend-to-disk approaches is to simply disable
suspend-to-disk altogether.
 
> However you can't tell what is sensitive, you must assume anything is.
> Even a graphic pixmap might be sensitive or an executable's presence
> might reveal things.

So far we can start off by assuming the cryptoapi kmallocated buffers
are sensitive, especially those containing context information. The tty
buffer management ones too. And so are the AF_KEY ones. Etc.

Step by step, we'll get to full system memory labeling when there's an
actual hardware enforcement platform that allows us to implement this in
a manner which actually works, and isn't subject to being subverted as is
any other current security mechanism present in the Linux or other OS
kernels.

> I would argue the logical follow-on from the fact you don't know what is
> secure, combined with the basic security concept that you start from the
> secure position and relax rights is that the only safe way to do this is
> to have an option which turns on scrubbing on free for *all* objects.
> Ditto clearing all memory on S2D.

We're still talking about preventing kernel memory leaks right now, but
mostly everyone around assumed userland is being discussed. That will
come later, though.

> Runtime would be even better (and I think you can argue boot one way or
> the other). That way distributions can ship the feature for people
> who want it but without the performance hit.

Alright, if you want that (no config option, but boot cmdline option), I
can follow up with a tested patch today. If timing constraints allow it,
I might be able to provide the updated patchset for SLAB/vma flags too.

I'll follow-up to this thread with those.

> Thats what SELinux is for and with SELinux you can default that way and
> relax (as you have to because there are lots of things that produce RWX
> mappings).

I would have preferred to have a binary marking as well, but let's keep
this discussion for a future thread. You can fire me a private email as
well, or start a new thread.

> A similar problem is the lack of kernel side true read-only, which is a
> weakness in the hypervisors. Physical hardware can't do irrevocable read
> only but it is perfectly doable for a guest under a hypervisor, and the
> hypervisor kernel can often be configured to have vastly less external
> exposure. (Rik that's a hint to remind the KVM people ;))

PaX has KERNEXEC with its own methods to allow enforcement of read-only
and executable kernel pages, to a certain degree. Perhaps a look at
its implementation could provide some ideas on these grounds.

	Larry


^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-22 11:22                 ` Larry H.
  0 siblings, 0 replies; 220+ messages in thread
From: Larry H. @ 2009-05-22 11:22 UTC (permalink / raw)
  To: Alan Cox
  Cc: Rik van Riel, Peter Zijlstra, linux-kernel, Linus Torvalds,
	linux-mm, Ingo Molnar

On 23:47 Thu 21 May     , Alan Cox wrote:
> Seems a good way to proceed. I think at the point you've got the SLAB/VMA
> flags you'll find you don't need anything else. The VMA flag is valuable
> on its own for the more relaxed security case of just wanting to crypt a
> bit of swap to be safe (or to dump some stuff to a separate secure swap)
> without doing all swap.
> 
> If you've got a rogue module you already lost, except that by marking
> what is sensitive you made the bad guys job easier.

Definitely, but there's no need for this at all. If you want to target
certain sensitive data, just grep the variable names in the
world-readable System.map of your distribution of choice.

> Obvious candidates would be AGPGart, DRI buffers, DMA lowmem buffering,
> pad buffers - I dont think they clear all cases and in some cases
> (notably DRI) there is data that is potentially "secret" stored in the
> video RAM.

Overkill. Again, you really don't need to scan memory for anything. Much
less video memory. If you already have CAP_SYS_RAWIO, you have more
reliable and easier techniques to intercept information.

> You can also extract bits of data post clear out of fascinating corners
> like the debug interfaces to FIFOs on I/O controllers. There are also a
> large category of buffers that don't get freed/reallocated notably ring
> buffers for networking, and tty ring buffers which are mostly not freed
> for the lifetime of the device (ie forever). Cleaning all RAM as an
> option on S2D and shutdown would be the only real way you'd fix that.

One of the patches takes care of tty buffer management to adopt the new
flag. The only real way to solve the lengthy list of security risks
coming along suspend-to-disk approaches is to simply disable
suspend-to-disk altogether.
 
> However you can't tell what is sensitive, you must assume anything is.
> Even a graphic pixmap might be sensitive or an executable's presence
> might reveal things.

So far we can start off by assuming the cryptoapi kmallocated buffers
are sensitive, especially those containing context information. The tty
buffer management ones too. And so are the AF_KEY ones. Etc.

Step by step, we'll get to full system memory labeling when there's an
actual hardware enforcement platform that allows us to implement this in
a manner which actually works, and isn't subject to being subverted as is
any other current security mechanism present in the Linux or other OS
kernels.

> I would argue the logical follow-on from the fact you don't know what is
> secure, combined with the basic security concept that you start from the
> secure position and relax rights is that the only safe way to do this is
> to have an option which turns on scrubbing on free for *all* objects.
> Ditto clearing all memory on S2D.

We're still talking about preventing kernel memory leaks right now, but
mostly everyone around assumed userland is being discussed. That will
come later, though.

> Runtime would be even better (and I think you can argue boot one way or
> the other). That way distributions can ship the feature for people
> who want it but without the performance hit.

Alright, if you want that (no config option, but boot cmdline option), I
can follow up with a tested patch today. If timing constraints allow it,
I might be able to provide the updated patchset for SLAB/vma flags too.

I'll follow-up to this thread with those.

> Thats what SELinux is for and with SELinux you can default that way and
> relax (as you have to because there are lots of things that produce RWX
> mappings).

I would have preferred to have a binary marking as well, but let's keep
this discussion for a future thread. You can fire me a private email as
well, or start a new thread.

> A similar problem is the lack of kernel side true read-only, which is a
> weakness in the hypervisors. Physical hardware can't do irrevocable read
> only but it is perfectly doable for a guest under a hypervisor, and the
> hypervisor kernel can often be configured to have vastly less external
> exposure. (Rik that's a hint to remind the KVM people ;))

PaX has KERNEXEC with its own methods to allow enforcement of read-only
and executable kernel pages, to a certain degree. Perhaps a look at
its implementation could provide some ideas on these grounds.

	Larry

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-22  7:34   ` Ingo Molnar
@ 2009-05-22 11:38       ` Larry H.
  0 siblings, 0 replies; 220+ messages in thread
From: Larry H. @ 2009-05-22 11:38 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Rik van Riel, linux-kernel, Linus Torvalds, linux-mm,
	Ingo Molnar, Alan Cox, pageexec

NOTE: Let's keep the PaX Team on CC from now on, they might have further
input to this discussion. (pageexec at freemail dot hu)

On 09:34 Fri 22 May     , Ingo Molnar wrote:
> The whole kernel contains data that 'should not be leaked'.
> _If_ any of this is done, i'd _very_ strongly suggest to describe it 
> by what it does, not by what its subjective security attribute is.
> 
> 'PG_eyes_only' or 'PG_eagle_azf_compartmented' is silly naming. It 
> is silly because it hardcodes one particular expectation/model of 
> 'security'.
> 
> GFP_NON_PERSISTENT & PG_non_persistent is a _lot_ better, because it 
> is a technical description of how information spreads. (which is the 
> underlying principle of every security model)
>
> That name alone tells us everything about what this does: it does not allow 
> this data to reach or touch persistent storage. It won't be swapped 
> and it won't be saved by hibernation. It will also be cleared when 
> freed, to achieve its goal of never touching persistent storage.

The problem is that these patches have a more broad purpose and I never
mentioned persistent storage as one of them (initially). Check earlier
messages to see what has been discussed so far.

Regarding the naming changes, those have been done as of Rik's comments
and I would rather focus on the technical and implementation side now.

> In-kernel crypto key storage using GFP_NON_PERSISTENT makes some 
> sense - as long as the kernel stack itself is marked 
> GFP_NON_PERSISTENT as well ... which is quite hairy from a 
> performance point of view: we _dont_ want to clear the full stack 
> page for every kernel thread exiting.

Burning the stack there is beyond overkill.

> For user-space keys it is easier to isolate the spreading of that 
> data, because the kernel never reads it. So MAP_NON_PERSISTENT makes 
> some sense.

Yes, but that should be an incremental patch once we have settled down on
the other ones.

	Larry


^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-22 11:38       ` Larry H.
  0 siblings, 0 replies; 220+ messages in thread
From: Larry H. @ 2009-05-22 11:38 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Rik van Riel, linux-kernel, Linus Torvalds, linux-mm,
	Ingo Molnar, Alan Cox, pageexec

NOTE: Let's keep the PaX Team on CC from now on, they might have further
input to this discussion. (pageexec at freemail dot hu)

On 09:34 Fri 22 May     , Ingo Molnar wrote:
> The whole kernel contains data that 'should not be leaked'.
> _If_ any of this is done, i'd _very_ strongly suggest to describe it 
> by what it does, not by what its subjective security attribute is.
> 
> 'PG_eyes_only' or 'PG_eagle_azf_compartmented' is silly naming. It 
> is silly because it hardcodes one particular expectation/model of 
> 'security'.
> 
> GFP_NON_PERSISTENT & PG_non_persistent is a _lot_ better, because it 
> is a technical description of how information spreads. (which is the 
> underlying principle of every security model)
>
> That name alone tells us everything about what this does: it does not allow 
> this data to reach or touch persistent storage. It won't be swapped 
> and it won't be saved by hibernation. It will also be cleared when 
> freed, to achieve its goal of never touching persistent storage.

The problem is that these patches have a more broad purpose and I never
mentioned persistent storage as one of them (initially). Check earlier
messages to see what has been discussed so far.

Regarding the naming changes, those have been done as of Rik's comments
and I would rather focus on the technical and implementation side now.

> In-kernel crypto key storage using GFP_NON_PERSISTENT makes some 
> sense - as long as the kernel stack itself is marked 
> GFP_NON_PERSISTENT as well ... which is quite hairy from a 
> performance point of view: we _dont_ want to clear the full stack 
> page for every kernel thread exiting.

Burning the stack there is beyond overkill.

> For user-space keys it is easier to isolate the spreading of that 
> data, because the kernel never reads it. So MAP_NON_PERSISTENT makes 
> some sense.

Yes, but that should be an incremental patch once we have settled down on
the other ones.

	Larry

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-22 11:22                 ` Larry H.
@ 2009-05-22 13:37                   ` Alan Cox
  -1 siblings, 0 replies; 220+ messages in thread
From: Alan Cox @ 2009-05-22 13:37 UTC (permalink / raw)
  To: Larry H.
  Cc: Rik van Riel, Peter Zijlstra, linux-kernel, Linus Torvalds,
	linux-mm, Ingo Molnar

> Definitely, but there's no need for this at all. If you want to target
> certain sensitive data, just grep the variable names in the
> world-readable System.map of your distribution of choice.

A lot of dynamic data will not be findable via System.map, but it
certainly becomes easy to find if you've got a "look mummy this one is
stamped confidential" flag.

> > Obvious candidates would be AGPGart, DRI buffers, DMA lowmem buffering,
> > pad buffers - I dont think they clear all cases and in some cases
> > (notably DRI) there is data that is potentially "secret" stored in the
> > video RAM.
> 
> Overkill. Again, you really don't need to scan memory for anything. Much
> less video memory. If you already have CAP_SYS_RAWIO, you have more
> reliable and easier techniques to intercept information.

If you are working to clear memory then your model is totally flawed
because a lot of memory you might want to handle this way is never
deallocated.

> > You can also extract bits of data post clear out of fascinating corners
> > like the debug interfaces to FIFOs on I/O controllers. There are also a
> > large category of buffers that don't get freed/reallocated notably ring
> > buffers for networking, and tty ring buffers which are mostly not freed
> > for the lifetime of the device (ie forever). Cleaning all RAM as an
> > option on S2D and shutdown would be the only real way you'd fix that.
> 
> One of the patches takes care of tty buffer management to adopt the new
> flag. The only real way to solve the lengthy list of security risks
> coming along suspend-to-disk approaches is to simply disable
> suspend-to-disk altogether.

Which is a rather peculiar viewpoint you hold that I would disagree with
entirely.


^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-22 13:37                   ` Alan Cox
  0 siblings, 0 replies; 220+ messages in thread
From: Alan Cox @ 2009-05-22 13:37 UTC (permalink / raw)
  To: Larry H.
  Cc: Rik van Riel, Peter Zijlstra, linux-kernel, Linus Torvalds,
	linux-mm, Ingo Molnar

> Definitely, but there's no need for this at all. If you want to target
> certain sensitive data, just grep the variable names in the
> world-readable System.map of your distribution of choice.

A lot of dynamic data will not be findable via System.map, but it
certainly becomes easy to find if you've got a "look mummy this one is
stamped confidential" flag.

> > Obvious candidates would be AGPGart, DRI buffers, DMA lowmem buffering,
> > pad buffers - I dont think they clear all cases and in some cases
> > (notably DRI) there is data that is potentially "secret" stored in the
> > video RAM.
> 
> Overkill. Again, you really don't need to scan memory for anything. Much
> less video memory. If you already have CAP_SYS_RAWIO, you have more
> reliable and easier techniques to intercept information.

If you are working to clear memory then your model is totally flawed
because a lot of memory you might want to handle this way is never
deallocated.

> > You can also extract bits of data post clear out of fascinating corners
> > like the debug interfaces to FIFOs on I/O controllers. There are also a
> > large category of buffers that don't get freed/reallocated notably ring
> > buffers for networking, and tty ring buffers which are mostly not freed
> > for the lifetime of the device (ie forever). Cleaning all RAM as an
> > option on S2D and shutdown would be the only real way you'd fix that.
> 
> One of the patches takes care of tty buffer management to adopt the new
> flag. The only real way to solve the lengthy list of security risks
> coming along suspend-to-disk approaches is to simply disable
> suspend-to-disk altogether.

Which is a rather peculiar viewpoint you hold that I would disagree with
entirely.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-22 11:38       ` Larry H.
@ 2009-05-22 13:39         ` Alan Cox
  -1 siblings, 0 replies; 220+ messages in thread
From: Alan Cox @ 2009-05-22 13:39 UTC (permalink / raw)
  To: Larry H.
  Cc: Ingo Molnar, Rik van Riel, linux-kernel, Linus Torvalds,
	linux-mm, Ingo Molnar, pageexec

> > performance point of view: we _dont_ want to clear the full stack 
> > page for every kernel thread exiting.
> 
> Burning the stack there is beyond overkill.

Yet most of our historic leaks have been padding bytes in stack based
structures. Your position seems very inconsistent.

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-22 13:39         ` Alan Cox
  0 siblings, 0 replies; 220+ messages in thread
From: Alan Cox @ 2009-05-22 13:39 UTC (permalink / raw)
  To: Larry H.
  Cc: Ingo Molnar, Rik van Riel, linux-kernel, Linus Torvalds,
	linux-mm, Ingo Molnar, pageexec

> > performance point of view: we _dont_ want to clear the full stack 
> > page for every kernel thread exiting.
> 
> Burning the stack there is beyond overkill.

Yet most of our historic leaks have been padding bytes in stack based
structures. Your position seems very inconsistent.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-22 13:39         ` Alan Cox
@ 2009-05-22 18:03           ` Larry H.
  -1 siblings, 0 replies; 220+ messages in thread
From: Larry H. @ 2009-05-22 18:03 UTC (permalink / raw)
  To: Alan Cox
  Cc: Ingo Molnar, Rik van Riel, linux-kernel, Linus Torvalds,
	linux-mm, Ingo Molnar, pageexec

On 14:39 Fri 22 May     , Alan Cox wrote:
> > > performance point of view: we _dont_ want to clear the full stack 
> > > page for every kernel thread exiting.
> > 
> > Burning the stack there is beyond overkill.
> 
> Yet most of our historic leaks have been padding bytes in stack based
> structures. Your position seems very inconsistent.

Alright, I think I had enough of the theoretical mumbo jumbo, with all
due respect. Let's get on with the show.

I'm going to present a very short analysis for different historic leaks
which had little to do with 'padding bytes in stack', but more like
arbitrary kernel memory leaked to userland, or written to disk, or sent
over the network. If by the end of this message you still
believe my position is remotely inconsistent, I'll have to politely
request you to back it up with something that can be technically and
empirically proven from both programmer and security perspectives.

1. CVE-2005-0400 aka the infamous ext2_make_empty() disaster
(http://arkoon.net/advisories/ext2-make-empty-leak.txt)

The ext2 code before 2.6.11.6 was affected by an uninitialized variable
usage vulnerability which led to 4072 bytes worth of kernel memory
being leaked to disk, when creating a block for a new directory entry.
The affected function was ext2_make_empty() and it was fixed by adding a
memset call to zero the memory.

http://lxr.linux.no/linux+v2.6.12/fs/ext2/dir.c#L578

 594        kaddr = kmap_atomic(page, KM_USER0);
 595       memset(kaddr, 0, chunk_size);
 596        de = (struct ext2_dir_entry_2 *)kaddr;
 597        de->name_len = 1;
 598        de->rec_len = cpu_to_le16(EXT2_DIR_REC_LEN(1));

http://lxr.linux.no/linux-bk+v2.6.11.5/fs/ext2/dir.c#L578

 594        kaddr = kmap_atomic(page, KM_USER0);
 595        de = (struct ext2_dir_entry_2 *)kaddr;
 596        de->name_len = 1;
 597        de->rec_len = cpu_to_le16(EXT2_DIR_REC_LEN(1));
 598        memcpy (de->name, ".\0\0", 4);

An atomic call to kmap(). This led to widespread searching for online
ext2 images and general hilarity. And it was a longstanding issue in
the kernel, too.

2. CVE-2009-0787 aka ecryptfs_write_metadata_to_contents() leak
(commit 8faece5f906725c10e7a1f6caf84452abadbdc7b)

The ecryptfs function ecryptfs_write_metadata_to_contents() leaked up to
an entire page to userland. An incorrect size was used during the copy
operation, leading to more bytes being copied, hence the leak.

+       virt_len = crypt_stat->num_header_bytes_at_front;
+       order = get_order(virt_len);
        /* Released in this function */
-       virt = (char *)get_zeroed_page(GFP_KERNEL);
+       virt = (char *)ecryptfs_get_zeroed_pages(GFP_KERNEL,
	order);

3. CVE-2002-0046 aka information leak over ICMP TTL Exceeded responses
(http://archives.neohapsis.com/archives/bugtraq/2002-01/0234.html)
(http://rhn.redhat.com/errata/RHSA-2002-007.html)

A series of fragmented ICMP packets could trigger an ICMP TTL Exceeded
response that included 20 bytes of arbitrary kernel memory, which was
sent back to the attacker. I didn't bother digging for the patch. But
you bet it has to do with kmallocated skb buffers (take a look at
http://lxr.linux.no/linux-old+v2.2.16/net/ipv4/ipip.c#L436).

4. CVE-2007-6417 aka shmem_getpage() tmpfs leak
(http://marc.info/?l=linux-kernel&m=119627664702379&w=2)

An issue related with tmpfs, users were able to obtain kernel memory
because the shmem_getpage() didn't always zero the memory when reusing
an allocated page. The vulnerability was present from 2.6.11 through
2.6.23.

@@ -1306,6 +1306,7 @@ repeat:
 
		info->alloced++;
 		spin_unlock(&info->lock);
+		clear_highpage(filepage);
		flush_dcache_page(filepage);
 		SetPageUptodate(filepage);
	}

If the caller provided the page already allocated, the GFP_ZERO
allocation never happened, and the page was never cleared. Interesting
issue since my patch basically ensures this doesn't happen. Nevermind.

5. CVE-2008-4113 aka sctp_getsockopt_hmac_ident() leak (< 2.6.26.4)
(commit d97240552cd98c4b07322f30f66fd9c3ba4171de)
(exploit by Jon Oberheide at http://www.milw0rm.com/exploits/7618)

In kernels before 2.6.26.4 with SCTP and the SCTP-AUTH extension
enabled, an unprivileged local can leak arbitrary kernel memory abusing
an unbounded (due to incorrect length check) copy in the
sctp_getsockopt_hmac_ident() function. The data copied comes from a
kmallocated object (the struct sctp_association *asoc). This could be
exploited with a SCTP_HMAC_IDENT IOCTL request (through sctp_getsockopt).

>From the exploit:
  *   If SCTP AUTH is enabled (net.sctp.auth_enable = 1), this exploit
  *   allow an  unprivileged user to dump an arbitrary amount (DUMP_SIZE) of
  *   kernel memory out to a file (DUMP_FILE). If SCTP AUTH is not enabled, the
  *   exploit will trigger a kernel OOPS.

It's worth noting that the commit title and description don't reveal the
true nature of the bug (a perfectly exploitable vulnerability, platform
independent like most other information leaks):
"sctp: fix random memory dereference with SCTP_HMAC_IDENT option."

At least it's not entirely deceitful. It's definitely dereferencing
"random memory".

6. CVE-2007-1000 aka ipv6_getsockopt_sticky() leak (<2.6.20.2)
(http://bugzilla.kernel.org/show_bug.cgi?id=8134)
(commit 286930797d74b2c9a5beae84836044f6a836235f)
(exploit at http://www.milw0rm.com/exploits/4172)

The bug was initially assumed to be a simple NULL pointer dereference by
Chris Wright... but since kernel and userland address space coexist in
x86 and other architectures, this is an exploitable condition which
was used to leak kernel memory to userland after a page was allocated at
NULL by the exploit abusing the issue.

-

Further examples could be found in the commit logs or mining other places.
Also, this is the tip of the iceberg. Whatever is lurking deep inside the
kernel sources right now will only be deterred with my patch and any future
modifications that cover corner cases.

The following file contains a list of CVE numbers correlated with
commits, which comes handy to look for more examples:
http://web.mit.edu/tabbott/www/cve-data/cve-data.txt

I've saved a backup copy in case it goes offline and will put it
somewhere accessible for people on the list in such a case.

My intention here is to make the kernel more secure, not proving you
wrong or right.

You are a smart fellow and I respect your technical and kernel development
acumen. Smart people don't waste their time on meaningless banter.

I'll have the modified patches ready in an hour or so, hopefully.

	Larry


^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-22 18:03           ` Larry H.
  0 siblings, 0 replies; 220+ messages in thread
From: Larry H. @ 2009-05-22 18:03 UTC (permalink / raw)
  To: Alan Cox
  Cc: Ingo Molnar, Rik van Riel, linux-kernel, Linus Torvalds,
	linux-mm, Ingo Molnar, pageexec

On 14:39 Fri 22 May     , Alan Cox wrote:
> > > performance point of view: we _dont_ want to clear the full stack 
> > > page for every kernel thread exiting.
> > 
> > Burning the stack there is beyond overkill.
> 
> Yet most of our historic leaks have been padding bytes in stack based
> structures. Your position seems very inconsistent.

Alright, I think I had enough of the theoretical mumbo jumbo, with all
due respect. Let's get on with the show.

I'm going to present a very short analysis for different historic leaks
which had little to do with 'padding bytes in stack', but more like
arbitrary kernel memory leaked to userland, or written to disk, or sent
over the network. If by the end of this message you still
believe my position is remotely inconsistent, I'll have to politely
request you to back it up with something that can be technically and
empirically proven from both programmer and security perspectives.

1. CVE-2005-0400 aka the infamous ext2_make_empty() disaster
(http://arkoon.net/advisories/ext2-make-empty-leak.txt)

The ext2 code before 2.6.11.6 was affected by an uninitialized variable
usage vulnerability which led to 4072 bytes worth of kernel memory
being leaked to disk, when creating a block for a new directory entry.
The affected function was ext2_make_empty() and it was fixed by adding a
memset call to zero the memory.

http://lxr.linux.no/linux+v2.6.12/fs/ext2/dir.c#L578

 594        kaddr = kmap_atomic(page, KM_USER0);
 595       memset(kaddr, 0, chunk_size);
 596        de = (struct ext2_dir_entry_2 *)kaddr;
 597        de->name_len = 1;
 598        de->rec_len = cpu_to_le16(EXT2_DIR_REC_LEN(1));

http://lxr.linux.no/linux-bk+v2.6.11.5/fs/ext2/dir.c#L578

 594        kaddr = kmap_atomic(page, KM_USER0);
 595        de = (struct ext2_dir_entry_2 *)kaddr;
 596        de->name_len = 1;
 597        de->rec_len = cpu_to_le16(EXT2_DIR_REC_LEN(1));
 598        memcpy (de->name, ".\0\0", 4);

An atomic call to kmap(). This led to widespread searching for online
ext2 images and general hilarity. And it was a longstanding issue in
the kernel, too.

2. CVE-2009-0787 aka ecryptfs_write_metadata_to_contents() leak
(commit 8faece5f906725c10e7a1f6caf84452abadbdc7b)

The ecryptfs function ecryptfs_write_metadata_to_contents() leaked up to
an entire page to userland. An incorrect size was used during the copy
operation, leading to more bytes being copied, hence the leak.

+       virt_len = crypt_stat->num_header_bytes_at_front;
+       order = get_order(virt_len);
        /* Released in this function */
-       virt = (char *)get_zeroed_page(GFP_KERNEL);
+       virt = (char *)ecryptfs_get_zeroed_pages(GFP_KERNEL,
	order);

3. CVE-2002-0046 aka information leak over ICMP TTL Exceeded responses
(http://archives.neohapsis.com/archives/bugtraq/2002-01/0234.html)
(http://rhn.redhat.com/errata/RHSA-2002-007.html)

Series of fragmented ICMP packets that generate an ICMP TTL
Exceeded response would include 20 bytes of arbitrary kernel memory,
sent back to the attacker. I didn't bother digging for the patch. But
you bet it has to do with kmallocated skb buffers (take a look at
http://lxr.linux.no/linux-old+v2.2.16/net/ipv4/ipip.c#L436).

4. CVE-2007-6417 aka shmem_getpage() tmpfs leak
(http://marc.info/?l=linux-kernel&m=119627664702379&w=2)

An issue related with tmpfs, users were able to obtain kernel memory
because the shmem_getpage() didn't always zero the memory when reusing
an allocated page. The vulnerability was present from 2.6.11 through
2.6.23.

@@ -1306,6 +1306,7 @@ repeat:
 
		info->alloced++;
 		spin_unlock(&info->lock);
+		clear_highpage(filepage);
		flush_dcache_page(filepage);
 		SetPageUptodate(filepage);
	}

If the caller provided the page already allocated, the GFP_ZERO
allocation never happened, and the page was never cleared. Interesting
issue since my patch basically ensures this doesn't happen. Nevermind.

5. CVE-2008-4113 aka sctp_getsockopt_hmac_ident() leak (< 2.6.26.4)
(commit d97240552cd98c4b07322f30f66fd9c3ba4171de)
(exploit by Jon Oberheide at http://www.milw0rm.com/exploits/7618)

In kernels before 2.6.26.4 with SCTP and the SCTP-AUTH extension
enabled, an unprivileged local can leak arbitrary kernel memory abusing
an unbounded (due to incorrect length check) copy in the
sctp_getsockopt_hmac_ident() function. The data copied comes from a
kmallocated object (the struct sctp_association *asoc). This could be
exploited with a SCTP_HMAC_IDENT IOCTL request (through sctp_getsockopt).

>From the exploit:
  *   If SCTP AUTH is enabled (net.sctp.auth_enable = 1), this exploit
  *   allow an  unprivileged user to dump an arbitrary amount (DUMP_SIZE) of
  *   kernel memory out to a file (DUMP_FILE). If SCTP AUTH is not enabled, the
  *   exploit will trigger a kernel OOPS.

It's worth noting that the commit title and description don't reveal the
true nature of the bug (a perfectly exploitable vulnerability, platform
independent like most other information leaks):
"sctp: fix random memory dereference with SCTP_HMAC_IDENT option."

At least it's not entirely deceitful. It's definitely dereferencing
"random memory".

6. CVE-2007-1000 aka ipv6_getsockopt_sticky() leak (<2.6.20.2)
(http://bugzilla.kernel.org/show_bug.cgi?id=8134)
(commit 286930797d74b2c9a5beae84836044f6a836235f)
(exploit at http://www.milw0rm.com/exploits/4172)

The bug was initially assumed to be a simple NULL pointer dereference by
Chris Wright... but since kernel and userland address space coexist in
x86 and other architectures, this is an exploitable condition which
was used to leak kernel memory to userland after a page was allocated at
NULL by the exploit abusing the issue.

-

Further examples could be found in the commit logs or mining other places.
Also, this is the tip of the iceberg. Whatever is lurking deep inside the
kernel sources right now will only be deterred with my patch and any future
modifications that cover corner cases.

The following file contains a list of CVE numbers correlated with
commits, which comes handy to look for more examples:
http://web.mit.edu/tabbott/www/cve-data/cve-data.txt

I've saved a backup copy in case it goes offline and will put it
somewhere accessible for people on the list in such a case.

My intention here is to make the kernel more secure, not proving you
wrong or right.

You are a smart fellow and I respect your technical and kernel development
acumen. Smart people don't waste their time on meaningless banter.

I'll have the modified patches ready in an hour or so, hopefully.

	Larry

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-22 18:03           ` Larry H.
@ 2009-05-22 18:21             ` Alan Cox
  -1 siblings, 0 replies; 220+ messages in thread
From: Alan Cox @ 2009-05-22 18:21 UTC (permalink / raw)
  To: Larry H.
  Cc: Ingo Molnar, Rik van Riel, linux-kernel, Linus Torvalds,
	linux-mm, Ingo Molnar, pageexec

> I'm going to present a very short analysis for different historic leaks
> which had little to do with 'padding bytes in stack', but more like

I wouldn't dispute both classes exist - and a lot of the padding leaks
probably never got a CVE either (eg some of the tty ones just got fixed)

> If the caller provided the page already allocated, the GFP_ZERO
> allocation never happened, and the page was never cleared. Interesting
> issue since my patch basically ensures this doesn't happen. Nevermind.

Which patch are we talking about ? I'm all for a security option which
clears *all* objects on freeing them (actually the poison debug is pretty
close to this). That would fix these examples too.

> At least it's not entirely deceitful. It's definitely dereferencing
> "random memory".

Which could be another task stack you didn't clear - yes ?

> was used to leak kernel memory to userland after a page was allocated at
> NULL by the exploit abusing the issue.

Including task stacks yes ?

And task stacks contain copies of important data yes ?

> My intention here is to make the kernel more secure, not proving you
> wrong or right.

Ditto - which is why I'm coming from the position of an "if we free it
clear it" option. If you need that kind of security the cost should be
more than acceptable - especially with modern processors that can do
cache bypass on the clears.

> You are a smart fellow and I respect your technical and kernel development
> acumen. Smart people don't waste their time on meaningless banter.
> 
> I'll have the modified patches ready in an hour or so, hopefully.

Excellent.


^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-22 18:21             ` Alan Cox
  0 siblings, 0 replies; 220+ messages in thread
From: Alan Cox @ 2009-05-22 18:21 UTC (permalink / raw)
  To: Larry H.
  Cc: Ingo Molnar, Rik van Riel, linux-kernel, Linus Torvalds,
	linux-mm, Ingo Molnar, pageexec

> I'm going to present a very short analysis for different historic leaks
> which had little to do with 'padding bytes in stack', but more like

I wouldn't dispute both classes exist - and a lot of the padding leaks
probably never got a CVE either (eg some of the tty ones just got fixed)

> If the caller provided the page already allocated, the GFP_ZERO
> allocation never happened, and the page was never cleared. Interesting
> issue since my patch basically ensures this doesn't happen. Nevermind.

Which patch are we talking about ? I'm all for a security option which
clears *all* objects on freeing them (actually the poison debug is pretty
close to this). That would fix these examples too.

> At least it's not entirely deceitful. It's definitely dereferencing
> "random memory".

Which could be another task stack you didn't clear - yes ?

> was used to leak kernel memory to userland after a page was allocated at
> NULL by the exploit abusing the issue.

Including task stacks yes ?

And task stacks contain copies of important data yes ?

> My intention here is to make the kernel more secure, not proving you
> wrong or right.

Ditto - which is why I'm coming from the position of an "if we free it
clear it" option. If you need that kind of security the cost should be
more than acceptable - especially with modern processors that can do
cache bypass on the clears.

> You are a smart fellow and I respect your technical and kernel development
> acumen. Smart people don't waste their time on meaningless banter.
> 
> I'll have the modified patches ready in an hour or so, hopefully.

Excellent.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page  allocator
  2009-05-22 18:03           ` Larry H.
@ 2009-05-22 18:37             ` Nai Xia
  -1 siblings, 0 replies; 220+ messages in thread
From: Nai Xia @ 2009-05-22 18:37 UTC (permalink / raw)
  To: Larry H.
  Cc: Alan Cox, Ingo Molnar, Rik van Riel, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar, pageexec

On Sat, May 23, 2009 at 2:03 AM, Larry H. <research@subreption.com> wrote:
> On 14:39 Fri 22 May     , Alan Cox wrote:
>> > > performance point of view: we _dont_ want to clear the full stack
>> > > page for every kernel thread exiting.
>> >
>> > Burning the stack there is beyond overkill.
>>
>> Yet most of our historic leaks have been padding bytes in stack based
>> structures. Your position seems very inconsistent.
>
> Alright, I think I had enough of the theoretical mumbo jumbo, with all
> due respect. Let's get on with the show.
>
> I'm going to present a very short analysis for different historic leaks
> which had little to do with 'padding bytes in stack', but more like
> arbitrary kernel memory leaked to userland, or written to disk, or sent
> over the network. If by the end of this message you still
> believe my position is remotely inconsistent, I'll have to politely
> request you to back it up with something that can be technically and
> empirically proven from both programmer and security perspectives.
>
> 1. CVE-2005-0400 aka the infamous ext2_make_empty() disaster
> (http://arkoon.net/advisories/ext2-make-empty-leak.txt)
>
> The ext2 code before 2.6.11.6 was affected by an uninitialized variable
> usage vulnerability which lead to 4072 bytes worth of kernel memory
> being leaked to disk, when creating a block for a new directory entry.
> The affected function was ext2_make_empty() and it was fixed by adding a
> memset call to zero the memory.
>
> http://lxr.linux.no/linux+v2.6.12/fs/ext2/dir.c#L578
>
>  594        kaddr = kmap_atomic(page, KM_USER0);
>  595       memset(kaddr, 0, chunk_size);
>  596        de = (struct ext2_dir_entry_2 *)kaddr;
>  597        de->name_len = 1;
>  598        de->rec_len = cpu_to_le16(EXT2_DIR_REC_LEN(1));
>
> http://lxr.linux.no/linux-bk+v2.6.11.5/fs/ext2/dir.c#L578
>
>  594        kaddr = kmap_atomic(page, KM_USER0);
>  595        de = (struct ext2_dir_entry_2 *)kaddr;
>  596        de->name_len = 1;
>  597        de->rec_len = cpu_to_le16(EXT2_DIR_REC_LEN(1));
>  598        memcpy (de->name, ".\0\0", 4);
>
> An atomic call to kmap(). This lead to widespread searching for online
> ext2 images and general hilarity. And it was a longstanding issue in
> the kernel, too.
>
> 2. CVE-2009-0787 aka ecryptfs_write_metadata_to_contents() leak
> (commit 8faece5f906725c10e7a1f6caf84452abadbdc7b)
>
> The ecryptfs function ecryptfs_write_metadata_to_contents() leaked up to
> an entire page to userland. An incorrect size was used during the copy
> operation, leading to more bytes being copied, hence the leak.
>
> +       virt_len = crypt_stat->num_header_bytes_at_front;
> +       order = get_order(virt_len);
>        /* Released in this function */
> -       virt = (char *)get_zeroed_page(GFP_KERNEL);
> +       virt = (char *)ecryptfs_get_zeroed_pages(GFP_KERNEL,
>        order);
>
> 3. CVE-2002-0046 aka information leak over ICMP TTL Exceeded responses
> (http://archives.neohapsis.com/archives/bugtraq/2002-01/0234.html)
> (http://rhn.redhat.com/errata/RHSA-2002-007.html)
>
> Series of fragmented ICMP packets that generate an ICMP TTL
> Exceeded response would include 20 bytes of arbitrary kernel memory,
> sent back to the attacker. I didn't bother digging for the patch. But
> you bet it has to do with kmallocated skb buffers (take a look at
> http://lxr.linux.no/linux-old+v2.2.16/net/ipv4/ipip.c#L436).
>
> 4. CVE-2007-6417 aka shmem_getpage() tmpfs leak
> (http://marc.info/?l=linux-kernel&amp;m=119627664702379&amp;w=2)
>
> An issue related with tmpfs, users were able to obtain kernel memory
> because the shmem_getpage() didn't always zero the memory when reusing
> an allocated page. The vulnerability was present from 2.6.11 through
> 2.6.23.
>
> @@ -1306,6 +1306,7 @@ repeat:
>
>                info->alloced++;
>                spin_unlock(&info->lock);
> +               clear_highpage(filepage);
>                flush_dcache_page(filepage);
>                SetPageUptodate(filepage);
>        }
>
> If the caller provided the page already allocated, the GFP_ZERO
> allocation never happened, and the page was never cleared. Interesting
> issue since my patch basically ensures this doesn't happen. Nevermind.
>
> 5. CVE-2008-4113 aka sctp_getsockopt_hmac_ident() leak (< 2.6.26.4)
> (commit d97240552cd98c4b07322f30f66fd9c3ba4171de)
> (exploit by Jon Oberheide at http://www.milw0rm.com/exploits/7618)
>
> In kernels before 2.6.26.4 with SCTP and the SCTP-AUTH extension
> enabled, an unprivileged local can leak arbitrary kernel memory abusing
> an unbounded (due to incorrect length check) copy in the
> sctp_getsockopt_hmac_ident() function. The data copied comes from a
> kmallocated object (the struct sctp_association *asoc). This could be
> exploited with a SCTP_HMAC_IDENT IOCTL request (through sctp_getsockopt).
>
> From the exploit:
>  *   If SCTP AUTH is enabled (net.sctp.auth_enable = 1), this exploit
>  *   allow an  unprivileged user to dump an arbitrary amount (DUMP_SIZE) of
>  *   kernel memory out to a file (DUMP_FILE). If SCTP AUTH is not enabled, the
>  *   exploit will trigger a kernel OOPS.
>
> It's worth noting that the commit title and description don't reveal the
> true nature of the bug (a perfectly exploitable vulnerability, platform
> independent like most other information leaks):
> "sctp: fix random memory dereference with SCTP_HMAC_IDENT option."
>
> At least it's not entirely deceitful. It's definitely dereferencing
> "random memory".
>
> 6. CVE-2007-1000 aka ipv6_getsockopt_sticky() leak (<2.6.20.2)
> (http://bugzilla.kernel.org/show_bug.cgi?id=8134)
> (commit 286930797d74b2c9a5beae84836044f6a836235f)
> (exploit at http://www.milw0rm.com/exploits/4172)
>
> The bug was initially assumed to be a simple NULL pointer dereference by
> Chris Wright... but since kernel and userland address space coexist in
> x86 and other architectures, this is an exploitable condition which
> was used to leak kernel memory to userland after a page was allocated at
> NULL by the exploit abusing the issue.
>
> -
>
> Further examples could be found in the commit logs or mining other places.
> Also, this is the tip of the iceberg. Whatever is lurking deep inside the
> kernel sources right now will only be deterred with my patch and any future
> modifications that cover corner cases.
>
> The following file contains a list of CVE numbers correlated with
> commits, which comes handy to look for more examples:
> http://web.mit.edu/tabbott/www/cve-data/cve-data.txt
>
> I've saved a backup copy in case it goes offline and will put it
> somewhere accessible for people on the list in such a case.
>
> My intention here is to make the kernel more secure, not proving you
> wrong or right.
>
> You are a smart fellow and I respect your technical and kernel development
> acumen. Smart people don't waste their time on meaningless banter.

Hi, Larry, I think your patch DOES help in these cases.
But your first mention of "Cold Boot Attacks" may somewhat mislead
others to think it can close all the paths for leaking in-memory sensitive data.
Surely, cold boot attack can exploit ANY in-memory data, and just as
Alan Cox said,
not all of the data will be deallocated before your machine is cold rebooted.
Surely, there are many ways to leak data, this patch closes ONE of them.
I think Alan Cox is mentioning "the other ways" .
So maybe you are both right from different aspects.


>
> I'll have the modified patches ready in an hour or so, hopefully.
>
>        Larry
>
> --
> To unsubscribe, send a message with 'unsubscribe linux-mm' in
> the body to majordomo@kvack.org.  For more info on Linux MM,
> see: http://www.linux-mm.org/ .
> Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-22 18:37             ` Nai Xia
  0 siblings, 0 replies; 220+ messages in thread
From: Nai Xia @ 2009-05-22 18:37 UTC (permalink / raw)
  To: Larry H.
  Cc: Alan Cox, Ingo Molnar, Rik van Riel, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar, pageexec

On Sat, May 23, 2009 at 2:03 AM, Larry H. <research@subreption.com> wrote:
> On 14:39 Fri 22 May     , Alan Cox wrote:
>> > > performance point of view: we _dont_ want to clear the full stack
>> > > page for every kernel thread exiting.
>> >
>> > Burning the stack there is beyond overkill.
>>
>> Yet most of our historic leaks have been padding bytes in stack based
>> structures. Your position seems very inconsistent.
>
> Alright, I think I had enough of the theoretical mumbo jumbo, with all
> due respect. Let's get on with the show.
>
> I'm going to present a very short analysis for different historic leaks
> which had little to do with 'padding bytes in stack', but more like
> arbitrary kernel memory leaked to userland, or written to disk, or sent
> over the network. If by the end of this message you still
> believe my position is remotely inconsistent, I'll have to politely
> request you to back it up with something that can be technically and
> empirically proven from both programmer and security perspectives.
>
> 1. CVE-2005-0400 aka the infamous ext2_make_empty() disaster
> (http://arkoon.net/advisories/ext2-make-empty-leak.txt)
>
> The ext2 code before 2.6.11.6 was affected by an uninitialized variable
> usage vulnerability which lead to 4072 bytes worth of kernel memory
> being leaked to disk, when creating a block for a new directory entry.
> The affected function was ext2_make_empty() and it was fixed by adding a
> memset call to zero the memory.
>
> http://lxr.linux.no/linux+v2.6.12/fs/ext2/dir.c#L578
>
>  594        kaddr = kmap_atomic(page, KM_USER0);
>  595       memset(kaddr, 0, chunk_size);
>  596        de = (struct ext2_dir_entry_2 *)kaddr;
>  597        de->name_len = 1;
>  598        de->rec_len = cpu_to_le16(EXT2_DIR_REC_LEN(1));
>
> http://lxr.linux.no/linux-bk+v2.6.11.5/fs/ext2/dir.c#L578
>
>  594        kaddr = kmap_atomic(page, KM_USER0);
>  595        de = (struct ext2_dir_entry_2 *)kaddr;
>  596        de->name_len = 1;
>  597        de->rec_len = cpu_to_le16(EXT2_DIR_REC_LEN(1));
>  598        memcpy (de->name, ".\0\0", 4);
>
> An atomic call to kmap(). This lead to widespread searching for online
> ext2 images and general hilarity. And it was a longstanding issue in
> the kernel, too.
>
> 2. CVE-2009-0787 aka ecryptfs_write_metadata_to_contents() leak
> (commit 8faece5f906725c10e7a1f6caf84452abadbdc7b)
>
> The ecryptfs function ecryptfs_write_metadata_to_contents() leaked up to
> an entire page to userland. An incorrect size was used during the copy
> operation, leading to more bytes being copied, hence the leak.
>
> +       virt_len = crypt_stat->num_header_bytes_at_front;
> +       order = get_order(virt_len);
>        /* Released in this function */
> -       virt = (char *)get_zeroed_page(GFP_KERNEL);
> +       virt = (char *)ecryptfs_get_zeroed_pages(GFP_KERNEL,
>        order);
>
> 3. CVE-2002-0046 aka information leak over ICMP TTL Exceeded responses
> (http://archives.neohapsis.com/archives/bugtraq/2002-01/0234.html)
> (http://rhn.redhat.com/errata/RHSA-2002-007.html)
>
> Series of fragmented ICMP packets that generate an ICMP TTL
> Exceeded response would include 20 bytes of arbitrary kernel memory,
> sent back to the attacker. I didn't bother digging for the patch. But
> you bet it has to do with kmallocated skb buffers (take a look at
> http://lxr.linux.no/linux-old+v2.2.16/net/ipv4/ipip.c#L436).
>
> 4. CVE-2007-6417 aka shmem_getpage() tmpfs leak
> (http://marc.info/?l=linux-kernel&amp;m=119627664702379&amp;w=2)
>
> An issue related with tmpfs, users were able to obtain kernel memory
> because the shmem_getpage() didn't always zero the memory when reusing
> an allocated page. The vulnerability was present from 2.6.11 through
> 2.6.23.
>
> @@ -1306,6 +1306,7 @@ repeat:
>
>                info->alloced++;
>                spin_unlock(&info->lock);
> +               clear_highpage(filepage);
>                flush_dcache_page(filepage);
>                SetPageUptodate(filepage);
>        }
>
> If the caller provided the page already allocated, the GFP_ZERO
> allocation never happened, and the page was never cleared. Interesting
> issue since my patch basically ensures this doesn't happen. Nevermind.
>
> 5. CVE-2008-4113 aka sctp_getsockopt_hmac_ident() leak (< 2.6.26.4)
> (commit d97240552cd98c4b07322f30f66fd9c3ba4171de)
> (exploit by Jon Oberheide at http://www.milw0rm.com/exploits/7618)
>
> In kernels before 2.6.26.4 with SCTP and the SCTP-AUTH extension
> enabled, an unprivileged local can leak arbitrary kernel memory abusing
> an unbounded (due to incorrect length check) copy in the
> sctp_getsockopt_hmac_ident() function. The data copied comes from a
> kmallocated object (the struct sctp_association *asoc). This could be
> exploited with a SCTP_HMAC_IDENT IOCTL request (through sctp_getsockopt).
>
> From the exploit:
>  *   If SCTP AUTH is enabled (net.sctp.auth_enable = 1), this exploit
>  *   allow an  unprivileged user to dump an arbitrary amount (DUMP_SIZE) of
>  *   kernel memory out to a file (DUMP_FILE). If SCTP AUTH is not enabled, the
>  *   exploit will trigger a kernel OOPS.
>
> It's worth noting that the commit title and description don't reveal the
> true nature of the bug (a perfectly exploitable vulnerability, platform
> independent like most other information leaks):
> "sctp: fix random memory dereference with SCTP_HMAC_IDENT option."
>
> At least it's not entirely deceitful. It's definitely dereferencing
> "random memory".
>
> 6. CVE-2007-1000 aka ipv6_getsockopt_sticky() leak (<2.6.20.2)
> (http://bugzilla.kernel.org/show_bug.cgi?id=8134)
> (commit 286930797d74b2c9a5beae84836044f6a836235f)
> (exploit at http://www.milw0rm.com/exploits/4172)
>
> The bug was initially assumed to be a simple NULL pointer dereference by
> Chris Wright... but since kernel and userland address space coexist in
> x86 and other architectures, this is an exploitable condition which
> was used to leak kernel memory to userland after a page was allocated at
> NULL by the exploit abusing the issue.
>
> -
>
> Further examples could be found in the commit logs or mining other places.
> Also, this is the tip of the iceberg. Whatever is lurking deep inside the
> kernel sources right now will only be deterred with my patch and any future
> modifications that cover corner cases.
>
> The following file contains a list of CVE numbers correlated with
> commits, which comes handy to look for more examples:
> http://web.mit.edu/tabbott/www/cve-data/cve-data.txt
>
> I've saved a backup copy in case it goes offline and will put it
> somewhere accessible for people on the list in such a case.
>
> My intention here is to make the kernel more secure, not proving you
> wrong or right.
>
> You are a smart fellow and I respect your technical and kernel development
> acumen. Smart people don't waste their time on meaningless banter.

Hi, Larry, I think your patch DOES help in these cases.
But your first mention of "Cold Boot Attacks" may somewhat mislead
others to think it can close all the paths for leaking in-memory sensitive data.
Surely, cold boot attack can exploit ANY in-memory data, and just as
Alan Cox said,
not all of the data will be deallocated before your machine is cold rebooted.
Surely, there are many ways to leak data, this patch closes ONE of them.
I think Alan Cox is mentioning "the other ways" .
So maybe you are both right from different aspects.


>
> I'll have the modified patches ready in an hour or so, hopefully.
>
>        Larry
>
> --
> To unsubscribe, send a message with 'unsubscribe linux-mm' in
> the body to majordomo@kvack.org.  For more info on Linux MM,
> see: http://www.linux-mm.org/ .
> Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
>

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page  allocator
  2009-05-22 18:03           ` Larry H.
@ 2009-05-22 19:18             ` Nai Xia
  -1 siblings, 0 replies; 220+ messages in thread
From: Nai Xia @ 2009-05-22 19:18 UTC (permalink / raw)
  To: Larry H.
  Cc: Alan Cox, Ingo Molnar, Rik van Riel, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar, pageexec

On Sat, May 23, 2009 at 2:03 AM, Larry H. <research@subreption.com> wrote:
> On 14:39 Fri 22 May     , Alan Cox wrote:
>> > > performance point of view: we _dont_ want to clear the full stack
>> > > page for every kernel thread exiting.
>> >
>> > Burning the stack there is beyond overkill.
>>
>> Yet most of our historic leaks have been padding bytes in stack based
>> structures. Your position seems very inconsistent.
>
> Alright, I think I had enough of the theoretical mumbo jumbo, with all
> due respect. Let's get on with the show.
>
> I'm going to present a very short analysis for different historic leaks
> which had little to do with 'padding bytes in stack', but more like
> arbitrary kernel memory leaked to userland, or written to disk, or sent
> over the network. If by the end of this message you still
> believe my position is remotely inconsistent, I'll have to politely
> request you to back it up with something that can be technically and
> empirically proven from both programmer and security perspectives.
>
> 1. CVE-2005-0400 aka the infamous ext2_make_empty() disaster
> (http://arkoon.net/advisories/ext2-make-empty-leak.txt)
>
> The ext2 code before 2.6.11.6 was affected by an uninitialized variable
> usage vulnerability which lead to 4072 bytes worth of kernel memory
> being leaked to disk, when creating a block for a new directory entry.
> The affected function was ext2_make_empty() and it was fixed by adding a
> memset call to zero the memory.
>
> http://lxr.linux.no/linux+v2.6.12/fs/ext2/dir.c#L578
>
>  594        kaddr = kmap_atomic(page, KM_USER0);
>  595       memset(kaddr, 0, chunk_size);
>  596        de = (struct ext2_dir_entry_2 *)kaddr;
>  597        de->name_len = 1;
>  598        de->rec_len = cpu_to_le16(EXT2_DIR_REC_LEN(1));
>
> http://lxr.linux.no/linux-bk+v2.6.11.5/fs/ext2/dir.c#L578
>
>  594        kaddr = kmap_atomic(page, KM_USER0);
>  595        de = (struct ext2_dir_entry_2 *)kaddr;
>  596        de->name_len = 1;
>  597        de->rec_len = cpu_to_le16(EXT2_DIR_REC_LEN(1));
>  598        memcpy (de->name, ".\0\0", 4);
>
> An atomic call to kmap(). This lead to widespread searching for online
> ext2 images and general hilarity. And it was a longstanding issue in
> the kernel, too.
>
> 2. CVE-2009-0787 aka ecryptfs_write_metadata_to_contents() leak
> (commit 8faece5f906725c10e7a1f6caf84452abadbdc7b)
>
> The ecryptfs function ecryptfs_write_metadata_to_contents() leaked up to
> an entire page to userland. An incorrect size was used during the copy
> operation, leading to more bytes being copied, hence the leak.
>
> +       virt_len = crypt_stat->num_header_bytes_at_front;
> +       order = get_order(virt_len);
>        /* Released in this function */
> -       virt = (char *)get_zeroed_page(GFP_KERNEL);
> +       virt = (char *)ecryptfs_get_zeroed_pages(GFP_KERNEL,
>        order);
>
> 3. CVE-2002-0046 aka information leak over ICMP TTL Exceeded responses
> (http://archives.neohapsis.com/archives/bugtraq/2002-01/0234.html)
> (http://rhn.redhat.com/errata/RHSA-2002-007.html)
>
> Series of fragmented ICMP packets that generate an ICMP TTL
> Exceeded response would include 20 bytes of arbitrary kernel memory,
> sent back to the attacker. I didn't bother digging for the patch. But
> you bet it has to do with kmallocated skb buffers (take a look at
> http://lxr.linux.no/linux-old+v2.2.16/net/ipv4/ipip.c#L436).
>
> 4. CVE-2007-6417 aka shmem_getpage() tmpfs leak
> (http://marc.info/?l=linux-kernel&m=119627664702379&w=2)
>
> An issue related with tmpfs, users were able to obtain kernel memory
> because the shmem_getpage() didn't always zero the memory when reusing
> an allocated page. The vulnerability was present from 2.6.11 through
> 2.6.23.
>
> @@ -1306,6 +1306,7 @@ repeat:
>
>                info->alloced++;
>                spin_unlock(&info->lock);
> +               clear_highpage(filepage);
>                flush_dcache_page(filepage);
>                SetPageUptodate(filepage);
>        }
>
> If the caller provided the page already allocated, the GFP_ZERO
> allocation never happened, and the page was never cleared. Interesting
> issue since my patch basically ensures this doesn't happen. Nevermind.
>
> 5. CVE-2008-4113 aka sctp_getsockopt_hmac_ident() leak (< 2.6.26.4)
> (commit d97240552cd98c4b07322f30f66fd9c3ba4171de)
> (exploit by Jon Oberheide at http://www.milw0rm.com/exploits/7618)
>
> In kernels before 2.6.26.4 with SCTP and the SCTP-AUTH extension
> enabled, an unprivileged local can leak arbitrary kernel memory abusing
> an unbounded (due to incorrect length check) copy in the
> sctp_getsockopt_hmac_ident() function. The data copied comes from a
> kmallocated object (the struct sctp_association *asoc). This could be
> exploited with a SCTP_HMAC_IDENT IOCTL request (through sctp_getsockopt).
>
> From the exploit:
>  *   If SCTP AUTH is enabled (net.sctp.auth_enable = 1), this exploit
>  *   allow an  unprivileged user to dump an arbitrary amount (DUMP_SIZE) of
>  *   kernel memory out to a file (DUMP_FILE). If SCTP AUTH is not enabled, the
>  *   exploit will trigger a kernel OOPS.
>
> It's worth noting that the commit title and description don't reveal the
> true nature of the bug (a perfectly exploitable vulnerability, platform
> independent like most other information leaks):
> "sctp: fix random memory dereference with SCTP_HMAC_IDENT option."
>
> At least it's not entirely deceitful. It's definitely dereferencing
> "random memory".
>
> 6. CVE-2007-1000 aka ipv6_getsockopt_sticky() leak (<2.6.20.2)
> (http://bugzilla.kernel.org/show_bug.cgi?id=8134)
> (commit 286930797d74b2c9a5beae84836044f6a836235f)
> (exploit at http://www.milw0rm.com/exploits/4172)
>
> The bug was initially assumed to be a simple NULL pointer dereference by
> Chris Wright... but since kernel and userland address space coexist in
> x86 and other architectures, this is an exploitable condition which
> was used to leak kernel memory to userland after a page was allocated at
> NULL by the exploit abusing the issue.
>

As we can all see from these CVEs, in order to fix them,
data can be zeroed at two points:
1. zero allocated pages that can leak to user
2. zero sensitive pages when they are recycled

Your patch chooses the latter.
But a problem still remains: how do we judge whether a page is sensitive or not?
I wonder whether every developer can always make the right judgement.
Any kernel data/metadata can be sensitive, and that also depends on how
the exploit code will use it.  So do we just blindly flag them all?

By comparison, it's much easier to track whether a page will go to the user.
So why not adopt point 1 as the time to zero pages?

> -
>
> Further examples could be found in the commit logs or mining other places.
> Also, this is the tip of the iceberg. Whatever is lurking deep inside the
> kernel sources right now will only be deterred with my patch and any future
> modifications that cover corner cases.
>
> The following file contains a list of CVE numbers correlated with
> commits, which comes handy to look for more examples:
> http://web.mit.edu/tabbott/www/cve-data/cve-data.txt
>
> I've saved a backup copy in case it goes offline and will put it
> somewhere accessible for people on the list in such a case.
>
> My intention here is to make the kernel more secure, not proving you
> wrong or right.
>
> You are a smart fellow and I respect your technical and kernel development
> acumen. Smart people don't waste their time on meaningless banter.
>
> I'll have the modified patches ready in an hour or so, hopefully.
>
>        Larry
>
> --
> To unsubscribe, send a message with 'unsubscribe linux-mm' in
> the body to majordomo@kvack.org.  For more info on Linux MM,
> see: http://www.linux-mm.org/ .
> Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-22 19:18             ` Nai Xia
  0 siblings, 0 replies; 220+ messages in thread
From: Nai Xia @ 2009-05-22 19:18 UTC (permalink / raw)
  To: Larry H.
  Cc: Alan Cox, Ingo Molnar, Rik van Riel, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar, pageexec

On Sat, May 23, 2009 at 2:03 AM, Larry H. <research@subreption.com> wrote:
> On 14:39 Fri 22 May     , Alan Cox wrote:
>> > > performance point of view: we _dont_ want to clear the full stack
>> > > page for every kernel thread exiting.
>> >
>> > Burning the stack there is beyond overkill.
>>
>> Yet most of our historic leaks have been padding bytes in stack based
>> structures. Your position seems very inconsistent.
>
> Alright, I think I had enough of the theoretical mumbo jumbo, with all
> due respect. Let's get on with the show.
>
> I'm going to present a very short analysis for different historic leaks
> which had little to do with 'padding bytes in stack', but more like
> arbitrary kernel memory leaked to userland, or written to disk, or sent
> over the network. If by the end of this message you still
> believe my position is remotely inconsistent, I'll have to politely
> request you to back it up with something that can be technically and
> empirically proven from both programmer and security perspectives.
>
> 1. CVE-2005-0400 aka the infamous ext2_make_empty() disaster
> (http://arkoon.net/advisories/ext2-make-empty-leak.txt)
>
> The ext2 code before 2.6.11.6 was affected by an uninitialized variable
> usage vulnerability which lead to 4072 bytes worth of kernel memory
> being leaked to disk, when creating a block for a new directory entry.
> The affected function was ext2_make_empty() and it was fixed by adding a
> memset call to zero the memory.
>
> http://lxr.linux.no/linux+v2.6.12/fs/ext2/dir.c#L578
>
>  594        kaddr = kmap_atomic(page, KM_USER0);
>  595       memset(kaddr, 0, chunk_size);
>  596        de = (struct ext2_dir_entry_2 *)kaddr;
>  597        de->name_len = 1;
>  598        de->rec_len = cpu_to_le16(EXT2_DIR_REC_LEN(1));
>
> http://lxr.linux.no/linux-bk+v2.6.11.5/fs/ext2/dir.c#L578
>
>  594        kaddr = kmap_atomic(page, KM_USER0);
>  595        de = (struct ext2_dir_entry_2 *)kaddr;
>  596        de->name_len = 1;
>  597        de->rec_len = cpu_to_le16(EXT2_DIR_REC_LEN(1));
>  598        memcpy (de->name, ".\0\0", 4);
>
> An atomic call to kmap(). This lead to widespread searching for online
> ext2 images and general hilarity. And it was a longstanding issue in
> the kernel, too.
>
> 2. CVE-2009-0787 aka ecryptfs_write_metadata_to_contents() leak
> (commit 8faece5f906725c10e7a1f6caf84452abadbdc7b)
>
> The ecryptfs function ecryptfs_write_metadata_to_contents() leaked up to
> an entire page to userland. An incorrect size was used during the copy
> operation, leading to more bytes being copied, hence the leak.
>
> +       virt_len = crypt_stat->num_header_bytes_at_front;
> +       order = get_order(virt_len);
>        /* Released in this function */
> -       virt = (char *)get_zeroed_page(GFP_KERNEL);
> +       virt = (char *)ecryptfs_get_zeroed_pages(GFP_KERNEL,
>        order);
>
> 3. CVE-2002-0046 aka information leak over ICMP TTL Exceeded responses
> (http://archives.neohapsis.com/archives/bugtraq/2002-01/0234.html)
> (http://rhn.redhat.com/errata/RHSA-2002-007.html)
>
> Series of fragmented ICMP packets that generate an ICMP TTL
> Exceeded response would include 20 bytes of arbitrary kernel memory,
> sent back to the attacker. I didn't bother digging for the patch. But
> you bet it has to do with kmallocated skb buffers (take a look at
> http://lxr.linux.no/linux-old+v2.2.16/net/ipv4/ipip.c#L436).
>
> 4. CVE-2007-6417 aka shmem_getpage() tmpfs leak
> (http://marc.info/?l=linux-kernel&m=119627664702379&w=2)
>
> An issue related with tmpfs, users were able to obtain kernel memory
> because the shmem_getpage() didn't always zero the memory when reusing
> an allocated page. The vulnerability was present from 2.6.11 through
> 2.6.23.
>
> @@ -1306,6 +1306,7 @@ repeat:
>
>                info->alloced++;
>                spin_unlock(&info->lock);
> +               clear_highpage(filepage);
>                flush_dcache_page(filepage);
>                SetPageUptodate(filepage);
>        }
>
> If the caller provided the page already allocated, the GFP_ZERO
> allocation never happened, and the page was never cleared. Interesting
> issue since my patch basically ensures this doesn't happen. Nevermind.
>
> 5. CVE-2008-4113 aka sctp_getsockopt_hmac_ident() leak (< 2.6.26.4)
> (commit d97240552cd98c4b07322f30f66fd9c3ba4171de)
> (exploit by Jon Oberheide at http://www.milw0rm.com/exploits/7618)
>
> In kernels before 2.6.26.4 with SCTP and the SCTP-AUTH extension
> enabled, an unprivileged local can leak arbitrary kernel memory abusing
> an unbounded (due to incorrect length check) copy in the
> sctp_getsockopt_hmac_ident() function. The data copied comes from a
> kmallocated object (the struct sctp_association *asoc). This could be
> exploited with a SCTP_HMAC_IDENT IOCTL request (through sctp_getsockopt).
>
> From the exploit:
>  *   If SCTP AUTH is enabled (net.sctp.auth_enable = 1), this exploit
>  *   allow an  unprivileged user to dump an arbitrary amount (DUMP_SIZE) of
>  *   kernel memory out to a file (DUMP_FILE). If SCTP AUTH is not enabled, the
>  *   exploit will trigger a kernel OOPS.
>
> It's worth noting that the commit title and description don't reveal the
> true nature of the bug (a perfectly exploitable vulnerability, platform
> independent like most other information leaks):
> "sctp: fix random memory dereference with SCTP_HMAC_IDENT option."
>
> At least it's not entirely deceitful. It's definitely dereferencing
> "random memory".
>
> 6. CVE-2007-1000 aka ipv6_getsockopt_sticky() leak (<2.6.20.2)
> (http://bugzilla.kernel.org/show_bug.cgi?id=8134)
> (commit 286930797d74b2c9a5beae84836044f6a836235f)
> (exploit at http://www.milw0rm.com/exploits/4172)
>
> The bug was initially assumed to be a simple NULL pointer dereference by
> Chris Wright... but since kernel and userland address space coexist in
> x86 and other architectures, this is an exploitable condition which
> was used to leak kernel memory to userland after a page was allocated at
> NULL by the exploit abusing the issue.
>

As we can all see from these CVEs, in order to fix them,
data can be zeroed at two points:
1. zero allocated pages that can leak to user
2. zero sensitive pages when they are recycled

Your patch chooses the latter.
But a problem still remains: how do we judge whether a page is sensitive or not?
I wonder whether every developer can always make the right judgement.
Any kernel data/metadata can be sensitive, and that also depends on how
the exploit code will use it.  So do we just blindly flag them all?

By comparison, it's much easier to track whether a page will go to the user.
So why not adopt point 1 as the time to zero pages?

> -
>
> Further examples could be found in the commit logs or mining other places.
> Also, this is the tip of the iceberg. Whatever is lurking deep inside the
> kernel sources right now will only be deterred with my patch and any future
> modifications that cover corner cases.
>
> The following file contains a list of CVE numbers correlated with
> commits, which comes handy to look for more examples:
> http://web.mit.edu/tabbott/www/cve-data/cve-data.txt
>
> I've saved a backup copy in case it goes offline and will put it
> somewhere accessible for people on the list in such a case.
>
> My intention here is to make the kernel more secure, not proving you
> wrong or right.
>
> You are a smart fellow and I respect your technical and kernel development
> acumen. Smart people don't waste their time on meaningless banter.
>
> I'll have the modified patches ready in an hour or so, hopefully.
>
>        Larry
>
> --
> To unsubscribe, send a message with 'unsubscribe linux-mm' in
> the body to majordomo@kvack.org.  For more info on Linux MM,
> see: http://www.linux-mm.org/ .
> Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
>

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* [PATCH] Support for kernel memory sanitization
  2009-05-22 18:21             ` Alan Cox
  (?)
@ 2009-05-22 23:25             ` Larry H.
  2009-05-22 23:52               ` Randy Dunlap
  -1 siblings, 1 reply; 220+ messages in thread
From: Larry H. @ 2009-05-22 23:25 UTC (permalink / raw)
  To: Alan Cox
  Cc: Ingo Molnar, Rik van Riel, linux-kernel, Linus Torvalds,
	linux-mm, Ingo Molnar, pageexec

[PATCH] Support for kernel memory sanitization

This patch adds support for the CONFIDENTIAL flag to the SLAB and SLUB
allocators. An additional GFP flag is added for use with higher level
allocators (GFP_CONFIDENTIAL, which implies GFP_ZERO).

A boot command line option (sanitize_mem) is added for the page
allocator to perform sanitization of all pages upon release and
allocation.

The code is largely based off the memory sanitization feature in the
PaX project (licensed under the GPL v2 terms) and the original
PG_sensitive patch which allowed fine-grained marking of pages using
a page flag. The lack of a page flag makes the gfp flag mostly useless,
since we can't track pages with the sensitive/confidential bit, and
properly sanitize them on release. The only way to overcome this
limitation is to enable the sanitize_mem boot option and perform
unconditional page sanitization.

This avoids leaking sensitive information when memory is released to
the system after use, for example in cryptographic subsystems. More
specifically, the following threats are addressed:

	1. Information leaks in use-after-free or uninitialized
	variable usage scenarios, such as CVE-2005-0400,
	CVE-2009-0787 and CVE-2007-6417.

	2. Data remanence based attacks, such as Iceman/Coldboot,
	which combine cold rebooting and memory image scanning
	to extract cryptographic secrets (ex. detecting AES key
	expansion blocks, RSA key patterns, etc) or other
	confidential information.

	3. Re-allocation based information leaks, especially in the
	SLAB/SLUB allocators which use LIFO caches and might expose
	sensitive data out of context (when a caller allocates an
	object and receives a pointer to a location which was used
	previously by another user).

The "Shredding Your Garbage: Reducing Data Lifetime Through Secure
Deallocation" paper by Jim Chow et al. from the Stanford University
Department of Computer Science explains the security implications of
insecure deallocation, and provides extensive information with figures
and applications thoroughly analyzed for this behavior [1]. More recently
this issue came to widespread attention when the "Lest We Remember:
Cold Boot Attacks on Encryption Keys" (by Halderman et al.) paper was
published [2].

This patch has been tested on x86 and amd64, with and without HIGHMEM.

	[1] http://www.stanford.edu/~blp/papers/shredding.html
	[2] http://citp.princeton.edu/memory/
	[3] http://marc.info/?l=linux-mm&m=124284428226461&w=2
	[4] http://marc.info/?t=124284431000002&r=1&w=2

Signed-off-by: Larry H. <research@subreption.com>

---
 Documentation/kernel-parameters.txt    |    2 ++
 arch/alpha/include/asm/kmap_types.h    |    3 ++-
 arch/arm/include/asm/kmap_types.h      |    1 +
 arch/avr32/include/asm/kmap_types.h    |    3 ++-
 arch/blackfin/include/asm/kmap_types.h |    1 +
 arch/cris/include/asm/kmap_types.h     |    1 +
 arch/h8300/include/asm/kmap_types.h    |    1 +
 arch/ia64/include/asm/kmap_types.h     |    3 ++-
 arch/m68k/include/asm/kmap_types_mm.h  |    1 +
 arch/m68k/include/asm/kmap_types_no.h  |    1 +
 arch/mips/include/asm/kmap_types.h     |    3 ++-
 arch/parisc/include/asm/kmap_types.h   |    3 ++-
 arch/powerpc/include/asm/kmap_types.h  |    1 +
 arch/s390/include/asm/kmap_types.h     |    1 +
 arch/sh/include/asm/kmap_types.h       |    3 ++-
 arch/sparc/include/asm/kmap_types.h    |    1 +
 arch/um/include/asm/kmap_types.h       |    1 +
 arch/x86/include/asm/kmap_types.h      |    3 ++-
 arch/xtensa/include/asm/kmap_types.h   |    1 +
 include/asm-frv/kmap_types.h           |    1 +
 include/asm-m32r/kmap_types.h          |    3 ++-
 include/asm-mn10300/kmap_types.h       |    1 +
 include/linux/gfp.h                    |    2 ++
 include/linux/highmem.h                |   12 ++++++++++++
 include/linux/mm.h                     |    2 ++
 include/linux/slab.h                   |    1 +
 mm/page_alloc.c                        |   30 +++++++++++++++++++++++++++++-
 mm/slab.c                              |   13 +++++++++++--
 mm/slub.c                              |   24 ++++++++++++++++++++++++
 29 files changed, 112 insertions(+), 11 deletions(-)

Index: linux-2.6/Documentation/kernel-parameters.txt
===================================================================
--- linux-2.6.orig/Documentation/kernel-parameters.txt
+++ linux-2.6/Documentation/kernel-parameters.txt
@@ -2494,6 +2494,8 @@ and is between 256 and 4096 characters. 
 	norandmaps	Don't use address space randomization.  Equivalent to
 			echo 0 > /proc/sys/kernel/randomize_va_space
 
+	sanitize_mem	Enables sanitization of all allocated pages.
+
 ______________________________________________________________________
 
 TODO:
Index: linux-2.6/arch/alpha/include/asm/kmap_types.h
===================================================================
--- linux-2.6.orig/arch/alpha/include/asm/kmap_types.h
+++ linux-2.6/arch/alpha/include/asm/kmap_types.h
@@ -24,7 +24,8 @@ D(9)	KM_IRQ0,
 D(10)	KM_IRQ1,
 D(11)	KM_SOFTIRQ0,
 D(12)	KM_SOFTIRQ1,
-D(13)	KM_TYPE_NR
+D(13)  KM_CLEARPAGE,
+D(14)  KM_TYPE_NR
 };
 
 #undef D
Index: linux-2.6/arch/arm/include/asm/kmap_types.h
===================================================================
--- linux-2.6.orig/arch/arm/include/asm/kmap_types.h
+++ linux-2.6/arch/arm/include/asm/kmap_types.h
@@ -18,6 +18,7 @@ enum km_type {
 	KM_IRQ1,
 	KM_SOFTIRQ0,
 	KM_SOFTIRQ1,
+	KM_CLEARPAGE,
 	KM_TYPE_NR
 };
 
Index: linux-2.6/arch/avr32/include/asm/kmap_types.h
===================================================================
--- linux-2.6.orig/arch/avr32/include/asm/kmap_types.h
+++ linux-2.6/arch/avr32/include/asm/kmap_types.h
@@ -22,7 +22,8 @@ D(10)	KM_IRQ0,
 D(11)	KM_IRQ1,
 D(12)	KM_SOFTIRQ0,
 D(13)	KM_SOFTIRQ1,
-D(14)	KM_TYPE_NR
+D(14)	KM_CLEARPAGE,
+D(15)	KM_TYPE_NR
 };
 
 #undef D
Index: linux-2.6/arch/blackfin/include/asm/kmap_types.h
===================================================================
--- linux-2.6.orig/arch/blackfin/include/asm/kmap_types.h
+++ linux-2.6/arch/blackfin/include/asm/kmap_types.h
@@ -15,6 +15,7 @@ enum km_type {
 	KM_IRQ1,
 	KM_SOFTIRQ0,
 	KM_SOFTIRQ1,
+	KM_CLEARPAGE,
 	KM_TYPE_NR
 };
 
Index: linux-2.6/arch/cris/include/asm/kmap_types.h
===================================================================
--- linux-2.6.orig/arch/cris/include/asm/kmap_types.h
+++ linux-2.6/arch/cris/include/asm/kmap_types.h
@@ -19,6 +19,7 @@ enum km_type {
 	KM_IRQ1,
 	KM_SOFTIRQ0,
 	KM_SOFTIRQ1,
+	KM_CLEARPAGE,
 	KM_TYPE_NR
 };
 
Index: linux-2.6/arch/h8300/include/asm/kmap_types.h
===================================================================
--- linux-2.6.orig/arch/h8300/include/asm/kmap_types.h
+++ linux-2.6/arch/h8300/include/asm/kmap_types.h
@@ -15,6 +15,7 @@ enum km_type {
 	KM_IRQ1,
 	KM_SOFTIRQ0,
 	KM_SOFTIRQ1,
+	KM_CLEARPAGE,
 	KM_TYPE_NR
 };
 
Index: linux-2.6/arch/ia64/include/asm/kmap_types.h
===================================================================
--- linux-2.6.orig/arch/ia64/include/asm/kmap_types.h
+++ linux-2.6/arch/ia64/include/asm/kmap_types.h
@@ -22,7 +22,8 @@ D(9)	KM_IRQ0,
 D(10)	KM_IRQ1,
 D(11)	KM_SOFTIRQ0,
 D(12)	KM_SOFTIRQ1,
-D(13)	KM_TYPE_NR
+D(13)	KM_CLEARPAGE,
+D(14)	KM_TYPE_NR
 };
 
 #undef D
Index: linux-2.6/arch/m68k/include/asm/kmap_types_mm.h
===================================================================
--- linux-2.6.orig/arch/m68k/include/asm/kmap_types_mm.h
+++ linux-2.6/arch/m68k/include/asm/kmap_types_mm.h
@@ -15,6 +15,7 @@ enum km_type {
 	KM_IRQ1,
 	KM_SOFTIRQ0,
 	KM_SOFTIRQ1,
+	KM_CLEARPAGE,
 	KM_TYPE_NR
 };
 
Index: linux-2.6/arch/m68k/include/asm/kmap_types_no.h
===================================================================
--- linux-2.6.orig/arch/m68k/include/asm/kmap_types_no.h
+++ linux-2.6/arch/m68k/include/asm/kmap_types_no.h
@@ -15,6 +15,7 @@ enum km_type {
 	KM_IRQ1,
 	KM_SOFTIRQ0,
 	KM_SOFTIRQ1,
+	KM_CLEARPAGE,
 	KM_TYPE_NR
 };
 
Index: linux-2.6/arch/mips/include/asm/kmap_types.h
===================================================================
--- linux-2.6.orig/arch/mips/include/asm/kmap_types.h
+++ linux-2.6/arch/mips/include/asm/kmap_types.h
@@ -22,7 +22,8 @@ D(9)	KM_IRQ0,
 D(10)	KM_IRQ1,
 D(11)	KM_SOFTIRQ0,
 D(12)	KM_SOFTIRQ1,
-D(13)	KM_TYPE_NR
+D(13)	KM_CLEARPAGE,
+D(14)	KM_TYPE_NR
 };
 
 #undef D
Index: linux-2.6/arch/parisc/include/asm/kmap_types.h
===================================================================
--- linux-2.6.orig/arch/parisc/include/asm/kmap_types.h
+++ linux-2.6/arch/parisc/include/asm/kmap_types.h
@@ -22,7 +22,8 @@ D(9)	KM_IRQ0,
 D(10)	KM_IRQ1,
 D(11)	KM_SOFTIRQ0,
 D(12)	KM_SOFTIRQ1,
-D(13)	KM_TYPE_NR
+D(13)	KM_CLEARPAGE,
+D(14)	KM_TYPE_NR
 };
 
 #undef D
Index: linux-2.6/arch/powerpc/include/asm/kmap_types.h
===================================================================
--- linux-2.6.orig/arch/powerpc/include/asm/kmap_types.h
+++ linux-2.6/arch/powerpc/include/asm/kmap_types.h
@@ -26,6 +26,7 @@ enum km_type {
 	KM_SOFTIRQ1,
 	KM_PPC_SYNC_PAGE,
 	KM_PPC_SYNC_ICACHE,
+	KM_CLEARPAGE,
 	KM_TYPE_NR
 };
 
Index: linux-2.6/arch/s390/include/asm/kmap_types.h
===================================================================
--- linux-2.6.orig/arch/s390/include/asm/kmap_types.h
+++ linux-2.6/arch/s390/include/asm/kmap_types.h
@@ -16,6 +16,7 @@ enum km_type {
 	KM_IRQ1,
 	KM_SOFTIRQ0,
 	KM_SOFTIRQ1,	
+	KM_CLEARPAGE,
 	KM_TYPE_NR
 };
 
Index: linux-2.6/arch/sh/include/asm/kmap_types.h
===================================================================
--- linux-2.6.orig/arch/sh/include/asm/kmap_types.h
+++ linux-2.6/arch/sh/include/asm/kmap_types.h
@@ -24,7 +24,8 @@ D(9)	KM_IRQ0,
 D(10)	KM_IRQ1,
 D(11)	KM_SOFTIRQ0,
 D(12)	KM_SOFTIRQ1,
-D(13)	KM_TYPE_NR
+D(13)	KM_CLEARPAGE,
+D(14)	KM_TYPE_NR
 };
 
 #undef D
Index: linux-2.6/arch/sparc/include/asm/kmap_types.h
===================================================================
--- linux-2.6.orig/arch/sparc/include/asm/kmap_types.h
+++ linux-2.6/arch/sparc/include/asm/kmap_types.h
@@ -19,6 +19,7 @@ enum km_type {
 	KM_IRQ1,
 	KM_SOFTIRQ0,
 	KM_SOFTIRQ1,
+	KM_CLEARPAGE,
 	KM_TYPE_NR
 };
 
Index: linux-2.6/arch/um/include/asm/kmap_types.h
===================================================================
--- linux-2.6.orig/arch/um/include/asm/kmap_types.h
+++ linux-2.6/arch/um/include/asm/kmap_types.h
@@ -23,6 +23,7 @@ enum km_type {
 	KM_IRQ1,
 	KM_SOFTIRQ0,
 	KM_SOFTIRQ1,
+	KM_CLEARPAGE,
 	KM_TYPE_NR
 };
 
Index: linux-2.6/arch/x86/include/asm/kmap_types.h
===================================================================
--- linux-2.6.orig/arch/x86/include/asm/kmap_types.h
+++ linux-2.6/arch/x86/include/asm/kmap_types.h
@@ -21,7 +21,8 @@ D(9)	KM_IRQ0,
 D(10)	KM_IRQ1,
 D(11)	KM_SOFTIRQ0,
 D(12)	KM_SOFTIRQ1,
-D(13)	KM_TYPE_NR
+D(13)	KM_CLEARPAGE,
+D(14)	KM_TYPE_NR
 };
 
 #undef D
Index: linux-2.6/arch/xtensa/include/asm/kmap_types.h
===================================================================
--- linux-2.6.orig/arch/xtensa/include/asm/kmap_types.h
+++ linux-2.6/arch/xtensa/include/asm/kmap_types.h
@@ -25,6 +25,7 @@ enum km_type {
   KM_IRQ1,
   KM_SOFTIRQ0,
   KM_SOFTIRQ1,
+  KM_CLEARPAGE,
   KM_TYPE_NR
 };
 
Index: linux-2.6/include/asm-frv/kmap_types.h
===================================================================
--- linux-2.6.orig/include/asm-frv/kmap_types.h
+++ linux-2.6/include/asm-frv/kmap_types.h
@@ -23,6 +23,7 @@ enum km_type {
 	KM_IRQ1,
 	KM_SOFTIRQ0,
 	KM_SOFTIRQ1,
+	KM_CLEARPAGE,
 	KM_TYPE_NR
 };
 
Index: linux-2.6/include/asm-m32r/kmap_types.h
===================================================================
--- linux-2.6.orig/include/asm-m32r/kmap_types.h
+++ linux-2.6/include/asm-m32r/kmap_types.h
@@ -21,7 +21,8 @@ D(9)	KM_IRQ0,
 D(10)	KM_IRQ1,
 D(11)	KM_SOFTIRQ0,
 D(12)	KM_SOFTIRQ1,
-D(13)	KM_TYPE_NR
+D(13)	KM_CLEARPAGE,
+D(14)	KM_TYPE_NR
 };
 
 #undef D
Index: linux-2.6/include/asm-mn10300/kmap_types.h
===================================================================
--- linux-2.6.orig/include/asm-mn10300/kmap_types.h
+++ linux-2.6/include/asm-mn10300/kmap_types.h
@@ -25,6 +25,7 @@ enum km_type {
 	KM_IRQ1,
 	KM_SOFTIRQ0,
 	KM_SOFTIRQ1,
+	KM_CLEARPAGE,
 	KM_TYPE_NR
 };
 
Index: linux-2.6/include/linux/gfp.h
===================================================================
--- linux-2.6.orig/include/linux/gfp.h
+++ linux-2.6/include/linux/gfp.h
@@ -50,6 +50,7 @@ struct vm_area_struct;
 #define __GFP_THISNODE	((__force gfp_t)0x40000u)/* No fallback, no policies */
 #define __GFP_RECLAIMABLE ((__force gfp_t)0x80000u) /* Page is reclaimable */
 #define __GFP_MOVABLE	((__force gfp_t)0x100000u)  /* Page is movable */
+#define __GFP_CONFIDENTIAL	((__force gfp_t)0x200000u)  /* Page contains sensitive information */
 
 #define __GFP_BITS_SHIFT 21	/* Room for 21 __GFP_FOO bits */
 #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
@@ -69,6 +70,7 @@ struct vm_area_struct;
 #define GFP_HIGHUSER_MOVABLE	(__GFP_WAIT | __GFP_IO | __GFP_FS | \
 				 __GFP_HARDWALL | __GFP_HIGHMEM | \
 				 __GFP_MOVABLE)
+#define GFP_CONFIDENTIAL	(__GFP_CONFIDENTIAL | __GFP_ZERO)
 
 #ifdef CONFIG_NUMA
 #define GFP_THISNODE	(__GFP_THISNODE | __GFP_NOWARN | __GFP_NORETRY)
Index: linux-2.6/include/linux/highmem.h
===================================================================
--- linux-2.6.orig/include/linux/highmem.h
+++ linux-2.6/include/linux/highmem.h
@@ -124,6 +124,18 @@ static inline void clear_highpage(struct
 	kunmap_atomic(kaddr, KM_USER0);
 }
 
+static inline void sanitize_highpage(struct page *page)
+{
+	void *kaddr;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	kaddr = kmap_atomic(page, KM_CLEARPAGE);
+	clear_page(kaddr);
+	kunmap_atomic(kaddr, KM_CLEARPAGE);
+	local_irq_restore(flags);
+}
+
 static inline void zero_user_segments(struct page *page,
 	unsigned start1, unsigned end1,
 	unsigned start2, unsigned end2)
Index: linux-2.6/include/linux/mm.h
===================================================================
--- linux-2.6.orig/include/linux/mm.h
+++ linux-2.6/include/linux/mm.h
@@ -25,6 +25,7 @@ extern unsigned long max_mapnr;
 #endif
 
 extern unsigned long num_physpages;
+extern int sanitize_all_mem;
 extern void * high_memory;
 extern int page_cluster;
 
@@ -104,6 +105,7 @@ extern unsigned int kobjsize(const void 
 #define VM_CAN_NONLINEAR 0x08000000	/* Has ->fault & does nonlinear pages */
 #define VM_MIXEDMAP	0x10000000	/* Can contain "struct page" and pure PFN pages */
 #define VM_SAO		0x20000000	/* Strong Access Ordering (powerpc) */
+#define VM_CONFIDENTIAL	0x40000000	/* Will contain sensitive data */
 
 #ifndef VM_STACK_DEFAULT_FLAGS		/* arch can override this */
 #define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS
Index: linux-2.6/include/linux/slab.h
===================================================================
--- linux-2.6.orig/include/linux/slab.h
+++ linux-2.6/include/linux/slab.h
@@ -23,6 +23,7 @@
 #define SLAB_CACHE_DMA		0x00004000UL	/* Use GFP_DMA memory */
 #define SLAB_STORE_USER		0x00010000UL	/* DEBUG: Store the last owner for bug hunting */
 #define SLAB_PANIC		0x00040000UL	/* Panic if kmem_cache_create() fails */
+#define SLAB_CONFIDENTIAL		0x00080000UL	/* Memory will hold sensitive information */
 /*
  * SLAB_DESTROY_BY_RCU - **WARNING** READ THIS!
  *
Index: linux-2.6/mm/page_alloc.c
===================================================================
--- linux-2.6.orig/mm/page_alloc.c
+++ linux-2.6/mm/page_alloc.c
@@ -123,6 +123,7 @@ int min_free_kbytes = 1024;
 unsigned long __meminitdata nr_kernel_pages;
 unsigned long __meminitdata nr_all_pages;
 static unsigned long __meminitdata dma_reserve;
+int sanitize_all_mem;
 
 #ifdef CONFIG_ARCH_POPULATES_NODE_MAP
   /*
@@ -221,6 +222,17 @@ static inline int bad_range(struct zone 
 }
 #endif
 
+/* "sanitize_mem" boot option: s is NULL when no "=value" is given. */
+static __init int setup_page_sanitization(char *s)
+{
+	/*
+	 * Enable unconditionally; early_param handlers return 0 on success.
+	 */
+	sanitize_all_mem = 1;
+	return 0;
+}
+early_param("sanitize_mem", setup_page_sanitization);
+
 static void bad_page(struct page *page)
 {
 	static unsigned long resume;
@@ -545,6 +557,7 @@ static void free_one_page(struct zone *z
 
 static void __free_pages_ok(struct page *page, unsigned int order)
 {
+	unsigned long index = 1UL << order;
 	unsigned long flags;
 	int i;
 	int bad = 0;
@@ -559,6 +572,16 @@ static void __free_pages_ok(struct page 
 		debug_check_no_obj_freed(page_address(page),
 					   PAGE_SIZE << order);
 	}
+
+	/*
+	 * Page sanitization is enabled, let's clear the page contents before
+	 * release.
+	 */
+	if (sanitize_all_mem) {
+		for (; index; --index)
+			sanitize_highpage(page + index - 1);
+	}
+
 	arch_free_page(page, order);
 	kernel_map_pages(page, 1 << order, 0);
 
@@ -647,7 +670,8 @@ static int prep_new_page(struct page *pa
 	arch_alloc_page(page, order);
 	kernel_map_pages(page, 1 << order, 1);
 
-	if (gfp_flags & __GFP_ZERO)
+	if (((gfp_flags & __GFP_ZERO) || (gfp_flags & __GFP_CONFIDENTIAL))
+		|| sanitize_all_mem)
 		prep_zero_page(page, order, gfp_flags);
 
 	if (order && (gfp_flags & __GFP_COMP))
@@ -1009,6 +1033,10 @@ static void free_hot_cold_page(struct pa
 		debug_check_no_locks_freed(page_address(page), PAGE_SIZE);
 		debug_check_no_obj_freed(page_address(page), PAGE_SIZE);
 	}
+
+	if (sanitize_all_mem)
+		sanitize_highpage(page);
+
 	arch_free_page(page, 0);
 	kernel_map_pages(page, 1, 0);
 
Index: linux-2.6/mm/slab.c
===================================================================
--- linux-2.6.orig/mm/slab.c
+++ linux-2.6/mm/slab.c
@@ -2270,7 +2270,11 @@ kmem_cache_create (const char *name, siz
 	align = ralign;
 
 	/* Get cache's description obj. */
-	cachep = kmem_cache_zalloc(&cache_cache, GFP_KERNEL);
+	if (flags & SLAB_CONFIDENTIAL)
+		cachep = kmem_cache_zalloc(&cache_cache, GFP_KERNEL | GFP_CONFIDENTIAL);
+	else
+		cachep = kmem_cache_zalloc(&cache_cache, GFP_KERNEL);
+
 	if (!cachep)
 		goto oops;
 
@@ -2356,6 +2360,8 @@ kmem_cache_create (const char *name, siz
 	cachep->gfpflags = 0;
 	if (CONFIG_ZONE_DMA_FLAG && (flags & SLAB_CACHE_DMA))
 		cachep->gfpflags |= GFP_DMA;
+	if (flags & SLAB_CONFIDENTIAL)
+		cachep->gfpflags |= GFP_CONFIDENTIAL;
 	cachep->buffer_size = size;
 	cachep->reciprocal_buffer_size = reciprocal_value(size);
 
@@ -3350,7 +3356,7 @@ __cache_alloc_node(struct kmem_cache *ca
 	local_irq_restore(save_flags);
 	ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller);
 
-	if (unlikely((flags & __GFP_ZERO) && ptr))
+	if (unlikely((flags & (__GFP_ZERO | __GFP_CONFIDENTIAL)) && ptr))
 		memset(ptr, 0, obj_size(cachep));
 
 	return ptr;
@@ -3519,6 +3525,9 @@ static inline void __cache_free(struct k
 	check_irq_off();
 	objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0));
 
+	if (unlikely(cachep->flags & SLAB_CONFIDENTIAL))
+		memset(objp, 0, obj_size(cachep));
+
 	/*
 	 * Skip calling cache_free_alien() when the platform is not numa.
 	 * This will avoid cache misses that happen while accessing slabp (which
Index: linux-2.6/mm/slub.c
===================================================================
--- linux-2.6.orig/mm/slub.c
+++ linux-2.6/mm/slub.c
@@ -1135,6 +1135,9 @@ static struct page *new_slab(struct kmem
 
 	start = page_address(page);
 
+	if (unlikely(s->flags & SLAB_CONFIDENTIAL))
+		memset(start, 0, PAGE_SIZE << compound_order(page));
+
 	if (unlikely(s->flags & SLAB_POISON))
 		memset(start, POISON_INUSE, PAGE_SIZE << compound_order(page));
 
@@ -1646,6 +1649,7 @@ EXPORT_SYMBOL(kmem_cache_alloc_node);
 static void __slab_free(struct kmem_cache *s, struct page *page,
 			void *x, unsigned long addr, unsigned int offset)
 {
+	int objsize;
 	void *prior;
 	void **object = (void *)x;
 	struct kmem_cache_cpu *c;
@@ -1662,6 +1666,23 @@ checks_ok:
 	page->freelist = object;
 	page->inuse--;
 
+	if (s->flags & SLAB_CONFIDENTIAL) {
+		/* Size calculation based off ksize() */
+		objsize = s->size;
+
+		if (unlikely(!PageSlab(page))) {
+			WARN_ON(!PageCompound(page));
+			objsize = PAGE_SIZE << compound_order(page);
+		} else {
+			if (s->flags & (SLAB_RED_ZONE | SLAB_POISON))
+				objsize = s->objsize;
+			else if (s->flags & (SLAB_DESTROY_BY_RCU | SLAB_STORE_USER))
+				objsize = s->inuse;
+		}
+
+		memset(x, 0, objsize);
+	}
+
 	if (unlikely(PageSlubFrozen(page))) {
 		stat(c, FREE_FROZEN);
 		goto out_unlock;
@@ -2292,6 +2313,9 @@ static int calculate_sizes(struct kmem_c
 	if (s->flags & SLAB_RECLAIM_ACCOUNT)
 		s->allocflags |= __GFP_RECLAIMABLE;
 
+	if (s->flags & SLAB_CONFIDENTIAL)
+		s->allocflags |= GFP_CONFIDENTIAL;
+
 	/*
 	 * Determine the number of objects per slab
 	 */

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-22 18:21             ` Alan Cox
  (?)
  (?)
@ 2009-05-22 23:40             ` Larry H.
  2009-05-23  8:09                 ` Alan Cox
  -1 siblings, 1 reply; 220+ messages in thread
From: Larry H. @ 2009-05-22 23:40 UTC (permalink / raw)
  To: Alan Cox
  Cc: Ingo Molnar, Rik van Riel, linux-kernel, Linus Torvalds,
	linux-mm, Ingo Molnar, pageexec

On 19:21 Fri 22 May     , Alan Cox wrote:
> Which patch are we talking about ? I'm all for a security option which
> clears *all* objects on freeing them (actually the poison debug is pretty
> close to this). That would fix these examples too.

Enabling SLAB poisoning by default will be a bad idea, let's stick to
zeroing these like I do in the current patch. I haven't figured out a
way to do the clearing in caches which don't have the SLAB_CONFIDENTIAL
flag when GFP_CONFIDENTIAL was used to allocate the object, and
sanitize_mem is disabled. That was the point of the page flag...

I looked for unused/re-usable flags too, but found none. It's
interesting to see SLUB and SLOB have their own page flags. Did anybody
oppose those when they were proposed? We should ask Lameter if he would
be keen on adding the confidential bit support there, or we will have to
figure out some other way to track the GFP_CONFIDENTIAL usage and clear
the data properly. We can't do that within SLAB/SLUB because they don't
store this information anywhere.

> Which could be another task stack you didn't clear - yes ?
> Including task stacks yes ? 
> And task stacks contain copies of important data yes ?

I haven't looked at the vma flag possibilities yet given the new
circumstances. But when we had the page flag, I was thinking about
making mlock() pages get automatically cleared upon release by setting
the bit on them. Just another example of a painless feature deriving
from the old patch.

Again a workaround must be found to do this without the page flag.

> Ditto - which is why I'm coming from the position of an "if we free it
> clear it" option. If you need that kind of security the cost should be
> more than acceptable - especially with modern processors that can do
> cache bypass on the clears.

Are you proposing that we should simply remove the confidential flags
and just stick to the unconditional sanitization when the boot option is
enabled? If so, it will make things simpler and is definitely
better than nothing. I would still have preferred the older
approach to be merged, but whatever works at this point.

	Larry

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [PATCH] Support for kernel memory sanitization
  2009-05-22 23:25             ` [PATCH] Support for kernel memory sanitization Larry H.
@ 2009-05-22 23:52               ` Randy Dunlap
  0 siblings, 0 replies; 220+ messages in thread
From: Randy Dunlap @ 2009-05-22 23:52 UTC (permalink / raw)
  To: Larry H.
  Cc: Alan Cox, Ingo Molnar, Rik van Riel, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar, pageexec

Larry H. wrote:
> [PATCH] Support for kernel memory sanitization
> 
> This patch adds support for the CONFIDENTIAL flag to the SLAB and SLUB
> allocators. An additional GFP flag is added for use with higher level
> allocators (GFP_CONFIDENTIAL, which implies GFP_ZERO).
> 
> A boot command line option (sanitize_mem) is added for the page
> allocator to perform sanitization of all pages upon release and
> allocation.
> 
> The code is largely based off the memory sanitization feature in the
> PaX project (licensed under the GPL v2 terms) and the original
> PG_sensitive patch which allowed fine-grained marking of pages using
> a page flag. The lack of a page flag makes the gfp flag mostly useless,
> since we can't track pages with the sensitive/confidential bit, and
> properly sanitize them on release. The only way to overcome this
> limitation is to enable the sanitize_mem boot option and perform
> unconditional page sanitization.
> 
> This avoids leaking sensitive information when memory is released to
> the system after use, for example in cryptographic subsystems. More
> specifically, the following threats are addressed:
> 
> 	1. Information leaks in use-after-free or uninitialized
> 	variable usage scenarios, such as CVE-2005-0400,
> 	CVE-2009-0787 and CVE-2007-6417.
> 
> 	2. Data remanence based attacks, such as Iceman/Coldboot,
> 	which combine cold rebooting and memory image scanning
> 	to extract cryptographic secrets (ex. detecting AES key
> 	expansion blocks, RSA key patterns, etc) or other
> 	confidential information.
> 
> 	3. Re-allocation based information leaks, especially in the
> 	SLAB/SLUB allocators which use LIFO caches and might expose
> 	sensitive data out of context (when a caller allocates an
> 	object and receives a pointer to a location which was used
> 	previously by another user).
> 
> The "Shredding Your Garbage: Reducing Data Lifetime Through Secure
> Deallocation" paper by Jim Chow et. al from the Stanford University
> Department of Computer Science, explains the security implications of
> insecure deallocation, and provides extensive information with figures
> and applications thoroughly analyzed for this behavior [1]. More recently
> this issue came to widespread attention when the "Lest We Remember:
> Cold Boot Attacks on Encryption Keys" (by Halderman et. al) paper was
> published [2].
> 
> This patch has been tested on x86 and amd64, with and without HIGHMEM.
> 
> 	[1] http://www.stanford.edu/~blp/papers/shredding.html
> 	[2] http://citp.princeton.edu/memory/
> 	[3] http://marc.info/?l=linux-mm&m=124284428226461&w=2
> 	[4] http://marc.info/?t=124284431000002&r=1&w=2
> 
> Signed-off-by: Larry H. <research@subreption.com>


BTW, are you familiar with Documentation/SubmittingPatches,
section 12: Sign your work ?  in particular, this part:

"then you just add a line saying

	Signed-off-by: Random J Developer <random@developer.example.org>

using your real name (sorry, no pseudonyms or anonymous contributions.)"


-- 
~Randy
LPC 2009, Sept. 23-25, Portland, Oregon
http://linuxplumbersconf.org/2009/

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-22 23:40             ` [patch 0/5] Support for sanitization flag in low-level page allocator Larry H.
@ 2009-05-23  8:09                 ` Alan Cox
  0 siblings, 0 replies; 220+ messages in thread
From: Alan Cox @ 2009-05-23  8:09 UTC (permalink / raw)
  To: Larry H.
  Cc: Ingo Molnar, Rik van Riel, linux-kernel, Linus Torvalds,
	linux-mm, Ingo Molnar, pageexec

> Enabling SLAB poisoning by default will be a bad idea

Why ?

> I looked for unused/re-usable flags too, but found none. It's
> interesting to see SLUB and SLOB have their own page flags. Did anybody
> oppose those when they were proposed? 

Certainly they were looked at - but the memory allocator is right at the
core of the system rather than an add on.

> > Ditto - which is why I'm coming from the position of an "if we free it
> > clear it" option. If you need that kind of security the cost should be
> > more than acceptable - especially with modern processors that can do
> > cache bypass on the clears.
> 
> Are you proposing that we should simply remove the confidential flags
> and just stick to the unconditional sanitization when the boot option is
> enabled? If positive, it will make things more simple and definitely is
> better than nothing. I would have (still) preferred the other old
> approach to be merged, but whatever works at this point.

I am, because
- it's easy to merge
- it's non-controversial
- it meets security good practice and means we don't miss any
  alloc/free cases
- it avoids providing flags to help a trojan identify "interesting" data
  to acquire
- modern CPU memory clearing can be very cheap

and if it proves too expensive (which I don't think it will, based upon
distro beta builds with slab poisoning enabled etc.) then the more complex
approach you put forward can be built on top of it. Going this way first
doesn't have to exclude doing the more complex job later if it proves
needed.

Alan

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-23  8:09                 ` Alan Cox
  0 siblings, 0 replies; 220+ messages in thread
From: Alan Cox @ 2009-05-23  8:09 UTC (permalink / raw)
  To: Larry H.
  Cc: Ingo Molnar, Rik van Riel, linux-kernel, Linus Torvalds,
	linux-mm, Ingo Molnar, pageexec

> Enabling SLAB poisoning by default will be a bad idea

Why ?

> I looked for unused/re-usable flags too, but found none. It's
> interesting to see SLUB and SLOB have their own page flags. Did anybody
> oppose those when they were proposed? 

Certainly they were looked at - but the memory allocator is right at the
core of the system rather than an add on.

> > Ditto - which is why I'm coming from the position of an "if we free it
> > clear it" option. If you need that kind of security the cost should be
> > more than acceptable - especially with modern processors that can do
> > cache bypass on the clears.
> 
> Are you proposing that we should simply remove the confidential flags
> and just stick to the unconditional sanitization when the boot option is
> enabled? If positive, it will make things more simple and definitely is
> better than nothing. I would have (still) preferred the other old
> approach to be merged, but whatever works at this point.

I am, because
- it's easy to merge
- it's non-controversial
- it meets security good practice and means we don't miss any
  alloc/free cases
- it avoids providing flags to help a trojan identify "interesting" data
  to acquire
- modern CPU memory clearing can be very cheap

and if it proves too expensive (which I don't think it will, based upon
distro beta builds with slab poisoning enabled etc.) then the more complex
approach you put forward can be built on top of it. Going this way first
doesn't have to exclude doing the more complex job later if it proves
needed.

Alan

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-22 11:38       ` Larry H.
@ 2009-05-23 12:49         ` Ingo Molnar
  -1 siblings, 0 replies; 220+ messages in thread
From: Ingo Molnar @ 2009-05-23 12:49 UTC (permalink / raw)
  To: Larry H.
  Cc: Rik van Riel, linux-kernel, Linus Torvalds, linux-mm,
	Ingo Molnar, Alan Cox, pageexec


* Larry H. <research@subreption.com> wrote:

> NOTE: Let's keep the PaX Team on CC from now on, they might have further
> input to this discussion. (pageexec at freemail dot hu)
> 
> On 09:34 Fri 22 May     , Ingo Molnar wrote:
> > The whole kernel contains data that 'should not be leaked'.
> > _If_ any of this is done, i'd _very_ strongly suggest to describe it 
> > by what it does, not by what its subjective security attribute is.
> > 
> > 'PG_eyes_only' or 'PG_eagle_azf_compartmented' is silly naming. It 
> > is silly because it hardcodes one particular expectation/model of 
> > 'security'.
> > 
> > GFP_NON_PERSISTENT & PG_non_persistent is a _lot_ better, because it 
> > is a technical description of how information spreads. (which is the 
> > underlying principle of every security model)
> >
> > That name alone tells us everything about what this does: it does not 
> > allow this data to reach or touch persistent storage. It won't be 
> > swapped and it won't be saved by hibernation. It will also be 
> > cleared when freed, to achieve its goal of never touching 
> > persistent storage.
> 
> The problem is that these patches have a more broad purpose and I 
> never mentioned persistent storage as one of them (initially). 
> Check earlier messages to see what has been discussed so far.

You need to address my specific concerns instead of referring back 
to an earlier discussion. The patches touch code i maintain and i 
find them (and your latest resend) unacceptable.

> Regarding the naming changes, those have been done as of Rik's 
> comments and I would rather focus on the technical and 
> implementation side now.

Naming _is_ a technical issue. Especially here.

> > In-kernel crypto key storage using GFP_NON_PERSISTENT makes some 
> > sense - as long as the kernel stack itself is marked 
> > GFP_NON_PERSISTENT as well ... which is quite hairy from a 
> > performance point of view: we _don't_ want to clear the full 
> > stack page for every kernel thread exiting.
> 
> Burning the stack there is beyond overkill.

What you are missing is that your patch makes _no technical sense_ 
if you allow the same information to leak over the kernel stack. 
Kernel stacks can be freed and reused, swapped out and thus 
'exposed'.

	Ingo

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-23 12:49         ` Ingo Molnar
  0 siblings, 0 replies; 220+ messages in thread
From: Ingo Molnar @ 2009-05-23 12:49 UTC (permalink / raw)
  To: Larry H.
  Cc: Rik van Riel, linux-kernel, Linus Torvalds, linux-mm,
	Ingo Molnar, Alan Cox, pageexec


* Larry H. <research@subreption.com> wrote:

> NOTE: Let's keep the PaX Team on CC from now on, they might have further
> input to this discussion. (pageexec at freemail dot hu)
> 
> On 09:34 Fri 22 May     , Ingo Molnar wrote:
> > The whole kernel contains data that 'should not be leaked'.
> > _If_ any of this is done, i'd _very_ strongly suggest to describe it 
> > by what it does, not by what its subjective security attribute is.
> > 
> > 'PG_eyes_only' or 'PG_eagle_azf_compartmented' is silly naming. It 
> > is silly because it hardcodes one particular expectation/model of 
> > 'security'.
> > 
> > GFP_NON_PERSISTENT & PG_non_persistent is a _lot_ better, because it 
> > is a technical description of how information spreads. (which is the 
> > underlying principle of every security model)
> >
> > That name alone tells us everything about what this does: it does not 
> > allow this data to reach or touch persistent storage. It won't be 
> > swapped and it won't be saved by hibernation. It will also be 
> > cleared when freed, to achieve its goal of never touching 
> > persistent storage.
> 
> The problem is that these patches have a more broad purpose and I 
> never mentioned persistent storage as one of them (initially). 
> Check earlier messages to see what has been discussed so far.

You need to address my specific concerns instead of referring back 
to an earlier discussion. The patches touch code i maintain and i 
find them (and your latest resend) unacceptable.

> Regarding the naming changes, those have been done as of Rik's 
> comments and I would rather focus on the technical and 
> implementation side now.

Naming _is_ a technical issue. Especially here.

> > In-kernel crypto key storage using GFP_NON_PERSISTENT makes some 
> > sense - as long as the kernel stack itself is marked 
> > GFP_NON_PERSISTENT as well ... which is quite hairy from a 
> > performance point of view: we _don't_ want to clear the full 
> > stack page for every kernel thread exiting.
> 
> Burning the stack there is beyond overkill.

What you are missing is that your patch makes _no technical sense_ 
if you allow the same information to leak over the kernel stack. 
Kernel stacks can be freed and reused, swapped out and thus 
'exposed'.

	Ingo

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-23  8:09                 ` Alan Cox
@ 2009-05-23 15:56                   ` Arjan van de Ven
  -1 siblings, 0 replies; 220+ messages in thread
From: Arjan van de Ven @ 2009-05-23 15:56 UTC (permalink / raw)
  To: Alan Cox
  Cc: Larry H.,
	Ingo Molnar, Rik van Riel, linux-kernel, Linus Torvalds,
	linux-mm, Ingo Molnar, pageexec

On Sat, 23 May 2009 09:09:10 +0100
Alan Cox <alan@lxorguk.ukuu.org.uk> wrote:

> > Enabling SLAB poisoning by default will be a bad idea
> 
> Why ?
> 
> > I looked for unused/re-usable flags too, but found none. It's
> > interesting to see SLUB and SLOB have their own page flags. Did
> > anybody oppose those when they were proposed? 
> 
> Certainly they were looked at - but the memory allocator is right at
> the core of the system rather than an add on.
> 
> > > Ditto - which is why I'm coming from the position of an "if we
> > > free it clear it" option. If you need that kind of security the
> > > cost should be more than acceptable - especially with modern
> > > processors that can do cache bypass on the clears.
> > 
> > Are you proposing that we should simply remove the confidential
> > flags and just stick to the unconditional sanitization when the
> > boot option is enabled? If positive, it will make things more
> > simple and definitely is better than nothing. I would have (still)
> > preferred the other old approach to be merged, but whatever works
> > at this point.
> 
> I am because
> - its easy to merge
> - its non controversial
> - it meets the security good practice and means we don't miss any
>   alloc/free cases
> - it avoid providing flags to help a trojan identify "interesting"
> data to acquire
> - modern cpu memory clearing can be very cheap

.. and if we zero on free, we don't need to zero on allocate.
While this is a little controversial, it does mean that at least part of
the cost is just time-shifted, which means it'll not be TOO bad
hopefully...



-- 
Arjan van de Ven 	Intel Open Source Technology Centre
For development, discussion and tips for power savings, 
visit http://www.lesswatts.org

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-23 15:56                   ` Arjan van de Ven
  0 siblings, 0 replies; 220+ messages in thread
From: Arjan van de Ven @ 2009-05-23 15:56 UTC (permalink / raw)
  To: Alan Cox
  Cc: Larry H.,
	Ingo Molnar, Rik van Riel, linux-kernel, Linus Torvalds,
	linux-mm, Ingo Molnar, pageexec

On Sat, 23 May 2009 09:09:10 +0100
Alan Cox <alan@lxorguk.ukuu.org.uk> wrote:

> > Enabling SLAB poisoning by default will be a bad idea
> 
> Why ?
> 
> > I looked for unused/re-usable flags too, but found none. It's
> > interesting to see SLUB and SLOB have their own page flags. Did
> > anybody oppose those when they were proposed? 
> 
> Certainly they were looked at - but the memory allocator is right at
> the core of the system rather than an add on.
> 
> > > Ditto - which is why I'm coming from the position of an "if we
> > > free it clear it" option. If you need that kind of security the
> > > cost should be more than acceptable - especially with modern
> > > processors that can do cache bypass on the clears.
> > 
> > Are you proposing that we should simply remove the confidential
> > flags and just stick to the unconditional sanitization when the
> > boot option is enabled? If positive, it will make things more
> > simple and definitely is better than nothing. I would have (still)
> > preferred the other old approach to be merged, but whatever works
> > at this point.
> 
> I am because
> - its easy to merge
> - its non controversial
> - it meets the security good practice and means we don't miss any
>   alloc/free cases
> - it avoid providing flags to help a trojan identify "interesting"
> data to acquire
> - modern cpu memory clearing can be very cheap

.. and if we zero on free, we don't need to zero on allocate.
While this is a little controversial, it does mean that at least part of
the cost is just time-shifted, which means it'll not be TOO bad
hopefully...



-- 
Arjan van de Ven 	Intel Open Source Technology Centre
For development, discussion and tips for power savings, 
visit http://www.lesswatts.org

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* [PATCH] Support for unconditional page sanitization
  2009-05-23 15:56                   ` Arjan van de Ven
@ 2009-05-23 18:21                     ` Larry H.
  -1 siblings, 0 replies; 220+ messages in thread
From: Larry H. @ 2009-05-23 18:21 UTC (permalink / raw)
  To: Arjan van de Ven
  Cc: Alan Cox, Ingo Molnar, Rik van Riel, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar, pageexec

[PATCH] Support for unconditional page sanitization

A boot command line option (sanitize_mem) is added for the page
allocator to perform sanitization of all pages upon release.

This avoids leaking sensitive information when memory is released to
the system after use, for example in cryptographic subsystems. More
specifically, the following threats are addressed:

	1. Information leaks in use-after-free or uninitialized
	variable usage scenarios, such as CVE-2005-0400,
	CVE-2009-0787 and CVE-2007-6417.

	2. Data remanence based attacks, such as Iceman/Coldboot,
	which combine cold rebooting and memory image scanning
	to extract cryptographic secrets (ex. detecting AES key
	expansion blocks, RSA key patterns, etc) or other
	confidential information.

	3. Re-allocation based information leaks, especially in the
	SLAB/SLUB allocators which use LIFO caches and might expose
	sensitive data out of context (when a caller allocates an
	object and receives a pointer to a location which was used
	previously by another user).

The "Shredding Your Garbage: Reducing Data Lifetime Through Secure
Deallocation" paper by Jim Chow et. al from the Stanford University
Department of Computer Science, explains the security implications of
insecure deallocation, and provides extensive information with figures
and applications thoroughly analyzed for this behavior [1]. More recently
this issue came to widespread attention when the "Lest We Remember:
Cold Boot Attacks on Encryption Keys" (by Halderman et. al) paper was
published [2].

The code is largely based off the memory sanitization feature in the
PaX project (licensed under the GPL v2 terms) and the original
PG_sensitive patch which allowed fine-grained marking of pages using
a page flag.

This patch has been tested on x86 and amd64, with and without HIGHMEM.

	[1] http://www.stanford.edu/~blp/papers/shredding.html
	[2] http://citp.princeton.edu/memory/
	[3] http://marc.info/?l=linux-mm&m=124284428226461&w=2
	[4] http://marc.info/?t=124284431000002&r=1&w=2

Signed-off-by: Larry Highsmith <research@subreption.com>

---
 Documentation/kernel-parameters.txt    |    2 ++
 arch/alpha/include/asm/kmap_types.h    |    3 ++-
 arch/arm/include/asm/kmap_types.h      |    1 +
 arch/avr32/include/asm/kmap_types.h    |    3 ++-
 arch/blackfin/include/asm/kmap_types.h |    1 +
 arch/cris/include/asm/kmap_types.h     |    1 +
 arch/h8300/include/asm/kmap_types.h    |    1 +
 arch/ia64/include/asm/kmap_types.h     |    3 ++-
 arch/m68k/include/asm/kmap_types_mm.h  |    1 +
 arch/m68k/include/asm/kmap_types_no.h  |    1 +
 arch/mips/include/asm/kmap_types.h     |    3 ++-
 arch/parisc/include/asm/kmap_types.h   |    3 ++-
 arch/powerpc/include/asm/kmap_types.h  |    1 +
 arch/s390/include/asm/kmap_types.h     |    1 +
 arch/sh/include/asm/kmap_types.h       |    3 ++-
 arch/sparc/include/asm/kmap_types.h    |    1 +
 arch/um/include/asm/kmap_types.h       |    1 +
 arch/x86/include/asm/kmap_types.h      |    3 ++-
 arch/xtensa/include/asm/kmap_types.h   |    1 +
 include/asm-frv/kmap_types.h           |    1 +
 include/asm-m32r/kmap_types.h          |    3 ++-
 include/asm-mn10300/kmap_types.h       |    1 +
 include/linux/highmem.h                |   12 ++++++++++++
 mm/page_alloc.c                        |   27 ++++++++++++++++++++++++++-
 24 files changed, 69 insertions(+), 9 deletions(-)

Index: linux-2.6/Documentation/kernel-parameters.txt
===================================================================
--- linux-2.6.orig/Documentation/kernel-parameters.txt
+++ linux-2.6/Documentation/kernel-parameters.txt
@@ -2494,6 +2494,8 @@ and is between 256 and 4096 characters. 
 	norandmaps	Don't use address space randomization.  Equivalent to
 			echo 0 > /proc/sys/kernel/randomize_va_space
 
+	sanitize_mem	Enables sanitization (zeroing) of all pages upon release.
+
 ______________________________________________________________________
 
 TODO:
Index: linux-2.6/arch/alpha/include/asm/kmap_types.h
===================================================================
--- linux-2.6.orig/arch/alpha/include/asm/kmap_types.h
+++ linux-2.6/arch/alpha/include/asm/kmap_types.h
@@ -24,7 +24,8 @@ D(9)	KM_IRQ0,
 D(10)	KM_IRQ1,
 D(11)	KM_SOFTIRQ0,
 D(12)	KM_SOFTIRQ1,
-D(13)	KM_TYPE_NR
+D(13)  KM_CLEARPAGE,
+D(14)  KM_TYPE_NR
 };
 
 #undef D
Index: linux-2.6/arch/arm/include/asm/kmap_types.h
===================================================================
--- linux-2.6.orig/arch/arm/include/asm/kmap_types.h
+++ linux-2.6/arch/arm/include/asm/kmap_types.h
@@ -18,6 +18,7 @@ enum km_type {
 	KM_IRQ1,
 	KM_SOFTIRQ0,
 	KM_SOFTIRQ1,
+	KM_CLEARPAGE,
 	KM_TYPE_NR
 };
 
Index: linux-2.6/arch/avr32/include/asm/kmap_types.h
===================================================================
--- linux-2.6.orig/arch/avr32/include/asm/kmap_types.h
+++ linux-2.6/arch/avr32/include/asm/kmap_types.h
@@ -22,7 +22,8 @@ D(10)	KM_IRQ0,
 D(11)	KM_IRQ1,
 D(12)	KM_SOFTIRQ0,
 D(13)	KM_SOFTIRQ1,
-D(14)	KM_TYPE_NR
+D(14)	KM_CLEARPAGE,
+D(15)	KM_TYPE_NR
 };
 
 #undef D
Index: linux-2.6/arch/blackfin/include/asm/kmap_types.h
===================================================================
--- linux-2.6.orig/arch/blackfin/include/asm/kmap_types.h
+++ linux-2.6/arch/blackfin/include/asm/kmap_types.h
@@ -15,6 +15,7 @@ enum km_type {
 	KM_IRQ1,
 	KM_SOFTIRQ0,
 	KM_SOFTIRQ1,
+	KM_CLEARPAGE,
 	KM_TYPE_NR
 };
 
Index: linux-2.6/arch/cris/include/asm/kmap_types.h
===================================================================
--- linux-2.6.orig/arch/cris/include/asm/kmap_types.h
+++ linux-2.6/arch/cris/include/asm/kmap_types.h
@@ -19,6 +19,7 @@ enum km_type {
 	KM_IRQ1,
 	KM_SOFTIRQ0,
 	KM_SOFTIRQ1,
+	KM_CLEARPAGE,
 	KM_TYPE_NR
 };
 
Index: linux-2.6/arch/h8300/include/asm/kmap_types.h
===================================================================
--- linux-2.6.orig/arch/h8300/include/asm/kmap_types.h
+++ linux-2.6/arch/h8300/include/asm/kmap_types.h
@@ -15,6 +15,7 @@ enum km_type {
 	KM_IRQ1,
 	KM_SOFTIRQ0,
 	KM_SOFTIRQ1,
+	KM_CLEARPAGE,
 	KM_TYPE_NR
 };
 
Index: linux-2.6/arch/ia64/include/asm/kmap_types.h
===================================================================
--- linux-2.6.orig/arch/ia64/include/asm/kmap_types.h
+++ linux-2.6/arch/ia64/include/asm/kmap_types.h
@@ -22,7 +22,8 @@ D(9)	KM_IRQ0,
 D(10)	KM_IRQ1,
 D(11)	KM_SOFTIRQ0,
 D(12)	KM_SOFTIRQ1,
-D(13)	KM_TYPE_NR
+D(13)	KM_CLEARPAGE,
+D(14)	KM_TYPE_NR
 };
 
 #undef D
Index: linux-2.6/arch/m68k/include/asm/kmap_types_mm.h
===================================================================
--- linux-2.6.orig/arch/m68k/include/asm/kmap_types_mm.h
+++ linux-2.6/arch/m68k/include/asm/kmap_types_mm.h
@@ -15,6 +15,7 @@ enum km_type {
 	KM_IRQ1,
 	KM_SOFTIRQ0,
 	KM_SOFTIRQ1,
+	KM_CLEARPAGE,
 	KM_TYPE_NR
 };
 
Index: linux-2.6/arch/m68k/include/asm/kmap_types_no.h
===================================================================
--- linux-2.6.orig/arch/m68k/include/asm/kmap_types_no.h
+++ linux-2.6/arch/m68k/include/asm/kmap_types_no.h
@@ -15,6 +15,7 @@ enum km_type {
 	KM_IRQ1,
 	KM_SOFTIRQ0,
 	KM_SOFTIRQ1,
+	KM_CLEARPAGE,
 	KM_TYPE_NR
 };
 
Index: linux-2.6/arch/mips/include/asm/kmap_types.h
===================================================================
--- linux-2.6.orig/arch/mips/include/asm/kmap_types.h
+++ linux-2.6/arch/mips/include/asm/kmap_types.h
@@ -22,7 +22,8 @@ D(9)	KM_IRQ0,
 D(10)	KM_IRQ1,
 D(11)	KM_SOFTIRQ0,
 D(12)	KM_SOFTIRQ1,
-D(13)	KM_TYPE_NR
+D(13)	KM_CLEARPAGE,
+D(14)	KM_TYPE_NR
 };
 
 #undef D
Index: linux-2.6/arch/parisc/include/asm/kmap_types.h
===================================================================
--- linux-2.6.orig/arch/parisc/include/asm/kmap_types.h
+++ linux-2.6/arch/parisc/include/asm/kmap_types.h
@@ -22,7 +22,8 @@ D(9)	KM_IRQ0,
 D(10)	KM_IRQ1,
 D(11)	KM_SOFTIRQ0,
 D(12)	KM_SOFTIRQ1,
-D(13)	KM_TYPE_NR
+D(13)	KM_CLEARPAGE,
+D(14)	KM_TYPE_NR
 };
 
 #undef D
Index: linux-2.6/arch/powerpc/include/asm/kmap_types.h
===================================================================
--- linux-2.6.orig/arch/powerpc/include/asm/kmap_types.h
+++ linux-2.6/arch/powerpc/include/asm/kmap_types.h
@@ -26,6 +26,7 @@ enum km_type {
 	KM_SOFTIRQ1,
 	KM_PPC_SYNC_PAGE,
 	KM_PPC_SYNC_ICACHE,
+	KM_CLEARPAGE,
 	KM_TYPE_NR
 };
 
Index: linux-2.6/arch/s390/include/asm/kmap_types.h
===================================================================
--- linux-2.6.orig/arch/s390/include/asm/kmap_types.h
+++ linux-2.6/arch/s390/include/asm/kmap_types.h
@@ -16,6 +16,7 @@ enum km_type {
 	KM_IRQ1,
 	KM_SOFTIRQ0,
 	KM_SOFTIRQ1,	
+	KM_CLEARPAGE,
 	KM_TYPE_NR
 };
 
Index: linux-2.6/arch/sh/include/asm/kmap_types.h
===================================================================
--- linux-2.6.orig/arch/sh/include/asm/kmap_types.h
+++ linux-2.6/arch/sh/include/asm/kmap_types.h
@@ -24,7 +24,8 @@ D(9)	KM_IRQ0,
 D(10)	KM_IRQ1,
 D(11)	KM_SOFTIRQ0,
 D(12)	KM_SOFTIRQ1,
-D(13)	KM_TYPE_NR
+D(13)	KM_CLEARPAGE,
+D(14)	KM_TYPE_NR
 };
 
 #undef D
Index: linux-2.6/arch/sparc/include/asm/kmap_types.h
===================================================================
--- linux-2.6.orig/arch/sparc/include/asm/kmap_types.h
+++ linux-2.6/arch/sparc/include/asm/kmap_types.h
@@ -19,6 +19,7 @@ enum km_type {
 	KM_IRQ1,
 	KM_SOFTIRQ0,
 	KM_SOFTIRQ1,
+	KM_CLEARPAGE,
 	KM_TYPE_NR
 };
 
Index: linux-2.6/arch/um/include/asm/kmap_types.h
===================================================================
--- linux-2.6.orig/arch/um/include/asm/kmap_types.h
+++ linux-2.6/arch/um/include/asm/kmap_types.h
@@ -23,6 +23,7 @@ enum km_type {
 	KM_IRQ1,
 	KM_SOFTIRQ0,
 	KM_SOFTIRQ1,
+	KM_CLEARPAGE,
 	KM_TYPE_NR
 };
 
Index: linux-2.6/arch/x86/include/asm/kmap_types.h
===================================================================
--- linux-2.6.orig/arch/x86/include/asm/kmap_types.h
+++ linux-2.6/arch/x86/include/asm/kmap_types.h
@@ -21,7 +21,8 @@ D(9)	KM_IRQ0,
 D(10)	KM_IRQ1,
 D(11)	KM_SOFTIRQ0,
 D(12)	KM_SOFTIRQ1,
-D(13)	KM_TYPE_NR
+D(13)	KM_CLEARPAGE,
+D(14)	KM_TYPE_NR
 };
 
 #undef D
Index: linux-2.6/arch/xtensa/include/asm/kmap_types.h
===================================================================
--- linux-2.6.orig/arch/xtensa/include/asm/kmap_types.h
+++ linux-2.6/arch/xtensa/include/asm/kmap_types.h
@@ -25,6 +25,7 @@ enum km_type {
   KM_IRQ1,
   KM_SOFTIRQ0,
   KM_SOFTIRQ1,
+  KM_CLEARPAGE,
   KM_TYPE_NR
 };
 
Index: linux-2.6/include/asm-frv/kmap_types.h
===================================================================
--- linux-2.6.orig/include/asm-frv/kmap_types.h
+++ linux-2.6/include/asm-frv/kmap_types.h
@@ -23,6 +23,7 @@ enum km_type {
 	KM_IRQ1,
 	KM_SOFTIRQ0,
 	KM_SOFTIRQ1,
+	KM_CLEARPAGE,
 	KM_TYPE_NR
 };
 
Index: linux-2.6/include/asm-m32r/kmap_types.h
===================================================================
--- linux-2.6.orig/include/asm-m32r/kmap_types.h
+++ linux-2.6/include/asm-m32r/kmap_types.h
@@ -21,7 +21,8 @@ D(9)	KM_IRQ0,
 D(10)	KM_IRQ1,
 D(11)	KM_SOFTIRQ0,
 D(12)	KM_SOFTIRQ1,
-D(13)	KM_TYPE_NR
+D(13)	KM_CLEARPAGE,
+D(14)	KM_TYPE_NR
 };
 
 #undef D
Index: linux-2.6/include/asm-mn10300/kmap_types.h
===================================================================
--- linux-2.6.orig/include/asm-mn10300/kmap_types.h
+++ linux-2.6/include/asm-mn10300/kmap_types.h
@@ -25,6 +25,7 @@ enum km_type {
 	KM_IRQ1,
 	KM_SOFTIRQ0,
 	KM_SOFTIRQ1,
+	KM_CLEARPAGE,
 	KM_TYPE_NR
 };
 
Index: linux-2.6/include/linux/highmem.h
===================================================================
--- linux-2.6.orig/include/linux/highmem.h
+++ linux-2.6/include/linux/highmem.h
@@ -124,6 +124,18 @@ static inline void clear_highpage(struct
 	kunmap_atomic(kaddr, KM_USER0);
 }
 
+static inline void sanitize_highpage(struct page *page)
+{
+	void *kaddr;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	kaddr = kmap_atomic(page, KM_CLEARPAGE);
+	clear_page(kaddr);
+	kunmap_atomic(kaddr, KM_CLEARPAGE);
+	local_irq_restore(flags);
+}
+
 static inline void zero_user_segments(struct page *page,
 	unsigned start1, unsigned end1,
 	unsigned start2, unsigned end2)
Index: linux-2.6/mm/page_alloc.c
===================================================================
--- linux-2.6.orig/mm/page_alloc.c
+++ linux-2.6/mm/page_alloc.c
@@ -123,6 +123,7 @@ int min_free_kbytes = 1024;
 unsigned long __meminitdata nr_kernel_pages;
 unsigned long __meminitdata nr_all_pages;
 static unsigned long __meminitdata dma_reserve;
+int sanitize_all_mem;
 
 #ifdef CONFIG_ARCH_POPULATES_NODE_MAP
   /*
@@ -221,6 +222,15 @@ static inline int bad_range(struct zone 
 }
 #endif
 
+static __init int setup_page_sanitization(char *s)
+{
+	printk(KERN_INFO "Memory sanitization enabled.\n");
+	sanitize_all_mem = 1;
+
+	return 0;
+}
+early_param("sanitize_mem", setup_page_sanitization);
+
 static void bad_page(struct page *page)
 {
 	static unsigned long resume;
@@ -545,6 +555,7 @@ static void free_one_page(struct zone *z
 
 static void __free_pages_ok(struct page *page, unsigned int order)
 {
+	unsigned long index = 1UL << order;
 	unsigned long flags;
 	int i;
 	int bad = 0;
@@ -559,6 +570,16 @@ static void __free_pages_ok(struct page 
 		debug_check_no_obj_freed(page_address(page),
 					   PAGE_SIZE << order);
 	}
+
+	/*
+	 * Page sanitization is enabled, let's clear the page contents before
+	 * release.
+	 */
+	if (sanitize_all_mem) {
+		for (; index; --index)
+			sanitize_highpage(page + index - 1);
+	}
+
 	arch_free_page(page, order);
 	kernel_map_pages(page, 1 << order, 0);
 
@@ -647,7 +668,7 @@ static int prep_new_page(struct page *pa
 	arch_alloc_page(page, order);
 	kernel_map_pages(page, 1 << order, 1);
 
-	if (gfp_flags & __GFP_ZERO)
+	if ((gfp_flags & __GFP_ZERO) && !sanitize_all_mem)
 		prep_zero_page(page, order, gfp_flags);
 
 	if (order && (gfp_flags & __GFP_COMP))
@@ -1009,6 +1030,10 @@ static void free_hot_cold_page(struct pa
 		debug_check_no_locks_freed(page_address(page), PAGE_SIZE);
 		debug_check_no_obj_freed(page_address(page), PAGE_SIZE);
 	}
+
+	if (sanitize_all_mem)
+		sanitize_highpage(page);
+
 	arch_free_page(page, 0);
 	kernel_map_pages(page, 1, 0);
 

^ permalink raw reply	[flat|nested] 220+ messages in thread

* [PATCH] Support for unconditional page sanitization
@ 2009-05-23 18:21                     ` Larry H.
  0 siblings, 0 replies; 220+ messages in thread
From: Larry H. @ 2009-05-23 18:21 UTC (permalink / raw)
  To: Arjan van de Ven
  Cc: Alan Cox, Ingo Molnar, Rik van Riel, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar, pageexec

[PATCH] Support for unconditional page sanitization

A boot command line option (sanitize_mem) is added for the page
allocator to perform sanitization of all pages upon release.

This avoids leaking sensitive information when memory is released to
the system after use, for example in cryptographic subsystems. More
specifically, the following threats are addressed:

	1. Information leaks in use-after-free or uninitialized
	variable usage scenarios, such as CVE-2005-0400,
	CVE-2009-0787 and CVE-2007-6417.

	2. Data-remanence-based attacks, such as Iceman/Coldboot,
	which combine cold rebooting and memory image scanning
	to extract cryptographic secrets (e.g. detecting AES key
	expansion blocks, RSA key patterns, etc.) or other
	confidential information.

	3. Re-allocation based information leaks, especially in the
	SLAB/SLUB allocators which use LIFO caches and might expose
	sensitive data out of context (when a caller allocates an
	object and receives a pointer to a location which was used
	previously by another user).

The "Shredding Your Garbage: Reducing Data Lifetime Through Secure
Deallocation" paper by Jim Chow et al. from the Stanford University
Department of Computer Science explains the security implications of
insecure deallocation, and provides extensive information with figures
and applications thoroughly analyzed for this behavior [1]. More recently
this issue came to widespread attention when the "Lest We Remember:
Cold Boot Attacks on Encryption Keys" (by Halderman et al.) paper was
published [2].

The code is largely based off the memory sanitization feature in the
PaX project (licensed under the GPL v2 terms) and the original
PG_sensitive patch which allowed fine-grained marking of pages using
a page flag.

This patch has been tested on x86 and amd64, with and without HIGHMEM.

	[1] http://www.stanford.edu/~blp/papers/shredding.html
	[2] http://citp.princeton.edu/memory/
	[3] http://marc.info/?l=linux-mm&m=124284428226461&w=2
	[4] http://marc.info/?t=124284431000002&r=1&w=2

Signed-off-by: Larry Highsmith <research@subreption.com>

---
 Documentation/kernel-parameters.txt    |    2 ++
 arch/alpha/include/asm/kmap_types.h    |    3 ++-
 arch/arm/include/asm/kmap_types.h      |    1 +
 arch/avr32/include/asm/kmap_types.h    |    3 ++-
 arch/blackfin/include/asm/kmap_types.h |    1 +
 arch/cris/include/asm/kmap_types.h     |    1 +
 arch/h8300/include/asm/kmap_types.h    |    1 +
 arch/ia64/include/asm/kmap_types.h     |    3 ++-
 arch/m68k/include/asm/kmap_types_mm.h  |    1 +
 arch/m68k/include/asm/kmap_types_no.h  |    1 +
 arch/mips/include/asm/kmap_types.h     |    3 ++-
 arch/parisc/include/asm/kmap_types.h   |    3 ++-
 arch/powerpc/include/asm/kmap_types.h  |    1 +
 arch/s390/include/asm/kmap_types.h     |    1 +
 arch/sh/include/asm/kmap_types.h       |    3 ++-
 arch/sparc/include/asm/kmap_types.h    |    1 +
 arch/um/include/asm/kmap_types.h       |    1 +
 arch/x86/include/asm/kmap_types.h      |    3 ++-
 arch/xtensa/include/asm/kmap_types.h   |    1 +
 include/asm-frv/kmap_types.h           |    1 +
 include/asm-m32r/kmap_types.h          |    3 ++-
 include/asm-mn10300/kmap_types.h       |    1 +
 include/linux/highmem.h                |   12 ++++++++++++
 mm/page_alloc.c                        |   27 ++++++++++++++++++++++++++-
 24 files changed, 69 insertions(+), 9 deletions(-)

Index: linux-2.6/Documentation/kernel-parameters.txt
===================================================================
--- linux-2.6.orig/Documentation/kernel-parameters.txt
+++ linux-2.6/Documentation/kernel-parameters.txt
@@ -2494,6 +2494,8 @@ and is between 256 and 4096 characters. 
 	norandmaps	Don't use address space randomization.  Equivalent to
 			echo 0 > /proc/sys/kernel/randomize_va_space
 
+	sanitize_mem	Enables sanitization of all allocated pages.
+
 ______________________________________________________________________
 
 TODO:
Index: linux-2.6/arch/alpha/include/asm/kmap_types.h
===================================================================
--- linux-2.6.orig/arch/alpha/include/asm/kmap_types.h
+++ linux-2.6/arch/alpha/include/asm/kmap_types.h
@@ -24,7 +24,8 @@ D(9)	KM_IRQ0,
 D(10)	KM_IRQ1,
 D(11)	KM_SOFTIRQ0,
 D(12)	KM_SOFTIRQ1,
-D(13)	KM_TYPE_NR
+D(13)  KM_CLEARPAGE,
+D(14)  KM_TYPE_NR
 };
 
 #undef D
Index: linux-2.6/arch/arm/include/asm/kmap_types.h
===================================================================
--- linux-2.6.orig/arch/arm/include/asm/kmap_types.h
+++ linux-2.6/arch/arm/include/asm/kmap_types.h
@@ -18,6 +18,7 @@ enum km_type {
 	KM_IRQ1,
 	KM_SOFTIRQ0,
 	KM_SOFTIRQ1,
+	KM_CLEARPAGE,
 	KM_TYPE_NR
 };
 
Index: linux-2.6/arch/avr32/include/asm/kmap_types.h
===================================================================
--- linux-2.6.orig/arch/avr32/include/asm/kmap_types.h
+++ linux-2.6/arch/avr32/include/asm/kmap_types.h
@@ -22,7 +22,8 @@ D(10)	KM_IRQ0,
 D(11)	KM_IRQ1,
 D(12)	KM_SOFTIRQ0,
 D(13)	KM_SOFTIRQ1,
-D(14)	KM_TYPE_NR
+D(14)	KM_CLEARPAGE,
+D(15)	KM_TYPE_NR
 };
 
 #undef D
Index: linux-2.6/arch/blackfin/include/asm/kmap_types.h
===================================================================
--- linux-2.6.orig/arch/blackfin/include/asm/kmap_types.h
+++ linux-2.6/arch/blackfin/include/asm/kmap_types.h
@@ -15,6 +15,7 @@ enum km_type {
 	KM_IRQ1,
 	KM_SOFTIRQ0,
 	KM_SOFTIRQ1,
+	KM_CLEARPAGE,
 	KM_TYPE_NR
 };
 
Index: linux-2.6/arch/cris/include/asm/kmap_types.h
===================================================================
--- linux-2.6.orig/arch/cris/include/asm/kmap_types.h
+++ linux-2.6/arch/cris/include/asm/kmap_types.h
@@ -19,6 +19,7 @@ enum km_type {
 	KM_IRQ1,
 	KM_SOFTIRQ0,
 	KM_SOFTIRQ1,
+	KM_CLEARPAGE,
 	KM_TYPE_NR
 };
 
Index: linux-2.6/arch/h8300/include/asm/kmap_types.h
===================================================================
--- linux-2.6.orig/arch/h8300/include/asm/kmap_types.h
+++ linux-2.6/arch/h8300/include/asm/kmap_types.h
@@ -15,6 +15,7 @@ enum km_type {
 	KM_IRQ1,
 	KM_SOFTIRQ0,
 	KM_SOFTIRQ1,
+	KM_CLEARPAGE,
 	KM_TYPE_NR
 };
 
Index: linux-2.6/arch/ia64/include/asm/kmap_types.h
===================================================================
--- linux-2.6.orig/arch/ia64/include/asm/kmap_types.h
+++ linux-2.6/arch/ia64/include/asm/kmap_types.h
@@ -22,7 +22,8 @@ D(9)	KM_IRQ0,
 D(10)	KM_IRQ1,
 D(11)	KM_SOFTIRQ0,
 D(12)	KM_SOFTIRQ1,
-D(13)	KM_TYPE_NR
+D(13)	KM_CLEARPAGE,
+D(14)	KM_TYPE_NR
 };
 
 #undef D
Index: linux-2.6/arch/m68k/include/asm/kmap_types_mm.h
===================================================================
--- linux-2.6.orig/arch/m68k/include/asm/kmap_types_mm.h
+++ linux-2.6/arch/m68k/include/asm/kmap_types_mm.h
@@ -15,6 +15,7 @@ enum km_type {
 	KM_IRQ1,
 	KM_SOFTIRQ0,
 	KM_SOFTIRQ1,
+	KM_CLEARPAGE,
 	KM_TYPE_NR
 };
 
Index: linux-2.6/arch/m68k/include/asm/kmap_types_no.h
===================================================================
--- linux-2.6.orig/arch/m68k/include/asm/kmap_types_no.h
+++ linux-2.6/arch/m68k/include/asm/kmap_types_no.h
@@ -15,6 +15,7 @@ enum km_type {
 	KM_IRQ1,
 	KM_SOFTIRQ0,
 	KM_SOFTIRQ1,
+	KM_CLEARPAGE,
 	KM_TYPE_NR
 };
 
Index: linux-2.6/arch/mips/include/asm/kmap_types.h
===================================================================
--- linux-2.6.orig/arch/mips/include/asm/kmap_types.h
+++ linux-2.6/arch/mips/include/asm/kmap_types.h
@@ -22,7 +22,8 @@ D(9)	KM_IRQ0,
 D(10)	KM_IRQ1,
 D(11)	KM_SOFTIRQ0,
 D(12)	KM_SOFTIRQ1,
-D(13)	KM_TYPE_NR
+D(13)	KM_CLEARPAGE,
+D(14)	KM_TYPE_NR
 };
 
 #undef D
Index: linux-2.6/arch/parisc/include/asm/kmap_types.h
===================================================================
--- linux-2.6.orig/arch/parisc/include/asm/kmap_types.h
+++ linux-2.6/arch/parisc/include/asm/kmap_types.h
@@ -22,7 +22,8 @@ D(9)	KM_IRQ0,
 D(10)	KM_IRQ1,
 D(11)	KM_SOFTIRQ0,
 D(12)	KM_SOFTIRQ1,
-D(13)	KM_TYPE_NR
+D(13)	KM_CLEARPAGE,
+D(14)	KM_TYPE_NR
 };
 
 #undef D
Index: linux-2.6/arch/powerpc/include/asm/kmap_types.h
===================================================================
--- linux-2.6.orig/arch/powerpc/include/asm/kmap_types.h
+++ linux-2.6/arch/powerpc/include/asm/kmap_types.h
@@ -26,6 +26,7 @@ enum km_type {
 	KM_SOFTIRQ1,
 	KM_PPC_SYNC_PAGE,
 	KM_PPC_SYNC_ICACHE,
+	KM_CLEARPAGE,
 	KM_TYPE_NR
 };
 
Index: linux-2.6/arch/s390/include/asm/kmap_types.h
===================================================================
--- linux-2.6.orig/arch/s390/include/asm/kmap_types.h
+++ linux-2.6/arch/s390/include/asm/kmap_types.h
@@ -16,6 +16,7 @@ enum km_type {
 	KM_IRQ1,
 	KM_SOFTIRQ0,
 	KM_SOFTIRQ1,	
+	KM_CLEARPAGE,
 	KM_TYPE_NR
 };
 
Index: linux-2.6/arch/sh/include/asm/kmap_types.h
===================================================================
--- linux-2.6.orig/arch/sh/include/asm/kmap_types.h
+++ linux-2.6/arch/sh/include/asm/kmap_types.h
@@ -24,7 +24,8 @@ D(9)	KM_IRQ0,
 D(10)	KM_IRQ1,
 D(11)	KM_SOFTIRQ0,
 D(12)	KM_SOFTIRQ1,
-D(13)	KM_TYPE_NR
+D(13)	KM_CLEARPAGE,
+D(14)	KM_TYPE_NR
 };
 
 #undef D
Index: linux-2.6/arch/sparc/include/asm/kmap_types.h
===================================================================
--- linux-2.6.orig/arch/sparc/include/asm/kmap_types.h
+++ linux-2.6/arch/sparc/include/asm/kmap_types.h
@@ -19,6 +19,7 @@ enum km_type {
 	KM_IRQ1,
 	KM_SOFTIRQ0,
 	KM_SOFTIRQ1,
+	KM_CLEARPAGE,
 	KM_TYPE_NR
 };
 
Index: linux-2.6/arch/um/include/asm/kmap_types.h
===================================================================
--- linux-2.6.orig/arch/um/include/asm/kmap_types.h
+++ linux-2.6/arch/um/include/asm/kmap_types.h
@@ -23,6 +23,7 @@ enum km_type {
 	KM_IRQ1,
 	KM_SOFTIRQ0,
 	KM_SOFTIRQ1,
+	KM_CLEARPAGE,
 	KM_TYPE_NR
 };
 
Index: linux-2.6/arch/x86/include/asm/kmap_types.h
===================================================================
--- linux-2.6.orig/arch/x86/include/asm/kmap_types.h
+++ linux-2.6/arch/x86/include/asm/kmap_types.h
@@ -21,7 +21,8 @@ D(9)	KM_IRQ0,
 D(10)	KM_IRQ1,
 D(11)	KM_SOFTIRQ0,
 D(12)	KM_SOFTIRQ1,
-D(13)	KM_TYPE_NR
+D(13)	KM_CLEARPAGE,
+D(14)	KM_TYPE_NR
 };
 
 #undef D
Index: linux-2.6/arch/xtensa/include/asm/kmap_types.h
===================================================================
--- linux-2.6.orig/arch/xtensa/include/asm/kmap_types.h
+++ linux-2.6/arch/xtensa/include/asm/kmap_types.h
@@ -25,6 +25,7 @@ enum km_type {
   KM_IRQ1,
   KM_SOFTIRQ0,
   KM_SOFTIRQ1,
+  KM_CLEARPAGE,
   KM_TYPE_NR
 };
 
Index: linux-2.6/include/asm-frv/kmap_types.h
===================================================================
--- linux-2.6.orig/include/asm-frv/kmap_types.h
+++ linux-2.6/include/asm-frv/kmap_types.h
@@ -23,6 +23,7 @@ enum km_type {
 	KM_IRQ1,
 	KM_SOFTIRQ0,
 	KM_SOFTIRQ1,
+	KM_CLEARPAGE,
 	KM_TYPE_NR
 };
 
Index: linux-2.6/include/asm-m32r/kmap_types.h
===================================================================
--- linux-2.6.orig/include/asm-m32r/kmap_types.h
+++ linux-2.6/include/asm-m32r/kmap_types.h
@@ -21,7 +21,8 @@ D(9)	KM_IRQ0,
 D(10)	KM_IRQ1,
 D(11)	KM_SOFTIRQ0,
 D(12)	KM_SOFTIRQ1,
-D(13)	KM_TYPE_NR
+D(13)	KM_CLEARPAGE,
+D(14)	KM_TYPE_NR
 };
 
 #undef D
Index: linux-2.6/include/asm-mn10300/kmap_types.h
===================================================================
--- linux-2.6.orig/include/asm-mn10300/kmap_types.h
+++ linux-2.6/include/asm-mn10300/kmap_types.h
@@ -25,6 +25,7 @@ enum km_type {
 	KM_IRQ1,
 	KM_SOFTIRQ0,
 	KM_SOFTIRQ1,
+	KM_CLEARPAGE,
 	KM_TYPE_NR
 };
 
Index: linux-2.6/include/linux/highmem.h
===================================================================
--- linux-2.6.orig/include/linux/highmem.h
+++ linux-2.6/include/linux/highmem.h
@@ -124,6 +124,18 @@ static inline void clear_highpage(struct
 	kunmap_atomic(kaddr, KM_USER0);
 }
 
+static inline void sanitize_highpage(struct page *page)
+{
+	void *kaddr;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	kaddr = kmap_atomic(page, KM_CLEARPAGE);
+	clear_page(kaddr);
+	kunmap_atomic(kaddr, KM_CLEARPAGE);
+	local_irq_restore(flags);
+}
+
 static inline void zero_user_segments(struct page *page,
 	unsigned start1, unsigned end1,
 	unsigned start2, unsigned end2)
Index: linux-2.6/mm/page_alloc.c
===================================================================
--- linux-2.6.orig/mm/page_alloc.c
+++ linux-2.6/mm/page_alloc.c
@@ -123,6 +123,7 @@ int min_free_kbytes = 1024;
 unsigned long __meminitdata nr_kernel_pages;
 unsigned long __meminitdata nr_all_pages;
 static unsigned long __meminitdata dma_reserve;
+int sanitize_all_mem;
 
 #ifdef CONFIG_ARCH_POPULATES_NODE_MAP
   /*
@@ -221,6 +222,15 @@ static inline int bad_range(struct zone 
 }
 #endif
 
+static __init int setup_page_sanitization(char *s)
+{
+	printk(KERN_INFO "Memory sanitization enabled.\n");
+	sanitize_all_mem = 1;
+
+	return 0;
+}
+early_param("sanitize_mem", setup_page_sanitization);
+
 static void bad_page(struct page *page)
 {
 	static unsigned long resume;
@@ -545,6 +555,7 @@ static void free_one_page(struct zone *z
 
 static void __free_pages_ok(struct page *page, unsigned int order)
 {
+	unsigned long index = 1UL << order;
 	unsigned long flags;
 	int i;
 	int bad = 0;
@@ -559,6 +570,16 @@ static void __free_pages_ok(struct page 
 		debug_check_no_obj_freed(page_address(page),
 					   PAGE_SIZE << order);
 	}
+
+	/*
+	 * Page sanitization is enabled, let's clear the page contents before
+	 * release.
+	 */
+	if (sanitize_all_mem) {
+		for (; index; --index)
+			sanitize_highpage(page + index - 1);
+	}
+
 	arch_free_page(page, order);
 	kernel_map_pages(page, 1 << order, 0);
 
@@ -647,7 +668,7 @@ static int prep_new_page(struct page *pa
 	arch_alloc_page(page, order);
 	kernel_map_pages(page, 1 << order, 1);
 
-	if (gfp_flags & __GFP_ZERO)
+	if ((gfp_flags & __GFP_ZERO) && !sanitize_all_mem)
 		prep_zero_page(page, order, gfp_flags);
 
 	if (order && (gfp_flags & __GFP_COMP))
@@ -1009,6 +1030,10 @@ static void free_hot_cold_page(struct pa
 		debug_check_no_locks_freed(page_address(page), PAGE_SIZE);
 		debug_check_no_obj_freed(page_address(page), PAGE_SIZE);
 	}
+
+	if (sanitize_all_mem)
+		sanitize_highpage(page);
+
 	arch_free_page(page, 0);
 	kernel_map_pages(page, 1, 0);
 

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [PATCH] Support for unconditional page sanitization
  2009-05-23 18:21                     ` Larry H.
@ 2009-05-23 21:05                       ` Arjan van de Ven
  -1 siblings, 0 replies; 220+ messages in thread
From: Arjan van de Ven @ 2009-05-23 21:05 UTC (permalink / raw)
  To: Larry H.
  Cc: Alan Cox, Ingo Molnar, Rik van Riel, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar, pageexec

On Sat, 23 May 2009 11:21:41 -0700
"Larry H." <research@subreption.com> wrote:

> +static inline void sanitize_highpage(struct page *page)

any reason we're not reusing clear_highpage() for this?
(I know it's currently slightly different, but that is fixable)


also, have you checked that you stopped clearing the page in the
normal anonymous memory pagefault handler path? If the page is 
guaranteed to be clear already you can save that copy
(basically you move the clear from allocate to free..)


-- 
Arjan van de Ven 	Intel Open Source Technology Centre
For development, discussion and tips for power savings, 
visit http://www.lesswatts.org

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [PATCH] Support for unconditional page sanitization
@ 2009-05-23 21:05                       ` Arjan van de Ven
  0 siblings, 0 replies; 220+ messages in thread
From: Arjan van de Ven @ 2009-05-23 21:05 UTC (permalink / raw)
  To: Larry H.
  Cc: Alan Cox, Ingo Molnar, Rik van Riel, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar, pageexec

On Sat, 23 May 2009 11:21:41 -0700
"Larry H." <research@subreption.com> wrote:

> +static inline void sanitize_highpage(struct page *page)

any reason we're not reusing clear_highpage() for this?
(I know it's currently slightly different, but that is fixable)


also, have you checked that you stopped clearing the page in the
normal anonymous memory pagefault handler path? If the page is 
guaranteed to be clear already you can save that copy
(basically you move the clear from allocate to free..)


-- 
Arjan van de Ven 	Intel Open Source Technology Centre
For development, discussion and tips for power savings, 
visit http://www.lesswatts.org

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-23 12:49         ` Ingo Molnar
@ 2009-05-23 22:28           ` Larry H.
  -1 siblings, 0 replies; 220+ messages in thread
From: Larry H. @ 2009-05-23 22:28 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Rik van Riel, linux-kernel, Linus Torvalds, linux-mm,
	Ingo Molnar, Alan Cox, pageexec

On 14:49 Sat 23 May     , Ingo Molnar wrote:
> You need to address my specific concerns instead of referring back 
> to an earlier discussion. The patches touch code i maintain and i 
> find them (and your latest resend) unacceptable.

Meaning the latest boot option-based unconditional sanitization which
doesn't touch anything else and doesn't duplicate clearing (it only
performs such during release)?

> Naming _is_ a technical issue. Especially here.

True, that's no longer an issue since the page flag approach has been
left out of the patch (albeit at the cost of our ability to do
fine-grained clearing and to track status across the different
higher-level interfaces through the gfp flag). Do you still have a
problem with something related to naming?

If any of the variable names still don't catch your fancy, please let me
know.

> What you are missing is that your patch makes _no technical sense_ 
> if you allow the same information to leak over the kernel stack. 
> Kernel stacks can be freed and reused, swapped out and thus 
> 'exposed'.

Do you have technical evidence to back up that claim? Perhaps an
analysis and testcase that demonstrates true resilience of the kernel
stack information? Something that can convince me I'm mistaken by
showing that it isn't extremely volatile? That it doesn't get
overwritten to smithereens?

I have a simple testcase for vmalloc/kmalloc/page allocator
sanitization. The current patch covers both vmalloc and page allocators
well, since the former is basically dependent on the latter. kmalloc
still won't get sanitized until the slab is returned to the page
allocator (during cache shrink/reaping or when it becomes empty).

Also, a political question: are you the only current maintainer of the
affected code, or are there other people who might not necessarily share
your opinion on this?

	Larry


^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-23 22:28           ` Larry H.
  0 siblings, 0 replies; 220+ messages in thread
From: Larry H. @ 2009-05-23 22:28 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Rik van Riel, linux-kernel, Linus Torvalds, linux-mm,
	Ingo Molnar, Alan Cox, pageexec

On 14:49 Sat 23 May     , Ingo Molnar wrote:
> You need to address my specific concerns instead of referring back 
> to an earlier discussion. The patches touch code i maintain and i 
> find them (and your latest resend) unacceptable.

Meaning the latest boot option-based unconditional sanitization which
doesn't touch anything else and doesn't duplicate clearing (it only
performs such during release)?

> Naming _is_ a technical issue. Especially here.

True, that's no longer an issue since the page flag approach has been
left out of the patch (albeit at the cost of our ability to do
fine-grained clearing and to track status across the different
higher-level interfaces through the gfp flag). Do you still have a
problem with something related to naming?

If any of the variable names still don't catch your fancy, please let me
know.

> What you are missing is that your patch makes _no technical sense_ 
> if you allow the same information to leak over the kernel stack. 
> Kernel stacks can be freed and reused, swapped out and thus 
> 'exposed'.

Do you have technical evidence to back up that claim? Perhaps an
analysis and testcase that demonstrates true resilience of the kernel
stack information? Something that can convince me I'm mistaken by
showing that it isn't extremely volatile? That it doesn't get
overwritten to smithereens?

I have a simple testcase for vmalloc/kmalloc/page allocator
sanitization. The current patch covers both vmalloc and page allocators
well, since the former is basically dependent on the latter. kmalloc
still won't get sanitized until the slab is returned to the page
allocator (during cache shrink/reaping or when it becomes empty).

Also, a political question: are you the only current maintainer of the
affected code, or are there other people who might not necessarily share
your opinion on this?

	Larry

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-23 12:49         ` Ingo Molnar
@ 2009-05-23 22:42           ` Rik van Riel
  -1 siblings, 0 replies; 220+ messages in thread
From: Rik van Riel @ 2009-05-23 22:42 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Larry H.,
	linux-kernel, Linus Torvalds, linux-mm, Ingo Molnar, Alan Cox,
	pageexec

Ingo Molnar wrote:

> What you are missing is that your patch makes _no technical sense_ 
> if you allow the same information to leak over the kernel stack. 
> Kernel stacks can be freed and reused, swapped out and thus 
> 'exposed'.

Kernel stacks may be freed and reused, but Larry's latest
patch takes care of that by clearing them at page free
time.

As for being swapped out - I do not believe that kernel
stacks can ever be swapped out in Linux.

-- 
All rights reversed.

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-23 22:42           ` Rik van Riel
  0 siblings, 0 replies; 220+ messages in thread
From: Rik van Riel @ 2009-05-23 22:42 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Larry H.,
	linux-kernel, Linus Torvalds, linux-mm, Ingo Molnar, Alan Cox,
	pageexec

Ingo Molnar wrote:

> What you are missing is that your patch makes _no technical sense_ 
> if you allow the same information to leak over the kernel stack. 
> Kernel stacks can be freed and reused, swapped out and thus 
> 'exposed'.

Kernel stacks may be freed and reused, but Larry's latest
patch takes care of that by clearing them at page free
time.

As for being swapped out - I do not believe that kernel
stacks can ever be swapped out in Linux.

-- 
All rights reversed.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [PATCH] Support for unconditional page sanitization
  2009-05-23 21:05                       ` Arjan van de Ven
@ 2009-05-24 10:19                         ` pageexec
  -1 siblings, 0 replies; 220+ messages in thread
From: pageexec @ 2009-05-24 10:19 UTC (permalink / raw)
  To: Larry H., Arjan van de Ven
  Cc: Alan Cox, Ingo Molnar, Rik van Riel, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar

On 23 May 2009 at 14:05, Arjan van de Ven wrote:

> On Sat, 23 May 2009 11:21:41 -0700
> "Larry H." <research@subreption.com> wrote:
> 
> > +static inline void sanitize_highpage(struct page *page)
> 
> any reason we're not reusing clear_highpage() for this?
> (I know it's currently slightly different, but that is fixable)

KM_USER0 users are not supposed to be called from soft/hard irq
contexts for high memory pages, something that cannot be guaranteed
at this low level of page freeing (i.e., we could be interrupting
a clear_highmem and overwrite its KM_USER0 mapping, leaving it dead
in the water when we return there). in other words, sanitization
must be able to nest within KM_USER*, so that pretty much calls for
its own slot.

the alternative is to change KM_USER* semantics and allow its use
from the same contexts as free_page et al., but given the existing
users, that may very well be considered overkill.

on a related note, one could already say that disabling interrupts
during a memset over a page or more is already bad enough for your
real-time response times, so you may want to make this whole change
depend on the kernel's preemption model or at least document it.

> also, have you checked that you stopped clearing the page in the
> normal anonymous memory pagefault handler path? If the page is 
> guaranteed to be clear already you can save that copy
> (basically you move the clear from allocate to free..)

all new page allocations end up in prep_new_page and the clear_highpage
(memset) there depends on !sanitize_all_mem.


^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [PATCH] Support for unconditional page sanitization
@ 2009-05-24 10:19                         ` pageexec
  0 siblings, 0 replies; 220+ messages in thread
From: pageexec @ 2009-05-24 10:19 UTC (permalink / raw)
  To: Larry H., Arjan van de Ven
  Cc: Alan Cox, Ingo Molnar, Rik van Riel, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar

On 23 May 2009 at 14:05, Arjan van de Ven wrote:

> On Sat, 23 May 2009 11:21:41 -0700
> "Larry H." <research@subreption.com> wrote:
> 
> > +static inline void sanitize_highpage(struct page *page)
> 
> any reason we're not reusing clear_highpage() for this?
> (I know it's currently slightly different, but that is fixable)

KM_USER0 users are not supposed to be called from soft/hard irq
contexts for high memory pages, something that cannot be guaranteed
at this low level of page freeing (i.e., we could be interrupting
a clear_highmem and overwrite its KM_USER0 mapping, leaving it dead
in the water when we return there). in other words, sanitization
must be able to nest within KM_USER*, so that pretty much calls for
its own slot.

the alternative is to change KM_USER* semantics and allow its use
from the same contexts as free_page et al., but given the existing
users, that may very well be considered overkill.

on a related note, one could already say that disabling interrupts
during a memset over a page or more is already bad enough for your
real-time response times, so you may want to make this whole change
depend on the kernel's preemption model or at least document it.

> also, have you checked that you stopped clearing the page in the
> normal anonymous memory pagefault handler path? If the page is 
> guaranteed to be clear already you can save that copy
> (basically you move the clear from allocate to free..)

all new page allocations end up in prep_new_page and the clear_highpage
(memset) there depends on !sanitize_all_mem.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [PATCH] Support for unconditional page sanitization
  2009-05-24 10:19                         ` pageexec
@ 2009-05-24 16:38                           ` Arjan van de Ven
  -1 siblings, 0 replies; 220+ messages in thread
From: Arjan van de Ven @ 2009-05-24 16:38 UTC (permalink / raw)
  To: pageexec
  Cc: Larry H.,
	Alan Cox, Ingo Molnar, Rik van Riel, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar

On Sun, 24 May 2009 12:19:48 +0200
pageexec@freemail.hu wrote:

> On 23 May 2009 at 14:05, Arjan van de Ven wrote:
> 
> > On Sat, 23 May 2009 11:21:41 -0700
> > "Larry H." <research@subreption.com> wrote:
> > 
> > > +static inline void sanitize_highpage(struct page *page)
> > 
> > any reason we're not reusing clear_highpage() for this?
> > (I know it's currently slightly different, but that is fixable)
> 
> KM_USER0 users are not supposed to be called from soft/hard irq
> contexts for high memory pages, something that cannot be guaranteed
> at this low level of page freeing (i.e., we could be interrupting
> a clear_highmem and overwrite its KM_USER0 mapping, leaving it dead
> in the water when we return there). in other words, sanitization
> must be able to nest within KM_USER*, so that pretty much calls for
> its own slot.

no argument that current clear_highpage isn't a fit. I was more
thinking about using the content of sanitize_highpage(), and just
calling that clear_highpage(). (or in other words, improve
clear_highpage to be usable in more situations)


-- 
Arjan van de Ven 	Intel Open Source Technology Centre
For development, discussion and tips for power savings, 
visit http://www.lesswatts.org

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [PATCH] Support for unconditional page sanitization
@ 2009-05-24 16:38                           ` Arjan van de Ven
  0 siblings, 0 replies; 220+ messages in thread
From: Arjan van de Ven @ 2009-05-24 16:38 UTC (permalink / raw)
  To: pageexec
  Cc: Larry H.,
	Alan Cox, Ingo Molnar, Rik van Riel, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar

On Sun, 24 May 2009 12:19:48 +0200
pageexec@freemail.hu wrote:

> On 23 May 2009 at 14:05, Arjan van de Ven wrote:
> 
> > On Sat, 23 May 2009 11:21:41 -0700
> > "Larry H." <research@subreption.com> wrote:
> > 
> > > +static inline void sanitize_highpage(struct page *page)
> > 
> > any reason we're not reusing clear_highpage() for this?
> > (I know it's currently slightly different, but that is fixable)
> 
> KM_USER0 users are not supposed to be called from soft/hard irq
> contexts for high memory pages, something that cannot be guaranteed
> at this low level of page freeing (i.e., we could be interrupting
> a clear_highmem and overwrite its KM_USER0 mapping, leaving it dead
> in the water when we return there). in other words, sanitization
> must be able to nest within KM_USER*, so that pretty much calls for
> its own slot.

no argument that current clear_highpage isn't a fit. I was more
thinking about using the content of sanitize_highpage(), and just
calling that clear_highpage(). (or in other words, improve
clear_highpage to be usable in more situations)


-- 
Arjan van de Ven 	Intel Open Source Technology Centre
For development, discussion and tips for power savings, 
visit http://www.lesswatts.org

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* [PATCH] Sanitize memory on kfree() and kmem_cache_free()
  2009-05-23 22:42           ` Rik van Riel
@ 2009-05-25  1:17             ` Larry H.
  -1 siblings, 0 replies; 220+ messages in thread
From: Larry H. @ 2009-05-25  1:17 UTC (permalink / raw)
  To: Rik van Riel
  Cc: Ingo Molnar, linux-kernel, Linus Torvalds, linux-mm, Ingo Molnar,
	Alan Cox, pageexec

(Was Re: [patch 0/5] Support for sanitization flag in low-level page
allocator)

On 18:42 Sat 23 May     , Rik van Riel wrote:
> Ingo Molnar wrote:
>
>> What you are missing is that your patch makes _no technical sense_ if you 
>> allow the same information to leak over the kernel stack. Kernel stacks 
>> can be freed and reused, swapped out and thus 'exposed'.
>
> Kernel stacks may be freed and reused, but Larry's latest
> patch takes care of that by clearing them at page free
> time.

[PATCH] Sanitize memory on kfree() and kmem_cache_free()

This depends on the previous sanitize-mem.patch and implements object
clearing for SLAB and SLUB. Only the SLUB allocator has been tested,
and this patch successfully enforces clearing on kfree() for both
standard caches and private ones (through kmem_cache_free()).

The following test results can be observed when this patch is applied
along sanitize-mem:

   Name 	  Result 	 Object
  ---------------------------------------
   get_free_page 	OK. 	 e4011000
       vmalloc(256) 	OK. 	 e632e000
      vmalloc(2048) 	OK. 	 e6331000
      vmalloc(4096) 	OK. 	 e6334000
      vmalloc(8192) 	OK. 	 e6337000
     vmalloc(32768) 	OK. 	 e633b000
         kmalloc-32 	OK. 	 e5009904
         kmalloc-64 	OK. 	 e404bc04
         kmalloc-96 	OK. 	 e5230b44
        kmalloc-128 	OK. 	 e5221f84
        kmalloc-256 	OK. 	 e4104304
        kmalloc-512 	OK. 	 e40a9804
       kmalloc-1024 	OK. 	 e5137404
       kmalloc-2048 	OK. 	 e5277004
       kmalloc-4096 	OK. 	 e415c004
       kmalloc-8192 	OK. 	 e4092004

Without both:

   Name 	  Result 	 Object
  ---------------------------------------
   get_free_page 	FAILED. 	 e412d000
       vmalloc(256) 	FAILED. 	 e6020000
      vmalloc(2048) 	FAILED. 	 e6023000
      vmalloc(4096) 	FAILED. 	 e6026000
      vmalloc(8192) 	FAILED. 	 e6029000
     vmalloc(32768) 	FAILED. 	 e602d000
         kmalloc-32 	FAILED. 	 e5009924
         kmalloc-64 	FAILED. 	 e5146fc4
         kmalloc-96 	FAILED. 	 e5320d84
        kmalloc-128 	FAILED. 	 e5019484
        kmalloc-256 	FAILED. 	 e4128104
        kmalloc-512 	FAILED. 	 e40df804
       kmalloc-1024 	FAILED. 	 e4a36c04
       kmalloc-2048 	FAILED. 	 e4159004
       kmalloc-4096 	FAILED. 	 e417f004
       kmalloc-8192 	FAILED. 	 e4180004

It takes care of handling empty slabs by ignoring them to avoid
duplication of the clearing operation. In addition, it performs
basic validation of the object and cache pointers, since it is
lacking for kmem_cache_free(). Furthermore, when a cache has
poisoning enabled (SLAB_POISON), the clearing process is skipped,
since poisoning itself will overwrite the object's contents with
a known pattern.

Signed-off-by: Larry Highsmith <research@subreption.com>

---
 mm/slab.c |    9 +++++++++
 mm/slub.c |   32 ++++++++++++++++++++++++++++++++
 2 files changed, 41 insertions(+)

Index: linux-2.6/mm/slab.c
===================================================================
--- linux-2.6.orig/mm/slab.c
+++ linux-2.6/mm/slab.c
@@ -3520,6 +3520,15 @@ static inline void __cache_free(struct k
 	objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0));
 
 	/*
+	 * If unconditional memory sanitization is enabled, the object is
+	 * cleared before it's put back into the cache. Using obj_offset and
+	 * obj_size we can coexist with the debugging (redzone, poisoning, etc)
+	 * facilities.
+	 */
+	if (sanitize_all_mem)
+		memset(objp + obj_offset(cachep), 0, obj_size(cachep));
+
+	/*
 	 * Skip calling cache_free_alien() when the platform is not numa.
 	 * This will avoid cache misses that happen while accessing slabp (which
 	 * is per page memory  reference) to get nodeid. Instead use a global
Index: linux-2.6/mm/slub.c
===================================================================
--- linux-2.6.orig/mm/slub.c
+++ linux-2.6/mm/slub.c
@@ -1269,6 +1269,36 @@ static inline int lock_and_freeze_slab(s
 }
 
 /*
+ * Slab object sanitization
+ */
+static void sanitize_slab_obj(struct kmem_cache *s, struct page *page, void *object)
+{
+	if (!sanitize_all_mem)
+		return;
+
+	/* SLAB_POISON makes clearing unnecessary */
+	if (s->offset || unlikely(s->flags & SLAB_POISON))
+		return;
+
+	/*
+	 * The slab is empty, it will be returned to page allocator by
+	 * discard_slab()->__slab_free(). It will be cleared there, thus
+	 * we skip it here.
+	 */
+	if (unlikely(!page->inuse))
+		return;
+
+	/* Validate that pointer indeed belongs to slab page */
+	if (!PageSlab(page) || (page->slab != s))
+		return;
+
+	if (!check_valid_pointer(s, page, object))
+		return;
+
+	memset(object, 0, s->objsize);
+}
+
+/*
  * Try to allocate a partial slab from a specific node.
  */
 static struct page *get_partial_node(struct kmem_cache_node *n)
@@ -1741,6 +1771,7 @@ void kmem_cache_free(struct kmem_cache *
 
 	page = virt_to_head_page(x);
 
+	sanitize_slab_obj(s, page, x);
 	slab_free(s, page, x, _RET_IP_);
 }
 EXPORT_SYMBOL(kmem_cache_free);
@@ -2752,6 +2783,7 @@ void kfree(const void *x)
 		put_page(page);
 		return;
 	}
+	sanitize_slab_obj(page->slab, page, object);
 	slab_free(page->slab, page, object, _RET_IP_);
 }
 EXPORT_SYMBOL(kfree);


^ permalink raw reply	[flat|nested] 220+ messages in thread

* [PATCH] Sanitize memory on kfree() and kmem_cache_free()
@ 2009-05-25  1:17             ` Larry H.
  0 siblings, 0 replies; 220+ messages in thread
From: Larry H. @ 2009-05-25  1:17 UTC (permalink / raw)
  To: Rik van Riel
  Cc: Ingo Molnar, linux-kernel, Linus Torvalds, linux-mm, Ingo Molnar,
	Alan Cox, pageexec

(Was Re: [patch 0/5] Support for sanitization flag in low-level page
allocator)

On 18:42 Sat 23 May     , Rik van Riel wrote:
> Ingo Molnar wrote:
>
>> What you are missing is that your patch makes _no technical sense_ if you 
>> allow the same information to leak over the kernel stack. Kernel stacks 
>> can be freed and reused, swapped out and thus 'exposed'.
>
> Kernel stacks may be freed and reused, but Larry's latest
> patch takes care of that by clearing them at page free
> time.

[PATCH] Sanitize memory on kfree() and kmem_cache_free()

This depends on the previous sanitize-mem.patch and implements object
clearing for SLAB and SLUB. Only the SLUB allocator has been tested,
and this patch successfully enforces clearing on kfree() for both
standard caches and private ones (through kmem_cache_free()).

The following test results can be observed when this patch is applied
along sanitize-mem:

   Name 	  Result 	 Object
  ---------------------------------------
   get_free_page 	OK. 	 e4011000
       vmalloc(256) 	OK. 	 e632e000
      vmalloc(2048) 	OK. 	 e6331000
      vmalloc(4096) 	OK. 	 e6334000
      vmalloc(8192) 	OK. 	 e6337000
     vmalloc(32768) 	OK. 	 e633b000
         kmalloc-32 	OK. 	 e5009904
         kmalloc-64 	OK. 	 e404bc04
         kmalloc-96 	OK. 	 e5230b44
        kmalloc-128 	OK. 	 e5221f84
        kmalloc-256 	OK. 	 e4104304
        kmalloc-512 	OK. 	 e40a9804
       kmalloc-1024 	OK. 	 e5137404
       kmalloc-2048 	OK. 	 e5277004
       kmalloc-4096 	OK. 	 e415c004
       kmalloc-8192 	OK. 	 e4092004

Without both:

   Name 	  Result 	 Object
  ---------------------------------------
   get_free_page 	FAILED. 	 e412d000
       vmalloc(256) 	FAILED. 	 e6020000
      vmalloc(2048) 	FAILED. 	 e6023000
      vmalloc(4096) 	FAILED. 	 e6026000
      vmalloc(8192) 	FAILED. 	 e6029000
     vmalloc(32768) 	FAILED. 	 e602d000
         kmalloc-32 	FAILED. 	 e5009924
         kmalloc-64 	FAILED. 	 e5146fc4
         kmalloc-96 	FAILED. 	 e5320d84
        kmalloc-128 	FAILED. 	 e5019484
        kmalloc-256 	FAILED. 	 e4128104
        kmalloc-512 	FAILED. 	 e40df804
       kmalloc-1024 	FAILED. 	 e4a36c04
       kmalloc-2048 	FAILED. 	 e4159004
       kmalloc-4096 	FAILED. 	 e417f004
       kmalloc-8192 	FAILED. 	 e4180004

It takes care of handling empty slabs by ignoring them to avoid
duplication of the clearing operation. In addition, it performs
basic validation of the object and cache pointers, since it is
lacking for kmem_cache_free(). Furthermore, when a cache has
poisoning enabled (SLAB_POISON), the clearing process is skipped,
since poisoning itself will overwrite the object's contents with
a known pattern.

Signed-off-by: Larry Highsmith <research@subreption.com>

---
 mm/slab.c |    9 +++++++++
 mm/slub.c |   32 ++++++++++++++++++++++++++++++++
 2 files changed, 41 insertions(+)

Index: linux-2.6/mm/slab.c
===================================================================
--- linux-2.6.orig/mm/slab.c
+++ linux-2.6/mm/slab.c
@@ -3520,6 +3520,15 @@ static inline void __cache_free(struct k
 	objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0));
 
 	/*
+	 * If unconditional memory sanitization is enabled, the object is
+	 * cleared before it's put back into the cache. Using obj_offset and
+	 * obj_size we can coexist with the debugging (redzone, poisoning, etc)
+	 * facilities.
+	 */
+	if (sanitize_all_mem)
+		memset(objp + obj_offset(cachep), 0, obj_size(cachep));
+
+	/*
 	 * Skip calling cache_free_alien() when the platform is not numa.
 	 * This will avoid cache misses that happen while accessing slabp (which
 	 * is per page memory  reference) to get nodeid. Instead use a global
Index: linux-2.6/mm/slub.c
===================================================================
--- linux-2.6.orig/mm/slub.c
+++ linux-2.6/mm/slub.c
@@ -1269,6 +1269,36 @@ static inline int lock_and_freeze_slab(s
 }
 
 /*
+ * Slab object sanitization
+ */
+static void sanitize_slab_obj(struct kmem_cache *s, struct page *page, void *object)
+{
+	if (!sanitize_all_mem)
+		return;
+
+	/* SLAB_POISON makes clearing unnecessary */
+	if (s->offset || unlikely(s->flags & SLAB_POISON))
+		return;
+
+	/*
+	 * The slab is empty, it will be returned to page allocator by
+	 * discard_slab()->__slab_free(). It will be cleared there, thus
+	 * we skip it here.
+	 */
+	if (unlikely(!page->inuse))
+		return;
+
+	/* Validate that pointer indeed belongs to slab page */
+	if (!PageSlab(page) || (page->slab != s))
+		return;
+
+	if (!check_valid_pointer(s, page, object))
+		return;
+
+	memset(object, 0, s->objsize);
+}
+
+/*
  * Try to allocate a partial slab from a specific node.
  */
 static struct page *get_partial_node(struct kmem_cache_node *n)
@@ -1741,6 +1771,7 @@ void kmem_cache_free(struct kmem_cache *
 
 	page = virt_to_head_page(x);
 
+	sanitize_slab_obj(s, page, x);
 	slab_free(s, page, x, _RET_IP_);
 }
 EXPORT_SYMBOL(kmem_cache_free);
@@ -2752,6 +2783,7 @@ void kfree(const void *x)
 		put_page(page);
 		return;
 	}
+	sanitize_slab_obj(page->slab, page, object);
 	slab_free(page->slab, page, object, _RET_IP_);
 }
 EXPORT_SYMBOL(kfree);

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-21 19:26       ` Alan Cox
@ 2009-05-26 19:02         ` Pavel Machek
  -1 siblings, 0 replies; 220+ messages in thread
From: Pavel Machek @ 2009-05-26 19:02 UTC (permalink / raw)
  To: Alan Cox
  Cc: Rik van Riel, Peter Zijlstra, Larry H.,
	linux-kernel, Linus Torvalds, linux-mm, Ingo Molnar

Hi!

> > I could also imagine the suspend-to-disk code skipping
> > PG_sensitive pages when storing data to disk, and
> > replacing it with some magic signature so programs
> > that use special PG_sensitive buffers can know that
> > their crypto key disappeared after a restore.
> 
> Its irrelevant in the simple S2D case. I just patch other bits of the
> suspend image to mail me the new key later. The right answer is crypted
> swap combined with a hard disk password and thus a crypted and locked
> suspend image. Playing the "I must not miss any page which might be

uswsusp does have internal encryption, and can use dm_crypt encrypted
swap... So yes, we can do encrypted swap & s2disk today.
								Pavel
-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-26 19:02         ` Pavel Machek
  0 siblings, 0 replies; 220+ messages in thread
From: Pavel Machek @ 2009-05-26 19:02 UTC (permalink / raw)
  To: Alan Cox
  Cc: Rik van Riel, Peter Zijlstra, Larry H.,
	linux-kernel, Linus Torvalds, linux-mm, Ingo Molnar

Hi!

> > I could also imagine the suspend-to-disk code skipping
> > PG_sensitive pages when storing data to disk, and
> > replacing it with some magic signature so programs
> > that use special PG_sensitive buffers can know that
> > their crypto key disappeared after a restore.
> 
> Its irrelevant in the simple S2D case. I just patch other bits of the
> suspend image to mail me the new key later. The right answer is crypted
> swap combined with a hard disk password and thus a crypted and locked
> suspend image. Playing the "I must not miss any page which might be

uswsusp does have internal encryption, and can use dm_crypt encrypted
swap... So yes, we can do encrypted swap & s2disk today.
								Pavel
-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-23 22:42           ` Rik van Riel
@ 2009-05-27 22:34             ` Ingo Molnar
  -1 siblings, 0 replies; 220+ messages in thread
From: Ingo Molnar @ 2009-05-27 22:34 UTC (permalink / raw)
  To: Rik van Riel
  Cc: Larry H.,
	linux-kernel, Linus Torvalds, linux-mm, Ingo Molnar, Alan Cox,
	pageexec


* Rik van Riel <riel@redhat.com> wrote:

> Ingo Molnar wrote:
>
>> What you are missing is that your patch makes _no technical 
>> sense_ if you allow the same information to leak over the kernel 
>> stack. Kernel stacks can be freed and reused, swapped out and 
>> thus 'exposed'.
>
> Kernel stacks may be freed and reused, but Larry's latest patch 
> takes care of that by clearing them at page free time.
>
> As for being swapped out - I do not believe that kernel stacks can 
> ever be swapped out in Linux.

yes, i referred to that as an undesirable option - because it slows 
down pthread_create() quite substantially.

This needs before/after pthread_create() benchmark results.

	Ingo

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-27 22:34             ` Ingo Molnar
  0 siblings, 0 replies; 220+ messages in thread
From: Ingo Molnar @ 2009-05-27 22:34 UTC (permalink / raw)
  To: Rik van Riel
  Cc: Larry H.,
	linux-kernel, Linus Torvalds, linux-mm, Ingo Molnar, Alan Cox,
	pageexec


* Rik van Riel <riel@redhat.com> wrote:

> Ingo Molnar wrote:
>
>> What you are missing is that your patch makes _no technical 
>> sense_ if you allow the same information to leak over the kernel 
>> stack. Kernel stacks can be freed and reused, swapped out and 
>> thus 'exposed'.
>
> Kernel stacks may be freed and reused, but Larry's latest patch 
> takes care of that by clearing them at page free time.
>
> As for being swapped out - I do not believe that kernel stacks can 
> ever be swapped out in Linux.

yes, i referred to that as an undesirable option - because it slows 
down pthread_create() quite substantially.

This needs before/after pthread_create() benchmark results.

	Ingo

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-27 22:34             ` Ingo Molnar
@ 2009-05-28  6:27               ` Alan Cox
  -1 siblings, 0 replies; 220+ messages in thread
From: Alan Cox @ 2009-05-28  6:27 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Rik van Riel, Larry H.,
	linux-kernel, Linus Torvalds, linux-mm, Ingo Molnar, pageexec

> > As for being swapped out - I do not believe that kernel stacks can 
> > ever be swapped out in Linux.
> 
> yes, i referred to that as an undesirable option - because it slows 
> down pthread_create() quite substantially.
> 
> This needs before/after pthread_create() benchmark results.

kernel stacks can end up places you don't expect on hypervisor based
systems.

In most respects the benchmarks are pretty irrelevant - wiping stuff has
a performance cost, but its the sort of thing you only want to do when
you have a security requirement that needs it. At that point the
performance is secondary.

Alan

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-28  6:27               ` Alan Cox
  0 siblings, 0 replies; 220+ messages in thread
From: Alan Cox @ 2009-05-28  6:27 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Rik van Riel, Larry H.,
	linux-kernel, Linus Torvalds, linux-mm, Ingo Molnar, pageexec

> > As for being swapped out - I do not believe that kernel stacks can 
> > ever be swapped out in Linux.
> 
> yes, i referred to that as an undesirable option - because it slows 
> down pthread_create() quite substantially.
> 
> This needs before/after pthread_create() benchmark results.

kernel stacks can end up places you don't expect on hypervisor based
systems.

In most respects the benchmarks are pretty irrelevant - wiping stuff has
a performance cost, but its the sort of thing you only want to do when
you have a security requirement that needs it. At that point the
performance is secondary.

Alan

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-28  6:27               ` Alan Cox
@ 2009-05-28  7:00                 ` Larry H.
  -1 siblings, 0 replies; 220+ messages in thread
From: Larry H. @ 2009-05-28  7:00 UTC (permalink / raw)
  To: Alan Cox
  Cc: Ingo Molnar, Rik van Riel, linux-kernel, Linus Torvalds,
	linux-mm, Ingo Molnar, pageexec

On 07:27 Thu 28 May     , Alan Cox wrote:
> > > As for being swapped out - I do not believe that kernel stacks can 
> > > ever be swapped out in Linux.
> > 
> > yes, i referred to that as an undesirable option - because it slows 
> > down pthread_create() quite substantially.
> > 
> > This needs before/after pthread_create() benchmark results.
> 
> kernel stacks can end up places you don't expect on hypervisor based
> systems.
> 
> In most respects the benchmarks are pretty irrelevant - wiping stuff has
> a performance cost, but its the sort of thing you only want to do when
> you have a security requirement that needs it. At that point the
> performance is secondary.
> 
> Alan

Right, besides I believe Ingo is confused about the nature of the patch.
It looks like he believes it's about userland memory sanitization, when
that isn't what is being done here.

If he still believes this has anything to do with it directly, or can
introduce a performance impact on pthread_create() (remember we are
sanitizing on release only...), I'll be pleased to provide benchmark
results that prove it wrong (or right, if it was the case).

Are there any existing benchmark tests that I can modify to suit our
needs here, or will I need to waste some time writing them from scratch?

	Larry


^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-28  7:00                 ` Larry H.
  0 siblings, 0 replies; 220+ messages in thread
From: Larry H. @ 2009-05-28  7:00 UTC (permalink / raw)
  To: Alan Cox
  Cc: Ingo Molnar, Rik van Riel, linux-kernel, Linus Torvalds,
	linux-mm, Ingo Molnar, pageexec

On 07:27 Thu 28 May     , Alan Cox wrote:
> > > As for being swapped out - I do not believe that kernel stacks can 
> > > ever be swapped out in Linux.
> > 
> > yes, i referred to that as an undesirable option - because it slows 
> > down pthread_create() quite substantially.
> > 
> > This needs before/after pthread_create() benchmark results.
> 
> kernel stacks can end up places you don't expect on hypervisor based
> systems.
> 
> In most respects the benchmarks are pretty irrelevant - wiping stuff has
> a performance cost, but its the sort of thing you only want to do when
> you have a security requirement that needs it. At that point the
> performance is secondary.
> 
> Alan

Right, besides I believe Ingo is confused about the nature of the patch.
It looks like he believes it's about userland memory sanitization, when
that isn't what is being done here.

If he still believes this has anything to do with it directly, or can
introduce a performance impact on pthread_create() (remember we are
sanitizing on release only...), I'll be pleased to provide benchmark
results that prove it wrong (or right, if it was the case).

Are there any existing benchmark tests that I can modify to suit our
needs here, or will I need to waste some time writing them from scratch?

	Larry

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-28  6:27               ` Alan Cox
@ 2009-05-28  9:08                 ` Ingo Molnar
  -1 siblings, 0 replies; 220+ messages in thread
From: Ingo Molnar @ 2009-05-28  9:08 UTC (permalink / raw)
  To: Alan Cox
  Cc: Rik van Riel, Larry H.,
	linux-kernel, Linus Torvalds, linux-mm, Ingo Molnar, pageexec,
	Linus Torvalds


* Alan Cox <alan@lxorguk.ukuu.org.uk> wrote:

> > > As for being swapped out - I do not believe that kernel stacks can 
> > > ever be swapped out in Linux.
> > 
> > yes, i referred to that as an undesirable option - because it slows 
> > down pthread_create() quite substantially.
> > 
> > This needs before/after pthread_create() benchmark results.
> 
> kernel stacks can end up places you don't expect on hypervisor 
> based systems.
> 
> In most respects the benchmarks are pretty irrelevant - wiping 
> stuff has a performance cost, but its the sort of thing you only 
> want to do when you have a security requirement that needs it. At 
> that point the performance is secondary.

Benchmarks, of course, are not irrelevant _at all_.

So i'm asking for this "clear kernel stacks on freeing" aspect to be 
benchmarked thoroughly, as i expect it to have a negative impact - 
otherwise i'm NAK-ing this. Please Cc: me on the measurement results.

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-28  9:08                 ` Ingo Molnar
  0 siblings, 0 replies; 220+ messages in thread
From: Ingo Molnar @ 2009-05-28  9:08 UTC (permalink / raw)
  To: Alan Cox
  Cc: Rik van Riel, Larry H.,
	linux-kernel, Linus Torvalds, linux-mm, Ingo Molnar, pageexec,
	Linus Torvalds


* Alan Cox <alan@lxorguk.ukuu.org.uk> wrote:

> > > As for being swapped out - I do not believe that kernel stacks can 
> > > ever be swapped out in Linux.
> > 
> > yes, i referred to that as an undesirable option - because it slows 
> > down pthread_create() quite substantially.
> > 
> > This needs before/after pthread_create() benchmark results.
> 
> kernel stacks can end up places you don't expect on hypervisor 
> based systems.
> 
> In most respects the benchmarks are pretty irrelevant - wiping 
> stuff has a performance cost, but its the sort of thing you only 
> want to do when you have a security requirement that needs it. At 
> that point the performance is secondary.

Benchmarks, of course, are not irrelevant _at all_.

So i'm asking for this "clear kernel stacks on freeing" aspect to be 
benchmarked thoroughly, as i expect it to have a negative impact - 
otherwise i'm NAK-ing this. Please Cc: me on the measurement results.

Thanks,

	Ingo

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-28  9:08                 ` Ingo Molnar
@ 2009-05-28 11:50                   ` Alan Cox
  -1 siblings, 0 replies; 220+ messages in thread
From: Alan Cox @ 2009-05-28 11:50 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Rik van Riel, Larry H.,
	linux-kernel, Linus Torvalds, linux-mm, Ingo Molnar, pageexec,
	Linus Torvalds

> > In most respects the benchmarks are pretty irrelevant - wiping 
> > stuff has a performance cost, but its the sort of thing you only 
> > want to do when you have a security requirement that needs it. At 
> > that point the performance is secondary.
> 
> Bechmarks, of course, are not irrelevant _at all_.
> 
> So i'm asking for this "clear kernel stacks on freeing" aspect to be 
> benchmarked thoroughly, as i expect it to have a negative impact - 
> otherwise i'm NAK-ing this. 

Ingo, you are completely missing the point.

The performance cost of such a security action is NIL when the feature
is disabled. So the performance cost in the general case is irrelevant.

If you need this kind of data wiping then the performance hit
is basically irrelevant, the security comes first. You can NAK it all you
like but it simply means that such users either have to apply patches or
run something else.

If it harmed general user performance you'd have a point - but it's like
SELinux: you don't have to use it if you don't need the feature. Which it
must be said is a lot better than much of the scheduler crud that has
appeared over time which you can't make go away.

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-28 11:50                   ` Alan Cox
  0 siblings, 0 replies; 220+ messages in thread
From: Alan Cox @ 2009-05-28 11:50 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Rik van Riel, Larry H.,
	linux-kernel, Linus Torvalds, linux-mm, Ingo Molnar, pageexec,
	Linus Torvalds

> > In most respects the benchmarks are pretty irrelevant - wiping 
> > stuff has a performance cost, but its the sort of thing you only 
> > want to do when you have a security requirement that needs it. At 
> > that point the performance is secondary.
> 
> Bechmarks, of course, are not irrelevant _at all_.
> 
> So i'm asking for this "clear kernel stacks on freeing" aspect to be 
> benchmarked thoroughly, as i expect it to have a negative impact - 
> otherwise i'm NAK-ing this. 

Ingo, you are completely missing the point.

The performance cost of such a security action is NIL when the feature
is disabled. So the performance cost in the general case is irrelevant.

If you need this kind of data wiping then the performance hit
is basically irrelevant, the security comes first. You can NAK it all you
like but it simply means that such users either have to apply patches or
run something else.

If it harmed general user performance you'd have a point - but it's like
SELinux: you don't have to use it if you don't need the feature. Which it
must be said is a lot better than much of the scheduler crud that has
appeared over time which you can't make go away.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-20 18:30 ` Larry H.
@ 2009-05-28 12:48   ` Pavel Machek
  -1 siblings, 0 replies; 220+ messages in thread
From: Pavel Machek @ 2009-05-28 12:48 UTC (permalink / raw)
  To: Larry H.; +Cc: linux-kernel, Linus Torvalds, linux-mm, Ingo Molnar

Hi!

> Index: linux-2.6/mm/Kconfig
> ===================================================================
> --- linux-2.6.orig/mm/Kconfig
> +++ linux-2.6/mm/Kconfig
> @@ -155,6 +155,26 @@ config PAGEFLAGS_EXTENDED
>  	def_bool y
>  	depends on 64BIT || SPARSEMEM_VMEMMAP || !NUMA || !SPARSEMEM
>  
> +config PAGE_SENSITIVE
> +	bool "Support for selective page sanitization"
> +	help
> +	 This option provides support for honoring the sensitive bit
> +	 in the low level page allocator. This bit is used to mark
> +	 pages that will contain sensitive information (such as
> +	 cryptographic secrets and credentials).
> +
> +	 Pages marked with the sensitive bit will be sanitized upon
> +	 release, to prevent information leaks and data remanence that
> +	 could allow Iceman/coldboot attacks to reveal such data.
> +
> +	 If you are unsure, select N. This option might introduce a
> +	 minimal performance impact on those subsystems that make
> +	 use of the flag associated with the sensitive bit.
> +
> +	 If you use the cryptographic API or want to prevent tty
> +	 information leaks locally, you most likely want to enable
> +	 this.

This should not be configurable. Runtime config, defaulting to
'sanitize' may make some sense, but... better just be secure.

-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html


^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-28 12:48   ` Pavel Machek
  0 siblings, 0 replies; 220+ messages in thread
From: Pavel Machek @ 2009-05-28 12:48 UTC (permalink / raw)
  To: Larry H.; +Cc: linux-kernel, Linus Torvalds, linux-mm, Ingo Molnar

Hi!

> Index: linux-2.6/mm/Kconfig
> ===================================================================
> --- linux-2.6.orig/mm/Kconfig
> +++ linux-2.6/mm/Kconfig
> @@ -155,6 +155,26 @@ config PAGEFLAGS_EXTENDED
>  	def_bool y
>  	depends on 64BIT || SPARSEMEM_VMEMMAP || !NUMA || !SPARSEMEM
>  
> +config PAGE_SENSITIVE
> +	bool "Support for selective page sanitization"
> +	help
> +	 This option provides support for honoring the sensitive bit
> +	 in the low level page allocator. This bit is used to mark
> +	 pages that will contain sensitive information (such as
> +	 cryptographic secrets and credentials).
> +
> +	 Pages marked with the sensitive bit will be sanitized upon
> +	 release, to prevent information leaks and data remanence that
> +	 could allow Iceman/coldboot attacks to reveal such data.
> +
> +	 If you are unsure, select N. This option might introduce a
> +	 minimal performance impact on those subsystems that make
> +	 use of the flag associated with the sensitive bit.
> +
> +	 If you use the cryptographic API or want to prevent tty
> +	 information leaks locally, you most likely want to enable
> +	 this.

This should not be configurable. Runtime config, defaulting to
'sanitize' may make some sense, but... better just be secure.

-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-28 12:48   ` Pavel Machek
@ 2009-05-28 12:55     ` Larry H.
  -1 siblings, 0 replies; 220+ messages in thread
From: Larry H. @ 2009-05-28 12:55 UTC (permalink / raw)
  To: Pavel Machek; +Cc: linux-kernel, Linus Torvalds, linux-mm, Ingo Molnar

Hi Pavel,

> This should not be configurable. Runtime config, defaulting to
> 'sanitize' may make some sense, but... better just be secure.

We've since moved to an unconditional page sanitization approach,
enabled via boot option. Check out the latest patches in the thread,
don't bother checking the initial page flag ones since there's no
intention to follow that path for now.

Thanks for taking a look.

	Larry


^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-28 12:55     ` Larry H.
  0 siblings, 0 replies; 220+ messages in thread
From: Larry H. @ 2009-05-28 12:55 UTC (permalink / raw)
  To: Pavel Machek; +Cc: linux-kernel, Linus Torvalds, linux-mm, Ingo Molnar

Hi Pavel,

> This should not be configurable. Runtime config, defaulting to
> 'sanitize' may make some sense, but... better just be secure.

We've since moved to an unconditional page sanitization approach,
enabled via boot option. Check out the latest patches in the thread,
don't bother checking the initial page flag ones since there's no
intention to follow that path for now.

Thanks for taking a look.

	Larry

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-28  9:08                 ` Ingo Molnar
@ 2009-05-28 18:48                   ` pageexec
  -1 siblings, 0 replies; 220+ messages in thread
From: pageexec @ 2009-05-28 18:48 UTC (permalink / raw)
  To: Alan Cox, Ingo Molnar
  Cc: Rik van Riel, Larry H.,
	linux-kernel, Linus Torvalds, linux-mm, Ingo Molnar,
	Linus Torvalds

On 28 May 2009 at 11:08, Ingo Molnar wrote:

> 
> * Alan Cox <alan@lxorguk.ukuu.org.uk> wrote:
> 
> > > > As for being swapped out - I do not believe that kernel stacks can 
> > > > ever be swapped out in Linux.
> > > 
> > > yes, i referred to that as an undesirable option - because it slows 
> > > down pthread_create() quite substantially.
> > > 
> > > This needs before/after pthread_create() benchmark results.
> > 
> > kernel stacks can end up places you don't expect on hypervisor 
> > based systems.
> > 
> > In most respects the benchmarks are pretty irrelevant - wiping 
> > stuff has a performance cost, but its the sort of thing you only 
> > want to do when you have a security requirement that needs it. At 
> > that point the performance is secondary.
> 
> Bechmarks, of course, are not irrelevant _at all_.
> 
> So i'm asking for this "clear kernel stacks on freeing" aspect to be 
> benchmarked thoroughly, as i expect it to have a negative impact - 
> otherwise i'm NAK-ing this. Please Cc: me to measurements results.

last year while developing/debugging something else i also ran some kernel
compilation tests and managed to dig out this one for you ('all' refers to
all of PaX):

------------------------------------------------------------------------------------------
make -j4 2.6.24-rc7-i386-pax compiling 2.6.24-rc7-i386-pax (all with SANITIZE, no PARAVIRT)
565.63user 68.52system 5:25.52elapsed 194%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (1major+12486066minor)pagefaults 0swaps

565.10user 68.28system 5:24.72elapsed 195%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+12485742minor)pagefaults 0swaps
------------------------------------------------------------------------------------------
make -j4 2.6.24-rc5-i386-pax compiling 2.6.24-rc5-i386-pax (all but SANITIZE, no PARAVIRT)
559.74user 50.29system 5:12.79elapsed 195%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+12397482minor)pagefaults 0swaps

561.41user 51.91system 5:14.55elapsed 194%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+12396877minor)pagefaults 0swaps
------------------------------------------------------------------------------------------

for the kernel times the overhead is about 68s vs. 51s, or roughly 33% in this particular case.
while i don't know where this workload (the kernel part) falls in the spectrum of real
life workloads, it definitely shows that if you're kernel bound, you should think twice
before using this in production (and there's the real-time latency issue too).


^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-28 18:48                   ` pageexec
  0 siblings, 0 replies; 220+ messages in thread
From: pageexec @ 2009-05-28 18:48 UTC (permalink / raw)
  To: Alan Cox, Ingo Molnar
  Cc: Rik van Riel, Larry H.,
	linux-kernel, Linus Torvalds, linux-mm, Ingo Molnar,
	Linus Torvalds

On 28 May 2009 at 11:08, Ingo Molnar wrote:

> 
> * Alan Cox <alan@lxorguk.ukuu.org.uk> wrote:
> 
> > > > As for being swapped out - I do not believe that kernel stacks can 
> > > > ever be swapped out in Linux.
> > > 
> > > yes, i referred to that as an undesirable option - because it slows 
> > > down pthread_create() quite substantially.
> > > 
> > > This needs before/after pthread_create() benchmark results.
> > 
> > kernel stacks can end up places you don't expect on hypervisor 
> > based systems.
> > 
> > In most respects the benchmarks are pretty irrelevant - wiping 
> > stuff has a performance cost, but its the sort of thing you only 
> > want to do when you have a security requirement that needs it. At 
> > that point the performance is secondary.
> 
> Bechmarks, of course, are not irrelevant _at all_.
> 
> So i'm asking for this "clear kernel stacks on freeing" aspect to be 
> benchmarked thoroughly, as i expect it to have a negative impact - 
> otherwise i'm NAK-ing this. Please Cc: me to measurements results.

last year while developing/debugging something else i also ran some kernel
compilation tests and managed to dig out this one for you ('all' refers to
all of PaX):

------------------------------------------------------------------------------------------
make -j4 2.6.24-rc7-i386-pax compiling 2.6.24-rc7-i386-pax (all with SANITIZE, no PARAVIRT)
565.63user 68.52system 5:25.52elapsed 194%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (1major+12486066minor)pagefaults 0swaps

565.10user 68.28system 5:24.72elapsed 195%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+12485742minor)pagefaults 0swaps
------------------------------------------------------------------------------------------
make -j4 2.6.24-rc5-i386-pax compiling 2.6.24-rc5-i386-pax (all but SANITIZE, no PARAVIRT)
559.74user 50.29system 5:12.79elapsed 195%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+12397482minor)pagefaults 0swaps

561.41user 51.91system 5:14.55elapsed 194%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+12396877minor)pagefaults 0swaps
------------------------------------------------------------------------------------------

for the kernel times the overhead is about 68s vs. 51s, or roughly 33% in this particular case.
while i don't know where this workload (the kernel part) falls in the spectrum of real
life workloads, it definitely shows that if you're kernel bound, you should think twice
before using this in production (and there's the real-time latency issue too).

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-23 15:56                   ` Arjan van de Ven
@ 2009-05-28 19:36                     ` Peter Zijlstra
  -1 siblings, 0 replies; 220+ messages in thread
From: Peter Zijlstra @ 2009-05-28 19:36 UTC (permalink / raw)
  To: Arjan van de Ven
  Cc: Alan Cox, Larry H.,
	Ingo Molnar, Rik van Riel, linux-kernel, Linus Torvalds,
	linux-mm, Ingo Molnar, pageexec

On Sat, 2009-05-23 at 08:56 -0700, Arjan van de Ven wrote:
> On Sat, 23 May 2009 09:09:10 +0100
> Alan Cox <alan@lxorguk.ukuu.org.uk> wrote:
> 
> > > Enabling SLAB poisoning by default will be a bad idea
> > 
> > Why ?
> > 
> > > I looked for unused/re-usable flags too, but found none. It's
> > > interesting to see SLUB and SLOB have their own page flags. Did
> > > anybody oppose those when they were proposed? 
> > 
> > Certainly they were looked at - but the memory allocator is right at
> > the core of the system rather than an add on.
> > 
> > > > Ditto - which is why I'm coming from the position of an "if we
> > > > free it clear it" option. If you need that kind of security the
> > > > cost should be more than acceptable - especially with modern
> > > > processors that can do cache bypass on the clears.
> > > 
> > > Are you proposing that we should simply remove the confidential
> > > flags and just stick to the unconditional sanitization when the
> > > boot option is enabled? If positive, it will make things more
> > > simple and definitely is better than nothing. I would have (still)
> > > preferred the other old approach to be merged, but whatever works
> > > at this point.
> > 
> > I am because
> > - its easy to merge
> > - its non controversial
> > - it meets the security good practice and means we don't miss any
> >   alloc/free cases
> > - it avoid providing flags to help a trojan identify "interesting"
> > data to acquire
> > - modern cpu memory clearing can be very cheap
> 
> ... and if we zero on free, we don't need to zero on allocate.
> While this is a little controversial, it does mean that at least part of
> the cost is just time-shifted, which means it'll not be TOO bad
> hopefully...

zero on allocate has the advantage of cache hotness, we're going to use
the memory, why else allocate it.

zero on free only causes extra cache evictions for no gain.



^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-28 19:36                     ` Peter Zijlstra
  0 siblings, 0 replies; 220+ messages in thread
From: Peter Zijlstra @ 2009-05-28 19:36 UTC (permalink / raw)
  To: Arjan van de Ven
  Cc: Alan Cox, Larry H.,
	Ingo Molnar, Rik van Riel, linux-kernel, Linus Torvalds,
	linux-mm, Ingo Molnar, pageexec

On Sat, 2009-05-23 at 08:56 -0700, Arjan van de Ven wrote:
> On Sat, 23 May 2009 09:09:10 +0100
> Alan Cox <alan@lxorguk.ukuu.org.uk> wrote:
> 
> > > Enabling SLAB poisoning by default will be a bad idea
> > 
> > Why ?
> > 
> > > I looked for unused/re-usable flags too, but found none. It's
> > > interesting to see SLUB and SLOB have their own page flags. Did
> > > anybody oppose those when they were proposed? 
> > 
> > Certainly they were looked at - but the memory allocator is right at
> > the core of the system rather than an add on.
> > 
> > > > Ditto - which is why I'm coming from the position of an "if we
> > > > free it clear it" option. If you need that kind of security the
> > > > cost should be more than acceptable - especially with modern
> > > > processors that can do cache bypass on the clears.
> > > 
> > > Are you proposing that we should simply remove the confidential
> > > flags and just stick to the unconditional sanitization when the
> > > boot option is enabled? If positive, it will make things more
> > > simple and definitely is better than nothing. I would have (still)
> > > preferred the other old approach to be merged, but whatever works
> > > at this point.
> > 
> > I am because
> > - its easy to merge
> > - its non controversial
> > - it meets the security good practice and means we don't miss any
> >   alloc/free cases
> > - it avoid providing flags to help a trojan identify "interesting"
> > data to acquire
> > - modern cpu memory clearing can be very cheap
> 
> ... and if we zero on free, we don't need to zero on allocate.
> While this is a little controversial, it does mean that at least part of
> the cost is just time-shifted, which means it'll not be TOO bad
> hopefully...

zero on allocate has the advantage of cache hotness, we're going to use
the memory, why else allocate it.

zero on free only causes extra cache evictions for no gain.


--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-28 11:50                   ` Alan Cox
@ 2009-05-28 19:44                     ` Peter Zijlstra
  -1 siblings, 0 replies; 220+ messages in thread
From: Peter Zijlstra @ 2009-05-28 19:44 UTC (permalink / raw)
  To: Alan Cox
  Cc: Ingo Molnar, Rik van Riel, Larry H.,
	linux-kernel, Linus Torvalds, linux-mm, Ingo Molnar, pageexec,
	Linus Torvalds

On Thu, 2009-05-28 at 12:50 +0100, Alan Cox wrote:
> The performance cost of such a security action are NIL when the feature
> is disabled. So the performance cost in the general case is irrelevant.

Not really, much of the code posted in this thread has the form:

int sanitize_all_mem; /* note the lack of __read_mostly */

void some_existing_function()
{
	if (sanitize_all_mem) { /* extra branch */
		/* do stuff */
	}
}

void sanitize_obj(void *obj)
{
	if (!sanitize_all_mem) /* extra branch */
		return;

	/* do stuff */
}


void another_existing_function()
{
	sanitize_obj(obj); /* extra call */
}

That doesn't equal NIL, that equals extra function calls and branches.


^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-28 19:44                     ` Peter Zijlstra
  0 siblings, 0 replies; 220+ messages in thread
From: Peter Zijlstra @ 2009-05-28 19:44 UTC (permalink / raw)
  To: Alan Cox
  Cc: Ingo Molnar, Rik van Riel, Larry H.,
	linux-kernel, Linus Torvalds, linux-mm, Ingo Molnar, pageexec,
	Linus Torvalds

On Thu, 2009-05-28 at 12:50 +0100, Alan Cox wrote:
> The performance cost of such a security action are NIL when the feature
> is disabled. So the performance cost in the general case is irrelevant.

Not really, much of the code posted in this thread has the form:

int sanitize_all_mem; /* note the lack of __read_mostly */

void some_existing_function()
{
	if (sanitize_all_mem) { /* extra branch */
		/* do stuff */
	}
}

void sanitize_obj(void *obj)
{
	if (!sanitize_all_mem) /* extra branch */
		return;

	/* do stuff */
}


void another_existing_function()
{
	sanitize_obj(obj); /* extra call */
}

That doesn't equal NIL, that equals extra function calls and branches.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-28 19:36                     ` Peter Zijlstra
@ 2009-05-29 14:32                       ` Arjan van de Ven
  -1 siblings, 0 replies; 220+ messages in thread
From: Arjan van de Ven @ 2009-05-29 14:32 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Alan Cox, Larry H.,
	Ingo Molnar, Rik van Riel, linux-kernel, Linus Torvalds,
	linux-mm, Ingo Molnar, pageexec

On Thu, 28 May 2009 21:36:01 +0200
Peter Zijlstra <peterz@infradead.org> wrote:

> > ... and if we zero on free, we don't need to zero on allocate.
> > While this is a little controversial, it does mean that at least
> > part of the cost is just time-shifted, which means it'll not be TOO
> > bad hopefully...
> 
> zero on allocate has the advantage of cache hotness, we're going to
> use the memory, why else allocate it.

that is why I said it's controversial.

BUT if you zero on free anyway...

And I don't think it's as big a deal as you make it.
Why?

We recycle pages in LIFO order. And L2 caches are big.

So if you zero on free, the next allocation will reuse the zeroed page.
And due to LIFO that is not too far out "often", which makes it likely
the page is still in L2 cache.

The other thing is that zero-on-allocate puts the WHOLE page in L1,
while you can study how much of that page is actually used on average,
and it'll be a percentage lower than 100%.
In fact, if it IS 100%, you shouldn't have put it in L1 because the app
does that anyway. If it is not 100% you just blew a chunk of your L1
for no value.

Don't get me wrong, I'm not arguing that zero-on-free is better, I'm
just trying to point out that the "advantage" of zero-on-allocate isn't
nearly as big as people sometimes think it is...




-- 
Arjan van de Ven 	Intel Open Source Technology Centre
For development, discussion and tips for power savings, 
visit http://www.lesswatts.org

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-29 14:32                       ` Arjan van de Ven
  0 siblings, 0 replies; 220+ messages in thread
From: Arjan van de Ven @ 2009-05-29 14:32 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Alan Cox, Larry H.,
	Ingo Molnar, Rik van Riel, linux-kernel, Linus Torvalds,
	linux-mm, Ingo Molnar, pageexec

On Thu, 28 May 2009 21:36:01 +0200
Peter Zijlstra <peterz@infradead.org> wrote:

> > ... and if we zero on free, we don't need to zero on allocate.
> > While this is a little controversial, it does mean that at least
> > part of the cost is just time-shifted, which means it'll not be TOO
> > bad hopefully...
> 
> zero on allocate has the advantage of cache hotness, we're going to
> use the memory, why else allocate it.

that is why I said it's controversial.

BUT if you zero on free anyway...

And I don't think it's as big a deal as you make it.
Why?

We recycle pages in LIFO order. And L2 caches are big.

So if you zero on free, the next allocation will reuse the zeroed page.
And due to LIFO that is not too far out "often", which makes it likely
the page is still in L2 cache.

The other thing is that zero-on-allocate puts the WHOLE page in L1,
while you can study how much of that page is actually used on average,
and it'll be a percentage lower than 100%.
In fact, if it IS 100%, you shouldn't have put it in L1 because the app
does that anyway. If it is not 100% you just blew a chunk of your L1
for no value.

Don't get me wrong, I'm not arguing that zero-on-free is better, I'm
just trying to point out that the "advantage" of zero-on-allocate isn't
nearly as big as people sometimes think it is...




-- 
Arjan van de Ven 	Intel Open Source Technology Centre
For development, discussion and tips for power savings, 
visit http://www.lesswatts.org

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-20 21:24     ` Larry H.
@ 2009-05-29 22:58       ` Andrew Morton
  -1 siblings, 0 replies; 220+ messages in thread
From: Andrew Morton @ 2009-05-29 22:58 UTC (permalink / raw)
  To: Larry H.; +Cc: peterz, linux-kernel, torvalds, linux-mm, mingo, pageexec

On Wed, 20 May 2009 14:24:13 -0700
"Larry H." <research@subreption.com> wrote:

> Your
> approach means forcing all developers to remember where they have to
> place this explicit clearing, and introducing unnecessary code
> duplication and an ever growing list of places adding these calls.

And your proposed approach requires that developers remember to use
GFP_SENSITIVE at allocation time.  In well-implemented code, there is a
single memory-freeing site, so there's really no difference here.

Other problems I see with the patch are:

- Adds a test-n-branch to all page-freeing operations.  Ouch.  The
  current approach avoids that cost.

- Fails to handle kmalloc()'ed memory.  Fixing this will probably
  require adding a test-n-branch to kmem_cache_alloc().  Ouch * N.

- Once kmalloc() is fixed, the page-allocator changes and
  GFP_SENSITIVE itself can perhaps go away - I expect that little
  security-sensitive memory is allocated direct from the page
  allocator.  Most callsites are probably using
  kmalloc()/kmem_cache_alloc() (might be wrong).

  If not wrong then we end up with a single requirement: zap the
  memory in kmem_cache_free().

  But how to do that?  Particular callsites don't get to alter
  kfree()'s behaviour.  So they'd need to use a new kfree_sensitive(). 
  Which is just syntactic sugar around the code which we presently
  implement.


^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-29 22:58       ` Andrew Morton
  0 siblings, 0 replies; 220+ messages in thread
From: Andrew Morton @ 2009-05-29 22:58 UTC (permalink / raw)
  To: Larry H.; +Cc: peterz, linux-kernel, torvalds, linux-mm, mingo, pageexec

On Wed, 20 May 2009 14:24:13 -0700
"Larry H." <research@subreption.com> wrote:

> Your
> approach means forcing all developers to remember where they have to
> place this explicit clearing, and introducing unnecessary code
> duplication and an ever growing list of places adding these calls.

And your proposed approach requires that developers remember to use
GFP_SENSITIVE at allocation time.  In well-implemented code, there is a
single memory-freeing site, so there's really no difference here.

Other problems I see with the patch are:

- Adds a test-n-branch to all page-freeing operations.  Ouch.  The
  current approach avoids that cost.

- Fails to handle kmalloc()'ed memory.  Fixing this will probably
  require adding a test-n-branch to kmem_cache_alloc().  Ouch * N.

- Once kmalloc() is fixed, the page-allocator changes and
  GFP_SENSITIVE itself can perhaps go away - I expect that little
  security-sensitive memory is allocated direct from the page
  allocator.  Most callsites are probably using
  kmalloc()/kmem_cache_alloc() (might be wrong).

  If not wrong then we end up with a single requirement: zap the
  memory in kmem_cache_free().

  But how to do that?  Particular callsites don't get to alter
  kfree()'s behaviour.  So they'd need to use a new kfree_sensitive(). 
  Which is just syntactic sugar around the code which we presently
  implement.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-29 14:32                       ` Arjan van de Ven
@ 2009-05-30  5:48                         ` Larry H.
  -1 siblings, 0 replies; 220+ messages in thread
From: Larry H. @ 2009-05-30  5:48 UTC (permalink / raw)
  To: Arjan van de Ven
  Cc: Peter Zijlstra, Alan Cox, Ingo Molnar, Rik van Riel,
	linux-kernel, Linus Torvalds, linux-mm, Ingo Molnar, pageexec

On 07:32 Fri 29 May     , Arjan van de Ven wrote:
> On Thu, 28 May 2009 21:36:01 +0200
> Peter Zijlstra <peterz@infradead.org> wrote:
> 
> > > ... and if we zero on free, we don't need to zero on allocate.
> > > While this is a little controversial, it does mean that at least
> > > part of the cost is just time-shifted, which means it'll not be TOO
> > > bad hopefully...
> > 
> > zero on allocate has the advantage of cache hotness, we're going to
> > use the memory, why else allocate it.

Because zero on allocate kills the very purpose of this patch and it has
obvious security implications. Like races (in information leak
scenarios, that is). What happens in-between the release of the page and
the new allocation that yields the same page? What happens if no further
allocations happen in a while (that can return the old page again)?
That's the idea.

> that is why I said it's controversial.
> 
> BUT if you zero on free anyway...
> 
> And I don't think it's as big a deal as you make it.
> Why?
> 
> We recycle pages in LIFO order. And L2 caches are big.
> 
> So if you zero on free, the next allocation will reuse the zeroed page.
> And due to LIFO that is not too far out "often", which makes it likely
> the page is still in L2 cache.

Thanks for pointing this out clearly, Arjan.

> The other thing is that zero-on-allocate puts the WHOLE page in L1,
> while you can study how much of that page is actually used on average,
> and it'll be a percentage lower than 100%.
> In fact, if it IS 100%, you shouldn't have put it in L1 because the app
> does that anyway. If it is not 100% you just blew a chunk of your L1
> for no value.
> 
> Don't get me wrong, I'm not arguing that zero-on-free is better, I'm
> just trying to point out that the "advantage" of zero-on-allocate isn't
> nearly as big as people sometimes think it is...

Performance-wise, I agree with you here. Security-wise, I assure
you that clearing on allocation time is most certainly hopeless.

If there's further room for improvement in the patch, and something can
be optimized, I will do my best with it. I won't be able to provide any
updates until Sunday, likely. I'll do a kernbench, if someone has
further benchmarks to suggest, please let me know.

	Larry


^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-30  5:48                         ` Larry H.
  0 siblings, 0 replies; 220+ messages in thread
From: Larry H. @ 2009-05-30  5:48 UTC (permalink / raw)
  To: Arjan van de Ven
  Cc: Peter Zijlstra, Alan Cox, Ingo Molnar, Rik van Riel,
	linux-kernel, Linus Torvalds, linux-mm, Ingo Molnar, pageexec

On 07:32 Fri 29 May     , Arjan van de Ven wrote:
> On Thu, 28 May 2009 21:36:01 +0200
> Peter Zijlstra <peterz@infradead.org> wrote:
> 
> > > ... and if we zero on free, we don't need to zero on allocate.
> > > While this is a little controversial, it does mean that at least
> > > part of the cost is just time-shifted, which means it'll not be TOO
> > > bad hopefully...
> > 
> > zero on allocate has the advantage of cache hotness, we're going to
> > use the memory, why else allocate it.

Because zero on allocate kills the very purpose of this patch and it has
obvious security implications. Like races (in information leak
scenarios, that is). What happens in-between the release of the page and
the new allocation that yields the same page? What happens if no further
allocations happen in a while (that can return the old page again)?
That's the idea.

> that is why I said it's controversial.
> 
> BUT if you zero on free anyway...
> 
> And I don't think it's as big a deal as you make it.
> Why?
> 
> We recycle pages in LIFO order. And L2 caches are big.
> 
> So if you zero on free, the next allocation will reuse the zeroed page.
> And due to LIFO that is not too far out "often", which makes it likely
> the page is still in L2 cache.

Thanks for pointing this out clearly, Arjan.

> The other thing is that zero-on-allocate puts the WHOLE page in L1,
> while you can study how much of that page is actually used on average,
> and it'll be a percentage lower than 100%.
> In fact, if it IS 100%, you shouldn't have put it in L1 because the app
> does that anyway. If it is not 100% you just blew a chunk of your L1
> for no value.
> 
> Don't get me wrong, I'm not arguing that zero-on-free is better, I'm
> just trying to point out that the "advantage" of zero-on-allocate isn't
> nearly as big as people sometimes think it is...

Performance-wise, I agree with you here. Security-wise, I assure
you that clearing on allocation time is most certainly hopeless.

If there's further room for improvement in the patch, and something can
be optimized, I will do my best with it. I won't be able to provide any
updates until Sunday, likely. I'll do a kernbench, if someone has
further benchmarks to suggest, please let me know.

	Larry

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-29 22:58       ` Andrew Morton
  (?)
@ 2009-05-30  7:00       ` Larry H.
  -1 siblings, 0 replies; 220+ messages in thread
From: Larry H. @ 2009-05-30  7:00 UTC (permalink / raw)
  To: Andrew Morton; +Cc: peterz, linux-kernel, torvalds, linux-mm, mingo, pageexec

On 15:58 Fri 29 May     , Andrew Morton wrote:
> And your proposed approach requires that developers remember to use
> GFP_SENSITIVE at allocation time.  In well-implemented code, there is a
> single memory-freeing site, so there's really no difference here.

In the current (latest) patch, unconditional sanitization is enabled via
boot time option. There's no page flag now, nor GFP_CONFIDENTIAL,
since it is useless without the page flag.

> Other problems I see with the patch are:
> 
> - Adds a test-n-branch to all page-freeing operations.  Ouch.  The
>   current approach avoids that cost.
> 
> - Fails to handle kmalloc()'ed memory.  Fixing this will probably
>   require adding a test-n-branch to kmem_cache_alloc().  Ouch * N.

For the GFP_CONFIDENTIAL flag? Not there anymore. If you meant clearing
on allocation, that's hopeless. The current patch doesn't touch the
kmalloc layer, though I submitted a second one that takes care of
kfree/kmem_cache_free. Peter has objected to adding more branches
there...

> - Once kmalloc() is fixed, the page-allocator changes and
>   GFP_SENSITIVE itself can perhaps go away - I expect that little
>   security-sensitive memory is allocated direct from the page
>   allocator.  Most callsites are probably using
>   kmalloc()/kmem_cache_alloc() (might be wrong).

None of the currently hot spots use private caches, they use the
standard ones through kmalloc. Having separate caches for each of these
hot spots is beyond overkill and will have a higher performance hit than
any of the current or past patches I submitted.

>   If not wrong then we end up with a single requirement: zap the
>   memory in kmem_cache_free().

Done in the last patchset I submitted. There's an issue there: Peter
raised questions about the branches I introduced... truth is, those are
there (in sanitize_obj) to make sure we are dealing with a valid object
pointer. kmem_cache_free lacks these checks (albeit kfree has them)...

I'm not sure why they aren't there. In sanitize_obj we can skip those
since kfree takes care of it, but we should probably add them to
kmem_cache_free.

So this is what I propose:

	1. We remove sanitize_obj, saving the test branches there and
	any pointer validation (at the expense of trusting it in
	kmem_cache_free). No extra call depth. We will duplicate the
	clearing when the object is the last in the slab (a put_page
	ensues and the page allocator sanitizes it there).

	2. We move the memset to kfree and kmem_cache_free, and use a
	single test branch for sanitize_all_mem.

Should keep the instruction counting fellows happy. Is this acceptable
for you now?

>   But how to do that?  Particular callsites don't get to alter
>   kfree()'s behaviour.  So they'd need to use a new kfree_sensitive(). 
>   Which is just syntactic sugar around the code which we presently
>   implement.

This could work, but again it won't do anything unless sanitization is
enabled on boot time (or it should be independent). And we changed the
naming from sensitive to confidential, since some people opposed the
former.

	Larry

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page  allocator
  2009-05-29 22:58       ` Andrew Morton
@ 2009-05-30  7:12         ` Pekka Enberg
  -1 siblings, 0 replies; 220+ messages in thread
From: Pekka Enberg @ 2009-05-30  7:12 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Larry H., peterz, linux-kernel, torvalds, linux-mm, mingo, pageexec

Hi Andrew,

On Sat, May 30, 2009 at 1:58 AM, Andrew Morton
<akpm@linux-foundation.org> wrote:
>  But how to do that?  Particular callsites don't get to alter
>  kfree()'s behaviour.  So they'd need to use a new kfree_sensitive().
>  Which is just syntactic sugar around the code which we presently
>  implement.

Unless I am missing something here, we already have kfree_sensitive(),
we just call it kzfree().

                                 Pekka

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-30  7:12         ` Pekka Enberg
  0 siblings, 0 replies; 220+ messages in thread
From: Pekka Enberg @ 2009-05-30  7:12 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Larry H., peterz, linux-kernel, torvalds, linux-mm, mingo, pageexec

Hi Andrew,

On Sat, May 30, 2009 at 1:58 AM, Andrew Morton
<akpm@linux-foundation.org> wrote:
>  But how to do that?  Particular callsites don't get to alter
>  kfree()'s behaviour.  So they'd need to use a new kfree_sensitive().
>  Which is just syntactic sugar around the code which we presently
>  implement.

Unless I am missing something here, we already have kfree_sensitive(),
we just call it kzfree().

                                 Pekka

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-30  7:12         ` Pekka Enberg
@ 2009-05-30  7:35           ` Larry H.
  -1 siblings, 0 replies; 220+ messages in thread
From: Larry H. @ 2009-05-30  7:35 UTC (permalink / raw)
  To: Pekka Enberg
  Cc: Andrew Morton, Larry H.,
	peterz, linux-kernel, torvalds, linux-mm, mingo, pageexec

On 10:12 Sat 30 May     , Pekka Enberg wrote:
> Hi Andrew,
> 
> On Sat, May 30, 2009 at 1:58 AM, Andrew Morton
> <akpm@linux-foundation.org> wrote:
> >  But how to do that?  Particular callsites don't get to alter
> >  kfree()'s behaviour.  So they'd need to use a new kfree_sensitive().
> >  Which is just syntactic sugar around the code which we presently
> >  implement.
> 
> Unless I am missing something here, we already have kfree_sensitive(),
> we just call it kzfree().

You should test that. The results might be surprising, though.

	Larry


^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-30  7:35           ` Larry H.
  0 siblings, 0 replies; 220+ messages in thread
From: Larry H. @ 2009-05-30  7:35 UTC (permalink / raw)
  To: Pekka Enberg
  Cc: Andrew Morton, Larry H.,
	peterz, linux-kernel, torvalds, linux-mm, mingo, pageexec

On 10:12 Sat 30 May     , Pekka Enberg wrote:
> Hi Andrew,
> 
> On Sat, May 30, 2009 at 1:58 AM, Andrew Morton
> <akpm@linux-foundation.org> wrote:
> >  But how to do that?  Particular callsites don't get to alter
> >  kfree()'s behaviour.  So they'd need to use a new kfree_sensitive().
> >  Which is just syntactic sugar around the code which we presently
> >  implement.
> 
> Unless I am missing something here, we already have kfree_sensitive(),
> we just call it kzfree().

You should test that. The results might be surprising, though.

	Larry

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page  allocator
  2009-05-28 11:50                   ` Alan Cox
@ 2009-05-30  7:35                     ` Pekka Enberg
  -1 siblings, 0 replies; 220+ messages in thread
From: Pekka Enberg @ 2009-05-30  7:35 UTC (permalink / raw)
  To: Alan Cox
  Cc: Ingo Molnar, Rik van Riel, Larry H.,
	linux-kernel, Linus Torvalds, linux-mm, Ingo Molnar, pageexec,
	Linus Torvalds

Hi Alan,

On Thu, May 28, 2009 at 2:50 PM, Alan Cox <alan@lxorguk.ukuu.org.uk> wrote:
> The performance cost of such a security action are NIL when the feature
> is disabled. So the performance cost in the general case is irrelevant.
>
> If you need this kind of data wiping then the performance hit
> is basically irrelevant, the security comes first. You can NAK it all you
> like but it simply means that such users either have to apply patches or
> run something else.
>
> If it harmed general user performance you'd have a point - but its like
> SELinux you don't have to use it if you don't need the feature. Which it
> must be said is a lot better than much of the scheduler crud that has
> appeared over time which you can't make go away.

The GFP_SENSITIVE flag looks like a big hammer that we don't really
need IMHO. It seems to me that most of the actual call-sites (crypto
code, wireless keys, etc.) should probably just use kzfree()
unconditionally to make sure we don't leak sensitive data. I did not
look too closely but I don't think any of the sensitive kfree() calls
are in fastpaths so the performance impact is negligible.

                                Pekka

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-30  7:35                     ` Pekka Enberg
  0 siblings, 0 replies; 220+ messages in thread
From: Pekka Enberg @ 2009-05-30  7:35 UTC (permalink / raw)
  To: Alan Cox
  Cc: Ingo Molnar, Rik van Riel, Larry H.,
	linux-kernel, Linus Torvalds, linux-mm, Ingo Molnar, pageexec,
	Linus Torvalds

Hi Alan,

On Thu, May 28, 2009 at 2:50 PM, Alan Cox <alan@lxorguk.ukuu.org.uk> wrote:
> The performance cost of such a security action are NIL when the feature
> is disabled. So the performance cost in the general case is irrelevant.
>
> If you need this kind of data wiping then the performance hit
> is basically irrelevant, the security comes first. You can NAK it all you
> like but it simply means that such users either have to apply patches or
> run something else.
>
> If it harmed general user performance you'd have a point - but its like
> SELinux you don't have to use it if you don't need the feature. Which it
> must be said is a lot better than much of the scheduler crud that has
> appeared over time which you can't make go away.

The GFP_SENSITIVE flag looks like a big hammer that we don't really
need IMHO. It seems to me that most of the actual call-sites (crypto
code, wireless keys, etc.) should probably just use kzfree()
unconditionally to make sure we don't leak sensitive data. I did not
look too closely but I don't think any of the sensitive kfree() calls
are in fastpaths so the performance impact is negligible.

                                Pekka

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page  allocator
  2009-05-30  7:35           ` Larry H.
@ 2009-05-30  7:39             ` Pekka Enberg
  -1 siblings, 0 replies; 220+ messages in thread
From: Pekka Enberg @ 2009-05-30  7:39 UTC (permalink / raw)
  To: Larry H.
  Cc: Andrew Morton, peterz, linux-kernel, torvalds, linux-mm, mingo, pageexec

On Sat, May 30, 2009 at 1:58 AM, Andrew Morton
<akpm@linux-foundation.org> wrote:
> > >  But how to do that?  Particular callsites don't get to alter
> > >  kfree()'s behaviour.  So they'd need to use a new kfree_sensitive().
> > >  Which is just syntactic sugar around the code which we presently
> > >  implement.

On 10:12 Sat 30 May, Pekka Enberg wrote:
>> Unless I am missing something here, we already have kfree_sensitive(),
>> we just call it kzfree().

On Sat, May 30, 2009 at 10:35 AM, Larry H. <research@subreption.com> wrote:
> You should test that. The results might be surprising, though.

So what's the problem with it?

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-30  7:39             ` Pekka Enberg
  0 siblings, 0 replies; 220+ messages in thread
From: Pekka Enberg @ 2009-05-30  7:39 UTC (permalink / raw)
  To: Larry H.
  Cc: Andrew Morton, peterz, linux-kernel, torvalds, linux-mm, mingo, pageexec

On Sat, May 30, 2009 at 1:58 AM, Andrew Morton
<akpm@linux-foundation.org> wrote:
> > >  But how to do that?  Particular callsites don't get to alter
> > >  kfree()'s behaviour.  So they'd need to use a new kfree_sensitive().
> > >  Which is just syntactic sugar around the code which we presently
> > >  implement.

On 10:12 Sat 30 May, Pekka Enberg wrote:
>> Unless I am missing something here, we already have kfree_sensitive(),
>> we just call it kzfree().

On Sat, May 30, 2009 at 10:35 AM, Larry H. <research@subreption.com> wrote:
> You should test that. The results might be surprising, though.

So what's the problem with it?

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-30  7:35                     ` Pekka Enberg
@ 2009-05-30  7:50                       ` Larry H.
  -1 siblings, 0 replies; 220+ messages in thread
From: Larry H. @ 2009-05-30  7:50 UTC (permalink / raw)
  To: Pekka Enberg
  Cc: Alan Cox, Ingo Molnar, Rik van Riel, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar, pageexec, Linus Torvalds

On 10:35 Sat 30 May     , Pekka Enberg wrote:
> The GFP_SENSITIVE flag looks like a big hammer that we don't really
> need IMHO. It seems to me that most of the actual call-sites (crypto
> code, wireless keys, etc.) should probably just use kzfree()
> unconditionally to make sure we don't leak sensitive data. I did not
> look too closely but I don't think any of the sensitive kfree() calls
> are in fastpaths so the performance impact is negligible.

That's hopeless, and kzfree is broken. Like I said in my earlier reply,
please test that yourself to see the results. Whoever wrote that ignored
how SLAB/SLUB work and if kzfree had been used somewhere in the kernel
before, it should have been noticed a long time ago.

It's called disregard when you ditch something in favor of something
else you have assumed to be better, when it isn't. That's not polite.

Furthermore, selective clearing doesn't solve the roots of the problem.
It's just adding bandages to a wound which never stops bleeding. I
proposed an initial page flag because we could use it later for
unconditional page clearing doing a one line change in a header file.

I see a lot of speculation on what works and what doesn't, but
there isn't much on the practical side of things, yet. I provided test
results that proved some of the comments wrong, and I've referenced
literature which shows the reasoning behind all this. What else can I do
to make you understand you are missing the point here?

	Larry

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-30  7:50                       ` Larry H.
  0 siblings, 0 replies; 220+ messages in thread
From: Larry H. @ 2009-05-30  7:50 UTC (permalink / raw)
  To: Pekka Enberg
  Cc: Alan Cox, Ingo Molnar, Rik van Riel, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar, pageexec, Linus Torvalds

On 10:35 Sat 30 May     , Pekka Enberg wrote:
> The GFP_SENSITIVE flag looks like a big hammer that we don't really
> need IMHO. It seems to me that most of the actual call-sites (crypto
> code, wireless keys, etc.) should probably just use kzfree()
> unconditionally to make sure we don't leak sensitive data. I did not
> look too closely but I don't think any of the sensitive kfree() calls
> are in fastpaths so the performance impact is negligible.

That's hopeless, and kzfree is broken. Like I said in my earlier reply,
please test that yourself to see the results. Whoever wrote that ignored
how SLAB/SLUB work and if kzfree had been used somewhere in the kernel
before, it should have been noticed a long time ago.

It's called disregard when you ditch something in favor of something
else you have assumed to be better, when it isn't. That's not polite.

Furthermore, selective clearing doesn't solve the roots of the problem.
It's just adding bandages to a wound which never stops bleeding. I
proposed an initial page flag because we could use it later for
unconditional page clearing doing a one line change in a header file.

I see a lot of speculation on what works and what doesn't, but
there isn't much on the practical side of things, yet. I provided test
results that proved some of the comments wrong, and I've referenced
literature which shows the reasoning behind all this. What else can I do
to make you understand you are missing the point here?

	Larry

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-30  7:50                       ` Larry H.
@ 2009-05-30  7:53                         ` Pekka Enberg
  -1 siblings, 0 replies; 220+ messages in thread
From: Pekka Enberg @ 2009-05-30  7:53 UTC (permalink / raw)
  To: Larry H.
  Cc: Alan Cox, Ingo Molnar, Rik van Riel, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar, pageexec, Linus Torvalds

Hi Larry,

On 10:35 Sat 30 May, Pekka Enberg wrote:
>> The GFP_SENSITIVE flag looks like a big hammer that we don't really
>> need IMHO. It seems to me that most of the actual call-sites (crypto
>> code, wireless keys, etc.) should probably just use kzfree()
>> unconditionally to make sure we don't leak sensitive data. I did not
>> look too closely but I don't think any of the sensitive kfree() calls
>> are in fastpaths so the performance impact is negligible.

Larry H. wrote:
> That's hopeless, and kzfree is broken. Like I said in my earlier reply,
> please test that yourself to see the results. Whoever wrote that ignored
> how SLAB/SLUB work and if kzfree had been used somewhere in the kernel
> before, it should have been noticed a long time ago.

An open-coded version of kzfree was being used in the kernel:

http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff;h=00fcf2cb6f6bb421851c3ba062c0a36760ea6e53

Can we now get to the part where you explain how it's broken because I 
obviously "ignored how SLAB/SLUB works"?

Thanks!

			Pekka

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-30  7:53                         ` Pekka Enberg
  0 siblings, 0 replies; 220+ messages in thread
From: Pekka Enberg @ 2009-05-30  7:53 UTC (permalink / raw)
  To: Larry H.
  Cc: Alan Cox, Ingo Molnar, Rik van Riel, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar, pageexec, Linus Torvalds

Hi Larry,

On 10:35 Sat 30 May, Pekka Enberg wrote:
>> The GFP_SENSITIVE flag looks like a big hammer that we don't really
>> need IMHO. It seems to me that most of the actual call-sites (crypto
>> code, wireless keys, etc.) should probably just use kzfree()
>> unconditionally to make sure we don't leak sensitive data. I did not
>> look too closely but I don't think any of the sensitive kfree() calls
>> are in fastpaths so the performance impact is negligible.

Larry H. wrote:
> That's hopeless, and kzfree is broken. Like I said in my earlier reply,
> please test that yourself to see the results. Whoever wrote that ignored
> how SLAB/SLUB work and if kzfree had been used somewhere in the kernel
> before, it should have been noticed a long time ago.

An open-coded version of kzfree was being used in the kernel:

http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff;h=00fcf2cb6f6bb421851c3ba062c0a36760ea6e53

Can we now get to the part where you explain how it's broken because I 
obviously "ignored how SLAB/SLUB works"?

Thanks!

			Pekka

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-30  7:50                       ` Larry H.
@ 2009-05-30  7:57                         ` Pekka Enberg
  -1 siblings, 0 replies; 220+ messages in thread
From: Pekka Enberg @ 2009-05-30  7:57 UTC (permalink / raw)
  To: Larry H.
  Cc: Alan Cox, Ingo Molnar, Rik van Riel, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar, pageexec, Linus Torvalds

Larry H. wrote:
> Furthermore, selective clearing doesn't solve the roots of the problem.
> It's just adding bandages to a wound which never stops bleeding. I
> proposed an initial page flag because we could use it later for
> unconditional page clearing doing a one line change in a header file.
> 
> I see a lot of speculation on what works and what doesn't, but
> there isn't much on the practical side of things, yet. I provided test
> results that proved some of the comments wrong, and I've referenced
> literature which shows the reasoning behind all this. What else can I do
> to make you understand you are missing the point here?

Hey, if you want to add a CONFIG_ZERO_ALL_MEMORY_PARANOIA thing that can 
be disabled, go for it! But you have to find someone else to 
merge the SLAB bits because, quite frankly, I am not convinced it's 
worth it. And the hand waving you're doing here isn't really helping 
your case, sorry.

			Pekka

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-30  7:57                         ` Pekka Enberg
  0 siblings, 0 replies; 220+ messages in thread
From: Pekka Enberg @ 2009-05-30  7:57 UTC (permalink / raw)
  To: Larry H.
  Cc: Alan Cox, Ingo Molnar, Rik van Riel, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar, pageexec, Linus Torvalds

Larry H. wrote:
> Furthermore, selective clearing doesn't solve the roots of the problem.
> It's just adding bandages to a wound which never stops bleeding. I
> proposed an initial page flag because we could use it later for
> unconditional page clearing doing a one line change in a header file.
> 
> I see a lot of speculation on what works and what doesn't, but
> there isn't much on the practical side of things, yet. I provided test
> results that proved some of the comments wrong, and I've referenced
> literature which shows the reasoning behind all this. What else can I do
> to make you understand you are missing the point here?

Hey, if you want to add a CONFIG_ZERO_ALL_MEMORY_PARANOIA thing that can 
be disabled, go for it! But you have to find someone else to 
merge the SLAB bits because, quite frankly, I am not convinced it's 
worth it. And the hand waving you're doing here isn't really helping 
your case, sorry.

			Pekka

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-30  7:53                         ` Pekka Enberg
@ 2009-05-30  8:20                           ` Larry H.
  -1 siblings, 0 replies; 220+ messages in thread
From: Larry H. @ 2009-05-30  8:20 UTC (permalink / raw)
  To: Pekka Enberg
  Cc: Alan Cox, Ingo Molnar, Rik van Riel, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar, pageexec, Linus Torvalds

On 10:53 Sat 30 May     , Pekka Enberg wrote:
>> That's hopeless, and kzfree is broken. Like I said in my earlier reply,
>> please test that yourself to see the results. Whoever wrote that ignored
>> how SLAB/SLUB work and if kzfree had been used somewhere in the kernel
>> before, it should have been noticed long time ago.
>
> An open-coded version of kzfree was being used in the kernel:
>
> http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff;h=00fcf2cb6f6bb421851c3ba062c0a36760ea6e53
>
> Can we now get to the part where you explain how it's broken because I 
> obviously "ignored how SLAB/SLUB works"?

You can find the answer in the code of sanitize_obj, within my kfree
patch. Besides, it would have taken less time for you to write a simple
module that kmallocs and kzfrees a buffer, than writing these two
emails.

Consider the inuse, size, objsize and offset members of a kmem_cache
structure, for further hints. Test the module on a system with SLUB,
though the issue should replicate over SLAB too. And don't dare test it
on SLOB and its wonderful ksize, or even look at the freelist pointer
management within SLUB.

;)

I'm about to recommend Andrew to take a look at this too:
http://marc.info/?l=linux-mm&m=124301548814293&w=2

	Larry

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-30  8:20                           ` Larry H.
  0 siblings, 0 replies; 220+ messages in thread
From: Larry H. @ 2009-05-30  8:20 UTC (permalink / raw)
  To: Pekka Enberg
  Cc: Alan Cox, Ingo Molnar, Rik van Riel, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar, pageexec, Linus Torvalds

On 10:53 Sat 30 May     , Pekka Enberg wrote:
>> That's hopeless, and kzfree is broken. Like I said in my earlier reply,
>> please test that yourself to see the results. Whoever wrote that ignored
>> how SLAB/SLUB work and if kzfree had been used somewhere in the kernel
>> before, it should have been noticed long time ago.
>
> An open-coded version of kzfree was being used in the kernel:
>
> http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff;h=00fcf2cb6f6bb421851c3ba062c0a36760ea6e53
>
> Can we now get to the part where you explain how it's broken because I 
> obviously "ignored how SLAB/SLUB works"?

You can find the answer in the code of sanitize_obj, within my kfree
patch. Besides, it would have taken less time for you to write a simple
module that kmallocs and kzfrees a buffer, than writing these two
emails.

Consider the inuse, size, objsize and offset members of a kmem_cache
structure, for further hints. Test the module on a system with SLUB,
though the issue should replicate over SLAB too. And don't dare test it
on SLOB and its wonderful ksize, or even look at the freelist pointer
management within SLUB.

;)

I'm about to recommend Andrew to take a look at this too:
http://marc.info/?l=linux-mm&m=124301548814293&w=2

	Larry

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page  allocator
  2009-05-30  7:35                     ` Pekka Enberg
@ 2009-05-30  8:31                       ` Alan Cox
  -1 siblings, 0 replies; 220+ messages in thread
From: Alan Cox @ 2009-05-30  8:31 UTC (permalink / raw)
  To: Pekka Enberg
  Cc: Ingo Molnar, Rik van Riel, Larry H.,
	linux-kernel, Linus Torvalds, linux-mm, Ingo Molnar, pageexec,
	Linus Torvalds

> The GFP_SENSITIVE flag looks like a big hammer that we don't really
> need IMHO. It seems to me that most of the actual call-sites (crypto

Actually the flag is a small hammer

> code, wireless keys, etc.) should probably just use kzfree()
> unconditionally to make sure we don't leak sensitive data. I did not
> look too closely but I don't think any of the sensitive kfree() calls
> are in fastpaths so the performance impact is negligible.

The problem is that most sensitive data is user space anyway.
GFP_SENSITIVE or kzfree mean you have to get it right in the kernel and
you don't fix things like stack copies of sensitive data - it's a quick
hack which doesn't meet good security programming practice - it defaults
to insecure which is the wrong way around. Not saying it's a bad idea
to kzfree a few keys and things *but* it's not real security.

If you want to do real security you have a sysfs or build flag that turns
on clearing every page on free. Yes it costs performance (a lot less
nowadays with cache bypassing stores) but for the category of user who
wants to be sure nothing escapes it does the job while kzfree would be
like trying to plug leaks in a sieve.

Alan

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-30  8:31                       ` Alan Cox
  0 siblings, 0 replies; 220+ messages in thread
From: Alan Cox @ 2009-05-30  8:31 UTC (permalink / raw)
  To: Pekka Enberg
  Cc: Ingo Molnar, Rik van Riel, Larry H.,
	linux-kernel, Linus Torvalds, linux-mm, Ingo Molnar, pageexec,
	Linus Torvalds

> The GFP_SENSITIVE flag looks like a big hammer that we don't really
> need IMHO. It seems to me that most of the actual call-sites (crypto

Actually the flag is a small hammer

> code, wireless keys, etc.) should probably just use kzfree()
> unconditionally to make sure we don't leak sensitive data. I did not
> look too closely but I don't think any of the sensitive kfree() calls
> are in fastpaths so the performance impact is negligible.

The problem is that most sensitive data is user space anyway.
GFP_SENSITIVE or kzfree mean you have to get it right in the kernel and
you don't fix things like stack copies of sensitive data - it's a quick
hack which doesn't meet good security programming practice - it defaults
to insecure which is the wrong way around. Not saying it's a bad idea
to kzfree a few keys and things *but* it's not real security.

If you want to do real security you have a sysfs or build flag that turns
on clearing every page on free. Yes it costs performance (a lot less
nowadays with cache bypassing stores) but for the category of user who
wants to be sure nothing escapes it does the job while kzfree would be
like trying to plug leaks in a sieve.

Alan

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-30  8:20                           ` Larry H.
@ 2009-05-30  8:33                             ` Pekka Enberg
  -1 siblings, 0 replies; 220+ messages in thread
From: Pekka Enberg @ 2009-05-30  8:33 UTC (permalink / raw)
  To: Larry H.
  Cc: Alan Cox, Ingo Molnar, Rik van Riel, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar, pageexec, Linus Torvalds

Hi Larry,

On 10:53 Sat 30 May, Pekka Enberg wrote:
>>> That's hopeless, and kzfree is broken. Like I said in my earlier reply,
>>> please test that yourself to see the results. Whoever wrote that ignored
>>> how SLAB/SLUB work and if kzfree had been used somewhere in the kernel
>>> before, it should have been noticed long time ago.
>> An open-coded version of kzfree was being used in the kernel:
>>
>> http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff;h=00fcf2cb6f6bb421851c3ba062c0a36760ea6e53
>>
>> Can we now get to the part where you explain how it's broken because I 
>> obviously "ignored how SLAB/SLUB works"?

Larry H. wrote:
> You can find the answer in the code of sanitize_obj, within my kfree
> patch. Besides, it would have taken less time for you to write a simple
> module that kmallocs and kzfrees a buffer, than writing these two
> emails.
> 
> Consider the inuse, size, objsize and offset members of a kmem_cache
> structure, for further hints. Test the module on a system with SLUB,
> though the issue should replicate over SLAB too. And don't dare test it
> on SLOB and its wonderful ksize, or even look at the freelist pointer
> management within SLUB.

Thank you for the lesson in slab internals! That said, I did go over 
your patch and am still as confused as ever. I am afraid I have to 
consider this discussion done unless you're willing to share your 
knowledge on the subject.

			Pekka

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-30  8:33                             ` Pekka Enberg
  0 siblings, 0 replies; 220+ messages in thread
From: Pekka Enberg @ 2009-05-30  8:33 UTC (permalink / raw)
  To: Larry H.
  Cc: Alan Cox, Ingo Molnar, Rik van Riel, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar, pageexec, Linus Torvalds

Hi Larry,

On 10:53 Sat 30 May, Pekka Enberg wrote:
>>> That's hopeless, and kzfree is broken. Like I said in my earlier reply,
>>> please test that yourself to see the results. Whoever wrote that ignored
>>> how SLAB/SLUB work and if kzfree had been used somewhere in the kernel
>>> before, it should have been noticed long time ago.
>> An open-coded version of kzfree was being used in the kernel:
>>
>> http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff;h=00fcf2cb6f6bb421851c3ba062c0a36760ea6e53
>>
>> Can we now get to the part where you explain how it's broken because I 
>> obviously "ignored how SLAB/SLUB works"?

Larry H. wrote:
> You can find the answer in the code of sanitize_obj, within my kfree
> patch. Besides, it would have taken less time for you to write a simple
> module that kmallocs and kzfrees a buffer, than writing these two
> emails.
> 
> Consider the inuse, size, objsize and offset members of a kmem_cache
> structure, for further hints. Test the module on a system with SLUB,
> though the issue should replicate over SLAB too. And don't dare test it
> on SLOB and its wonderful ksize, or even look at the freelist pointer
> management within SLUB.

Thank you for the lesson in slab internals! That said, I did go over 
your patch and am still as confused as ever. I am afraid I have to 
consider this discussion done unless you're willing to share your 
knowledge on the subject.

			Pekka

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page  allocator
  2009-05-30  8:31                       ` Alan Cox
@ 2009-05-30  8:35                         ` Pekka Enberg
  -1 siblings, 0 replies; 220+ messages in thread
From: Pekka Enberg @ 2009-05-30  8:35 UTC (permalink / raw)
  To: Alan Cox
  Cc: Ingo Molnar, Rik van Riel, Larry H.,
	linux-kernel, Linus Torvalds, linux-mm, Ingo Molnar, pageexec,
	Linus Torvalds

Hi Alan,

Alan Cox wrote:
> The problem is that most sensitive data is user space anyway.
> GFP_SENSITIVE or kzfree mean you have to get it right in the kernel and
> you don't fix things like stack copies of sensitive data - its a quick
> hack which doesn't meet goot security programming practice -it defaults
> to insecure which is the wrong way around. Not saying its not a bad idea
> to kzfree a few keys and things *but* it's not real security.
> 
> If you want to do real security you have a sysfs or build flag that turns
> on clearing every page on free. Yes it costs performance (a lot less
> nowdays with cache bypassing stores) but for the category of user who
> wants to be sure nothing escapes it does the job while kzfree would be
> like trying to plug leaks in a sieve.

Yup, your suggestion would make one simple patch, for sure. I wonder if 
anyone is actually prepared to enable the thing at run-time, though, 
which is why I suggested doing the "critical" kzfree() ones unconditionally.

			Pekka

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-30  8:35                         ` Pekka Enberg
  0 siblings, 0 replies; 220+ messages in thread
From: Pekka Enberg @ 2009-05-30  8:35 UTC (permalink / raw)
  To: Alan Cox
  Cc: Ingo Molnar, Rik van Riel, Larry H.,
	linux-kernel, Linus Torvalds, linux-mm, Ingo Molnar, pageexec,
	Linus Torvalds

Hi Alan,

Alan Cox wrote:
> The problem is that most sensitive data is user space anyway.
> GFP_SENSITIVE or kzfree mean you have to get it right in the kernel and
> you don't fix things like stack copies of sensitive data - its a quick
> hack which doesn't meet goot security programming practice -it defaults
> to insecure which is the wrong way around. Not saying its not a bad idea
> to kzfree a few keys and things *but* it's not real security.
> 
> If you want to do real security you have a sysfs or build flag that turns
> on clearing every page on free. Yes it costs performance (a lot less
> nowdays with cache bypassing stores) but for the category of user who
> wants to be sure nothing escapes it does the job while kzfree would be
> like trying to plug leaks in a sieve.

Yup, your suggestion would make one simple patch, for sure. I wonder if 
anyone is actually prepared to enable the thing at run-time, though, 
which is why I suggested doing the "critical" kzfree() ones unconditionally.

			Pekka

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-30  7:57                         ` Pekka Enberg
@ 2009-05-30  9:05                           ` Larry H.
  -1 siblings, 0 replies; 220+ messages in thread
From: Larry H. @ 2009-05-30  9:05 UTC (permalink / raw)
  To: Pekka Enberg
  Cc: Alan Cox, Ingo Molnar, Rik van Riel, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar, pageexec, Linus Torvalds

On 10:57 Sat 30 May     , Pekka Enberg wrote:
> Larry H. wrote:
>> Furthermore, selective clearing doesn't solve the roots of the problem.
>> It's just adding bandages to a wound which never stops bleeding. I
>> proposed an initial page flag because we could use it later for
>> unconditional page clearing doing a one line change in a header file.
>> I see a lot of speculation on what works and what doesn't, but
>> there isn't much on the practical side of things, yet. I provided test
>> results that proved some of the comments wrong, and I've referenced
>> literature which shows the reasoning behind all this. What else can I do
>> to make you understand you are missing the point here?
>
> Hey, if you want to add a CONFIG_ZERO_ALL_MEMORY_PARANOIA thing that can be 
> disabled, go for it! But you have to find someone else to take the merge 
> the SLAB bits because, quite frankly, I am not convinced it's worth it. And 
> the hand waving you're doing here isn't really helping your case, sorry.

For a second I thought it was Ingo who was writing this e-mail.
Apologies about the confusion.

What was the technical explanation and reasoning behind your remarks,
again?

	Larry

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-30  9:05                           ` Larry H.
  0 siblings, 0 replies; 220+ messages in thread
From: Larry H. @ 2009-05-30  9:05 UTC (permalink / raw)
  To: Pekka Enberg
  Cc: Alan Cox, Ingo Molnar, Rik van Riel, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar, pageexec, Linus Torvalds

On 10:57 Sat 30 May     , Pekka Enberg wrote:
> Larry H. wrote:
>> Furthermore, selective clearing doesn't solve the roots of the problem.
>> It's just adding bandages to a wound which never stops bleeding. I
>> proposed an initial page flag because we could use it later for
>> unconditional page clearing doing a one line change in a header file.
>> I see a lot of speculation on what works and what doesn't, but
>> there isn't much on the practical side of things, yet. I provided test
>> results that proved some of the comments wrong, and I've referenced
>> literature which shows the reasoning behind all this. What else can I do
>> to make you understand you are missing the point here?
>
> Hey, if you want to add a CONFIG_ZERO_ALL_MEMORY_PARANOIA thing that can be 
> disabled, go for it! But you have to find someone else to take the merge 
> the SLAB bits because, quite frankly, I am not convinced it's worth it. And 
> the hand waving you're doing here isn't really helping your case, sorry.

For a second I thought it was Ingo who was writing this e-mail.
Apologies about the confusion.

What was the technical explanation and reasoning behind your remarks,
again?

	Larry

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-30  8:35                         ` Pekka Enberg
@ 2009-05-30  9:27                           ` Larry H.
  -1 siblings, 0 replies; 220+ messages in thread
From: Larry H. @ 2009-05-30  9:27 UTC (permalink / raw)
  To: Pekka Enberg
  Cc: Alan Cox, Ingo Molnar, Rik van Riel, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar, pageexec, Linus Torvalds

On 11:35 Sat 30 May     , Pekka Enberg wrote:
> Alan Cox wrote:
>> The problem is that most sensitive data is user space anyway.
>> GFP_SENSITIVE or kzfree mean you have to get it right in the kernel and
>> you don't fix things like stack copies of sensitive data - its a quick
>> hack which doesn't meet goot security programming practice -it defaults
>> to insecure which is the wrong way around. Not saying its not a bad idea
>> to kzfree a few keys and things *but* it's not real security.
>> If you want to do real security you have a sysfs or build flag that turns
>> on clearing every page on free. Yes it costs performance (a lot less
>> nowdays with cache bypassing stores) but for the category of user who
>> wants to be sure nothing escapes it does the job while kzfree would be
>> like trying to plug leaks in a sieve.
>
> Yup, your suggestion would make one simple patch, for sure.

This was the first approach taken after Alan and others objected to the
use of a page flag. A patch using a build time config option was
submitted, which is the same way PaX's feature works currently, and Alan
asked for a runtime option instead.

> I wonder if  anyone is actually prepared to enable the thing at run-time, though, which 
> is why I suggested doing the "critical" kzfree() ones unconditionally.

I don't know how many times I need to repeat that if you think the point
here is doing selective sanitization, or that it does any good, you are
totally missing it. Please take some time off and read the past remarks
I made in this thread, especially the analysis of almost a dozen kernel
vulnerabilities which could have been prevented or minimized in terms of
damage (besides coldboot/iceman attacks and so forth, refer to the
Princeton and Stanford papers):

http://marc.info/?l=linux-mm&m=124301548814293&w=2

The very first patchset did change the crypto api, af_key and other
sources to use clearing on release time. Also, regarding your
hesitations about who is prepared to enable full unconditional
sanitization of memory... maybe not you, because you likely don't care
or require this _for yourself_.

Don't assume your perceived level of security risks matches that of the
rest of the real world. This is clearly not something your average
university sysadmin might use. As Alan put it nicely, if you need
this, you know why and are well aware of the ups and downs.

Fallacies like this are the basis of every security failure so far.

	Larry

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-30  9:27                           ` Larry H.
  0 siblings, 0 replies; 220+ messages in thread
From: Larry H. @ 2009-05-30  9:27 UTC (permalink / raw)
  To: Pekka Enberg
  Cc: Alan Cox, Ingo Molnar, Rik van Riel, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar, pageexec, Linus Torvalds

On 11:35 Sat 30 May     , Pekka Enberg wrote:
> Alan Cox wrote:
>> The problem is that most sensitive data is user space anyway.
>> GFP_SENSITIVE or kzfree mean you have to get it right in the kernel and
>> you don't fix things like stack copies of sensitive data - its a quick
>> hack which doesn't meet goot security programming practice -it defaults
>> to insecure which is the wrong way around. Not saying its not a bad idea
>> to kzfree a few keys and things *but* it's not real security.
>> If you want to do real security you have a sysfs or build flag that turns
>> on clearing every page on free. Yes it costs performance (a lot less
>> nowdays with cache bypassing stores) but for the category of user who
>> wants to be sure nothing escapes it does the job while kzfree would be
>> like trying to plug leaks in a sieve.
>
> Yup, your suggestion would make one simple patch, for sure.

This was the first approach taken after Alan and others objected to the
use of a page flag. A patch using a build time config option was
submitted, which is the same way PaX's feature works currently, and Alan
asked for a runtime option instead.

> I wonder if  anyone is actually prepared to enable the thing at run-time, though, which 
> is why I suggested doing the "critical" kzfree() ones unconditionally.

I don't know how many times I need to repeat that if you think the point
here is doing selective sanitization, or that it does any good, you are
totally missing it. Please take some time off and read the past remarks
I made in this thread, especially the analysis of almost a dozen kernel
vulnerabilities which could have been prevented or minimized in terms of
damage (besides coldboot/iceman attacks and so forth, refer to the
Princeton and Stanford papers):

http://marc.info/?l=linux-mm&m=124301548814293&w=2

The very first patchset did change the crypto api, af_key and other
sources to use clearing on release time. Also, regarding your
hesitations about who is prepared to enable full unconditional
sanitization of memory... maybe not you, because you likely don't care
or require this _for yourself_.

Don't assume your perceived level of security risks matches that of the
rest of the real world. This is clearly not something your average
university sysadmin might use. As Alan put it nicely, if you need
this, you know why and are well aware of the ups and downs.

Fallacies like this are the basis of every security failure so far.

	Larry

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-30  5:48                         ` Larry H.
@ 2009-05-30 10:39                           ` Peter Zijlstra
  -1 siblings, 0 replies; 220+ messages in thread
From: Peter Zijlstra @ 2009-05-30 10:39 UTC (permalink / raw)
  To: Larry H.
  Cc: Arjan van de Ven, Alan Cox, Ingo Molnar, Rik van Riel,
	linux-kernel, Linus Torvalds, linux-mm, Ingo Molnar, pageexec

On Fri, 2009-05-29 at 22:48 -0700, Larry H. wrote:
> On 07:32 Fri 29 May     , Arjan van de Ven wrote:
> > On Thu, 28 May 2009 21:36:01 +0200
> > Peter Zijlstra <peterz@infradead.org> wrote:
> > 
> > > > ... and if we zero on free, we don't need to zero on allocate.
> > > > While this is a little controversial, it does mean that at least
> > > > part of the cost is just time-shifted, which means it'll not be TOO
> > > > bad hopefully...
> > > 
> > > zero on allocate has the advantage of cache hotness, we're going to
> > > use the memory, why else allocate it.
> 
> Because zero on allocate kills the very purpose of this patch and it has
> obvious security implications. Like races (in information leak
> scenarios, that is). What happens in-between the release of the page and
> the new allocation that yields the same page? What happens if no further
> allocations happen in a while (that can return the old page again)?
> That's the idea.

I don't get it, these are in-kernel data leaks, you need to be able to
run kernel code to exploit these, if someone can run kernel code, you've
lost anyhow.

Why waste time on this?

> > So if you zero on free, the next allocation will reuse the zeroed page.
> > And due to LIFO that is not too far out "often", which makes it likely
> > the page is still in L2 cache.
> 
> Thanks for pointing this out clearly, Arjan.

Thing is, the time between allocation and use is typically orders of
magnitude less than between free and use. 


Really, get a life, go fix real bugs. Don't make our kernel slower for
wanking rights.


^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-30 10:39                           ` Peter Zijlstra
  0 siblings, 0 replies; 220+ messages in thread
From: Peter Zijlstra @ 2009-05-30 10:39 UTC (permalink / raw)
  To: Larry H.
  Cc: Arjan van de Ven, Alan Cox, Ingo Molnar, Rik van Riel,
	linux-kernel, Linus Torvalds, linux-mm, Ingo Molnar, pageexec

On Fri, 2009-05-29 at 22:48 -0700, Larry H. wrote:
> On 07:32 Fri 29 May     , Arjan van de Ven wrote:
> > On Thu, 28 May 2009 21:36:01 +0200
> > Peter Zijlstra <peterz@infradead.org> wrote:
> > 
> > > > ... and if we zero on free, we don't need to zero on allocate.
> > > > While this is a little controversial, it does mean that at least
> > > > part of the cost is just time-shifted, which means it'll not be TOO
> > > > bad hopefully...
> > > 
> > > zero on allocate has the advantage of cache hotness, we're going to
> > > use the memory, why else allocate it.
> 
> Because zero on allocate kills the very purpose of this patch and it has
> obvious security implications. Like races (in information leak
> scenarios, that is). What happens in-between the release of the page and
> the new allocation that yields the same page? What happens if no further
> allocations happen in a while (that can return the old page again)?
> That's the idea.

I don't get it, these are in-kernel data leaks, you need to be able to
run kernel code to exploit these, if someone can run kernel code, you've
lost anyhow.

Why waste time on this?

> > So if you zero on free, the next allocation will reuse the zeroed page.
> > And due to LIFO that is not too far out "often", which makes it likely
> > the page is still in L2 cache.
> 
> Thanks for pointing this out clearly, Arjan.

Thing is, the time between allocation and use is typically orders of
magnitude less than between free and use. 


Really, get a life, go fix real bugs. Don't make our kernel slower for
wanking rights.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-30 10:39                           ` Peter Zijlstra
@ 2009-05-30 10:43                             ` Larry H.
  -1 siblings, 0 replies; 220+ messages in thread
From: Larry H. @ 2009-05-30 10:43 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Arjan van de Ven, Alan Cox, Ingo Molnar, Rik van Riel,
	linux-kernel, Linus Torvalds, linux-mm, Ingo Molnar, pageexec

On 12:39 Sat 30 May     , Peter Zijlstra wrote:
> > Because zero on allocate kills the very purpose of this patch and it has
> > obvious security implications. Like races (in information leak
> > scenarios, that is). What happens in-between the release of the page and
> > the new allocation that yields the same page? What happens if no further
> > allocations happen in a while (that can return the old page again)?
> > That's the idea.
> 
> I don't get it, these are in-kernel data leaks, you need to be able to
> run kernel code to exploit these, if someone can run kernel code, you've
> lost anyhow.
> 
> Why waste time on this?

If there were any hesitations about your lack of understanding in
security matters, you just cleared them all with the above statements.

> > > So if you zero on free, the next allocation will reuse the zeroed page.
> > > And due to LIFO that is not too far out "often", which makes it likely
> > > the page is still in L2 cache.
> > 
> > Thanks for pointing this out clearly, Arjan.
> 
> Thing is, the time between allocation and use is typically orders of
> magnitude less than between free and use. 
> 
> 
> Really, get a life, go fix real bugs. Don't make our kernel slower for
> wanking rights.

This is exactly the positive attitude, sound and mature response I was
expecting from you. Thank you.

	Larry

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-30 10:43                             ` Larry H.
  0 siblings, 0 replies; 220+ messages in thread
From: Larry H. @ 2009-05-30 10:43 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Arjan van de Ven, Alan Cox, Ingo Molnar, Rik van Riel,
	linux-kernel, Linus Torvalds, linux-mm, Ingo Molnar, pageexec

On 12:39 Sat 30 May     , Peter Zijlstra wrote:
> > Because zero on allocate kills the very purpose of this patch and it has
> > obvious security implications. Like races (in information leak
> > scenarios, that is). What happens in-between the release of the page and
> > the new allocation that yields the same page? What happens if no further
> > allocations happen in a while (that can return the old page again)?
> > That's the idea.
> 
> I don't get it, these are in-kernel data leaks, you need to be able to
> run kernel code to exploit these, if someone can run kernel code, you've
> lost anyhow.
> 
> Why waste time on this?

If there were any hesitations about your lack of understanding in
security matters, you just cleared them all with the above statements.

> > > So if you zero on free, the next allocation will reuse the zeroed page.
> > > And due to LIFO that is not too far out "often", which makes it likely
> > > the page is still in L2 cache.
> > 
> > Thanks for pointing this out clearly, Arjan.
> 
> Thing is, the time between allocation and use is typically orders of
> magnitude less than between free and use. 
> 
> 
> Really, get a life, go fix real bugs. Don't make our kernel slower for
> wanking rights.

This is exactly the positive attitude, sound and mature response I was
expecting from you. Thank you.

	Larry

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-30 10:39                           ` Peter Zijlstra
@ 2009-05-30 11:42                             ` pageexec
  -1 siblings, 0 replies; 220+ messages in thread
From: pageexec @ 2009-05-30 11:42 UTC (permalink / raw)
  To: Larry H., Peter Zijlstra
  Cc: Arjan van de Ven, Alan Cox, Ingo Molnar, Rik van Riel,
	linux-kernel, Linus Torvalds, linux-mm, Ingo Molnar

On 30 May 2009 at 12:39, Peter Zijlstra wrote:

> On Fri, 2009-05-29 at 22:48 -0700, Larry H. wrote:
> > On 07:32 Fri 29 May     , Arjan van de Ven wrote:
> > > On Thu, 28 May 2009 21:36:01 +0200
> > > Peter Zijlstra <peterz@infradead.org> wrote:
> > > 
> > > > > ... and if we zero on free, we don't need to zero on allocate.
> > > > > While this is a little controversial, it does mean that at least
> > > > > part of the cost is just time-shifted, which means it'll not be TOO
> > > > > bad hopefully...
> > > > 
> > > > zero on allocate has the advantage of cache hotness, we're going to
> > > > use the memory, why else allocate it.
> > 
> > Because zero on allocate kills the very purpose of this patch and it has
> > obvious security implications. Like races (in information leak
> > scenarios, that is). What happens in-between the release of the page and
> > the new allocation that yields the same page? What happens if no further
> > allocations happen in a while (that can return the old page again)?
> > That's the idea.
> 
> I don't get it, these are in-kernel data leaks, you need to be able to
> run kernel code to exploit these, if someone can run kernel code, you've
> lost anyhow.
> 
> Why waste time on this?

e.g., when userland executes a syscall, it 'can run kernel code'. if that kernel
code (note: already exists, isn't provided by the attacker) gives unintended
kernel memory back to userland, there is a problem. that problem is addressed
in part by early sanitizing of freed data.

> > > So if you zero on free, the next allocation will reuse the zeroed page.
> > > And due to LIFO that is not too far out "often", which makes it likely
> > > the page is still in L2 cache.
> > 
> > Thanks for pointing this out clearly, Arjan.
> 
> Thing is, the time between allocation and use is typically orders of
> magnitude less than between free and use. 

so you are saying that in the sequence of events (free -> alloc -> use) the lifetime
of freed data is overwhelmingly dominated by the free -> alloc interval. this is
*exactly* what sanitization addresses.

also you sort of give away your misunderstanding of the threat this patch addresses:
it's not about being 'typically' good, but in every possible case involving freed
data. to give you an idea why 'typically' isn't good enough: imagine you have a
firefox process consuming hundreds of MBs of memory (fact of life, whether fortunate
or not) that then crashes (or the user quits it, doesn't matter). all that data will
be freed on the crash. how long do you think it takes for all those hundreds of MBs of
memory to be reused ? in the meantime all your passwords, cryptographic state, etc
are in RAM.

no need to guess actually, just read the paper Larry referenced in his first post:

 http://www.stanford.edu/~blp/papers/shredding.html

one of their experiments showed that around a MB (!) of data of an initial 64 MB
allocation survived for *days*.

> Really, get a life, go fix real bugs. Don't make our kernel slower for
> wanking rights.

ignoring the ad hominem and less than civilized response, the point is not to
slow down everyone. memory sanitization is an option and won't slow down anyone
not explicitly enabling it. if you believe you can actually measure a few extra
conditional jumps in real life workloads, go ahead and show us the numbers.


^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-30 11:42                             ` pageexec
  0 siblings, 0 replies; 220+ messages in thread
From: pageexec @ 2009-05-30 11:42 UTC (permalink / raw)
  To: Larry H., Peter Zijlstra
  Cc: Arjan van de Ven, Alan Cox, Ingo Molnar, Rik van Riel,
	linux-kernel, Linus Torvalds, linux-mm, Ingo Molnar

On 30 May 2009 at 12:39, Peter Zijlstra wrote:

> On Fri, 2009-05-29 at 22:48 -0700, Larry H. wrote:
> > On 07:32 Fri 29 May     , Arjan van de Ven wrote:
> > > On Thu, 28 May 2009 21:36:01 +0200
> > > Peter Zijlstra <peterz@infradead.org> wrote:
> > > 
> > > > > ... and if we zero on free, we don't need to zero on allocate.
> > > > > While this is a little controversial, it does mean that at least
> > > > > part of the cost is just time-shifted, which means it'll not be TOO
> > > > > bad hopefully...
> > > > 
> > > > zero on allocate has the advantage of cache hotness, we're going to
> > > > use the memory, why else allocate it.
> > 
> > Because zero on allocate kills the very purpose of this patch and it has
> > obvious security implications. Like races (in information leak
> > scenarios, that is). What happens in-between the release of the page and
> > the new allocation that yields the same page? What happens if no further
> > allocations happen in a while (that can return the old page again)?
> > That's the idea.
> 
> I don't get it, these are in-kernel data leaks, you need to be able to
> run kernel code to exploit these, if someone can run kernel code, you've
> lost anyhow.
> 
> Why waste time on this?

e.g., when userland executes a syscall, it 'can run kernel code'. if that kernel
code (note: already exists, isn't provided by the attacker) gives unintended
kernel memory back to userland, there is a problem. that problem is addressed
in part by early sanitizing of freed data.

> > > So if you zero on free, the next allocation will reuse the zeroed page.
> > > And due to LIFO that is not too far out "often", which makes it likely
> > > the page is still in L2 cache.
> > 
> > Thanks for pointing this out clearly, Arjan.
> 
> Thing is, the time between allocation and use is typically orders of
> magnitude less than between free and use. 

so you are saying that in the sequence of events (free -> alloc -> use) the lifetime
of freed data is overwhelmingly dominated by the free -> alloc interval. this is
*exactly* what sanitization addresses.

also you sort of give away your misunderstanding of the threat this patch addresses:
it's not about being 'typically' good, but in every possible case involving freed
data. to give you an idea why 'typically' isn't good enough: imagine you have a
firefox process consuming hundreds of MBs of memory (fact of life, whether fortunate
or not) that then crashes (or the user quits it, doesn't matter). all that data will
be freed on the crash. how long do you think it takes for all those hundreds of MBs of
memory to be reused ? in the meantime all your passwords, cryptographic state, etc
are in RAM.

no need to guess actually, just read the paper Larry referenced in his first post:

 http://www.stanford.edu/~blp/papers/shredding.html

one of their experiments showed that around a MB (!) of data of an initial 64 MB
allocation survived for *days*.

> Really, get a life, go fix real bugs. Don't make our kernel slower for
> wanking rights.

ignoring the ad hominem and less than civilized response, the point is not to
slow down everyone. memory sanitization is an option and won't slow down anyone
not explicitly enabling it. if you believe you can actually measure a few extra
conditional jumps in real life workloads, go ahead and show us the numbers.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-30 11:42                             ` pageexec
@ 2009-05-30 13:21                               ` Peter Zijlstra
  -1 siblings, 0 replies; 220+ messages in thread
From: Peter Zijlstra @ 2009-05-30 13:21 UTC (permalink / raw)
  To: pageexec
  Cc: Larry H.,
	Arjan van de Ven, Alan Cox, Ingo Molnar, Rik van Riel,
	linux-kernel, Linus Torvalds, linux-mm, Ingo Molnar

On Sat, 2009-05-30 at 13:42 +0200, pageexec@freemail.hu wrote:
> > Why waste time on this?
> 
> e.g., when userland executes a syscall, it 'can run kernel code'. if that kernel
> code (note: already exists, isn't provided by the attacker) gives unintended
> kernel memory back to userland, there is a problem. that problem is addressed
> in part by early sanitizing of freed data.

Right, so the whole point is to minimize the impact of actual bugs,
right? So why not focus on fixing those actual bugs? Can we create tools
to help us find such bugs faster? We use sparse for a lot of static
checking, we create things like lockdep and kmemcheck to dynamically
find trouble.

Can we instead of working around a problem, fix the actual problem?


^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-30 13:21                               ` Peter Zijlstra
  0 siblings, 0 replies; 220+ messages in thread
From: Peter Zijlstra @ 2009-05-30 13:21 UTC (permalink / raw)
  To: pageexec
  Cc: Larry H.,
	Arjan van de Ven, Alan Cox, Ingo Molnar, Rik van Riel,
	linux-kernel, Linus Torvalds, linux-mm, Ingo Molnar

On Sat, 2009-05-30 at 13:42 +0200, pageexec@freemail.hu wrote:
> > Why waste time on this?
> 
> e.g., when userland executes a syscall, it 'can run kernel code'. if that kernel
> code (note: already exists, isn't provided by the attacker) gives unintended
> kernel memory back to userland, there is a problem. that problem is addressed
> in part by early sanitizing of freed data.

Right, so the whole point is to minimize the impact of actual bugs,
right? So why not focus on fixing those actual bugs? Can we create tools
to help us find such bugs faster? We use sparse for a lot of static
checking, we create things like lockdep and kmemcheck to dynamically
find trouble.

Can we instead of working around a problem, fix the actual problem?

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-30 13:21                               ` Peter Zijlstra
@ 2009-05-30 13:24                                 ` Peter Zijlstra
  -1 siblings, 0 replies; 220+ messages in thread
From: Peter Zijlstra @ 2009-05-30 13:24 UTC (permalink / raw)
  To: pageexec
  Cc: Larry H.,
	Arjan van de Ven, Alan Cox, Ingo Molnar, Rik van Riel,
	linux-kernel, Linus Torvalds, linux-mm, Ingo Molnar

On Sat, 2009-05-30 at 15:21 +0200, Peter Zijlstra wrote:
> On Sat, 2009-05-30 at 13:42 +0200, pageexec@freemail.hu wrote:
> > > Why waste time on this?
> > 
> > e.g., when userland executes a syscall, it 'can run kernel code'. if that kernel
> > code (note: already exists, isn't provided by the attacker) gives unintended
> > kernel memory back to userland, there is a problem. that problem is addressed
> > in part by early sanitizing of freed data.
> 
> Right, so the whole point is to minimize the impact of actual bugs,
> right? So why not focus on fixing those actual bugs? Can we create tools
> to help us find such bugs faster? We use sparse for a lot of static
> checking, we create things like lockdep and kmemcheck to dynamically
> find trouble.
> 
> Can we instead of working around a problem, fix the actual problem?

Also, I'm not at all opposed to making crypto code use kzfree(). That code
knows it had sensitive data in memory, it can wipe the memory when it
frees it -- that makes perfect sense.

Wiping everything because we're too 'lazy' to figure out what really
matters otoh seems silly.


^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-30 13:24                                 ` Peter Zijlstra
  0 siblings, 0 replies; 220+ messages in thread
From: Peter Zijlstra @ 2009-05-30 13:24 UTC (permalink / raw)
  To: pageexec
  Cc: Larry H.,
	Arjan van de Ven, Alan Cox, Ingo Molnar, Rik van Riel,
	linux-kernel, Linus Torvalds, linux-mm, Ingo Molnar

On Sat, 2009-05-30 at 15:21 +0200, Peter Zijlstra wrote:
> On Sat, 2009-05-30 at 13:42 +0200, pageexec@freemail.hu wrote:
> > > Why waste time on this?
> > 
> > e.g., when userland executes a syscall, it 'can run kernel code'. if that kernel
> > code (note: already exists, isn't provided by the attacker) gives unintended
> > kernel memory back to userland, there is a problem. that problem is addressed
> > in part by early sanitizing of freed data.
> 
> Right, so the whole point is to minimize the impact of actual bugs,
> right? So why not focus on fixing those actual bugs? Can we create tools
> to help us find such bugs faster? We use sparse for a lot of static
> checking, we create things like lockdep and kmemcheck to dynamically
> find trouble.
> 
> Can we instead of working around a problem, fix the actual problem?

Also, I'm not at all opposed to making crypto code use kzfree(). That code
knows it had sensitive data in memory, it can wipe the memory when it
frees it -- that makes perfect sense.

Wiping everything because we're too 'lazy' to figure out what really
matters otoh seems silly.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-30 13:21                               ` Peter Zijlstra
@ 2009-05-30 13:54                                 ` pageexec
  -1 siblings, 0 replies; 220+ messages in thread
From: pageexec @ 2009-05-30 13:54 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Larry H.,
	Arjan van de Ven, Alan Cox, Ingo Molnar, Rik van Riel,
	linux-kernel, Linus Torvalds, linux-mm, Ingo Molnar

On 30 May 2009 at 15:21, Peter Zijlstra wrote:

> On Sat, 2009-05-30 at 13:42 +0200, pageexec@freemail.hu wrote:
> > > Why waste time on this?
> > 
> > e.g., when userland executes a syscall, it 'can run kernel code'. if that kernel
> > code (note: already exists, isn't provided by the attacker) gives unintended
> > kernel memory back to userland, there is a problem. that problem is addressed
> > in part by early sanitizing of freed data.
> 
> Right, so the whole point is to minimize the impact of actual bugs,
> right?

correct. this approach is the manifestation of a particular philosophy
in computer security where instead of finding all bugs, we minimize or,
at times, eliminate their bad sideeffects. non-executable pages, ASLR,
etc are all about this. see below why.

> So why not focus on fixing those actual bugs? Can we create tools
> to help us find such bugs faster? We use sparse for a lot of static
> checking, we create things like lockdep and kmemcheck to dynamically
> find trouble.
> 
> Can we instead of working around a problem, fix the actual problem?

finding all use-after-free bugs is not possible, as far as i know. the
fundamental problem is that you'd have to find bugs with arbitrary read
sideeffects (which is just as hard a problem as finding bugs with arbitrary
write sideeffects which you'd also have to solve). if you solve these
problems, you'll have solved the most important bug class in computer
security that many decades of academic/industrial/etc research failed at.

since there's no (practical and theoretical) solution in sight for finding
and eliminating such memory handling bugs, we're left with tackling a less
ambitious goal of at least reducing their sideeffects to acceptable levels.

of course there'll be always instances and subclasses of bugs that we can
find by manual or automated inspection, but that only shows that the rest
can only be handled by 'working around the problem'.


^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-30 13:54                                 ` pageexec
  0 siblings, 0 replies; 220+ messages in thread
From: pageexec @ 2009-05-30 13:54 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Larry H.,
	Arjan van de Ven, Alan Cox, Ingo Molnar, Rik van Riel,
	linux-kernel, Linus Torvalds, linux-mm, Ingo Molnar

On 30 May 2009 at 15:21, Peter Zijlstra wrote:

> On Sat, 2009-05-30 at 13:42 +0200, pageexec@freemail.hu wrote:
> > > Why waste time on this?
> > 
> > e.g., when userland executes a syscall, it 'can run kernel code'. if that kernel
> > code (note: already exists, isn't provided by the attacker) gives unintended
> > kernel memory back to userland, there is a problem. that problem is addressed
> > in part by early sanitizing of freed data.
> 
> Right, so the whole point is to minimize the impact of actual bugs,
> right?

correct. this approach is the manifestation of a particular philosophy
in computer security where instead of finding all bugs, we minimize or,
at times, eliminate their bad sideeffects. non-executable pages, ASLR,
etc are all about this. see below why.

> So why not focus on fixing those actual bugs? Can we create tools
> to help us find such bugs faster? We use sparse for a lot of static
> checking, we create things like lockdep and kmemcheck to dynamically
> find trouble.
> 
> Can we instead of working around a problem, fix the actual problem?

finding all use-after-free bugs is not possible, as far as i know. the
fundamental problem is that you'd have to find bugs with arbitrary read
sideeffects (which is just as hard a problem as finding bugs with arbitrary
write sideeffects which you'd also have to solve). if you solve these
problems, you'll have solved the most important bug class in computer
security that many decades of academic/industrial/etc research failed at.

since there's no (practical and theoretical) solution in sight for finding
and eliminating such memory handling bugs, we're left with tackling a less
ambitious goal of at least reducing their sideeffects to acceptable levels.

of course there'll be always instances and subclasses of bugs that we can
find by manual or automated inspection, but that only shows that the rest
can only be handled by 'working around the problem'.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-30 13:54                                 ` pageexec
@ 2009-05-30 14:04                                   ` Larry H.
  -1 siblings, 0 replies; 220+ messages in thread
From: Larry H. @ 2009-05-30 14:04 UTC (permalink / raw)
  To: pageexec
  Cc: Peter Zijlstra, Arjan van de Ven, Alan Cox, Ingo Molnar,
	Rik van Riel, linux-kernel, Linus Torvalds, linux-mm,
	Ingo Molnar

On 15:54 Sat 30 May     , pageexec@freemail.hu wrote:
> On 30 May 2009 at 15:21, Peter Zijlstra wrote:
> 
> > On Sat, 2009-05-30 at 13:42 +0200, pageexec@freemail.hu wrote:
> > > > Why waste time on this?
> > > 
> > > e.g., when userland executes a syscall, it 'can run kernel code'. if that kernel
> > > code (note: already exists, isn't provided by the attacker) gives unintended
> > > kernel memory back to userland, there is a problem. that problem is addressed
> > > in part by early sanitizing of freed data.
> > 
> > Right, so the whole point is to minimize the impact of actual bugs,
> > right?
> 
> correct. this approach is the manifestation of a particular philosophy
> in computer security where instead of finding all bugs, we minimize or,
> at times, eliminate their bad sideeffects. non-executable pages, ASLR,
> etc are all about this. see below why.
> 
> > So why not focus on fixing those actual bugs? Can we create tools
> > to help us find such bugs faster? We use sparse for a lot of static
> > checking, we create things like lockdep and kmemcheck to dynamically
> > find trouble.
> > 
> > Can we instead of working around a problem, fix the actual problem?
> 
> finding all use-after-free bugs is not possible, as far as i know. the
> fundamental problem is that you'd have to find bugs with arbitrary read
> sideeffects (which is just as hard a problem as finding bugs with arbitrary
> write sideeffects which you'd also have to solve). if you solve these
> problems, you'll have solved the most important bug class in computer
> security that many decades of academic/industrial/etc research failed at.

If Peter can pull this off, I'll ring the red phone and get some VC
contacts going. We will be driving Camaros in no time, and I will
finally ditch my Spyder before it puts an end to my adventure.

	Larry

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-30 14:04                                   ` Larry H.
  0 siblings, 0 replies; 220+ messages in thread
From: Larry H. @ 2009-05-30 14:04 UTC (permalink / raw)
  To: pageexec
  Cc: Peter Zijlstra, Arjan van de Ven, Alan Cox, Ingo Molnar,
	Rik van Riel, linux-kernel, Linus Torvalds, linux-mm,
	Ingo Molnar

On 15:54 Sat 30 May     , pageexec@freemail.hu wrote:
> On 30 May 2009 at 15:21, Peter Zijlstra wrote:
> 
> > On Sat, 2009-05-30 at 13:42 +0200, pageexec@freemail.hu wrote:
> > > > Why waste time on this?
> > > 
> > > e.g., when userland executes a syscall, it 'can run kernel code'. if that kernel
> > > code (note: already exists, isn't provided by the attacker) gives unintended
> > > kernel memory back to userland, there is a problem. that problem is addressed
> > > in part by early sanitizing of freed data.
> > 
> > Right, so the whole point is to minimize the impact of actual bugs,
> > right?
> 
> correct. this approach is the manifestation of a particular philosophy
> in computer security where instead of finding all bugs, we minimize or,
> at times, eliminate their bad sideeffects. non-executable pages, ASLR,
> etc are all about this. see below why.
> 
> > So why not focus on fixing those actual bugs? Can we create tools
> > to help us find such bugs faster? We use sparse for a lot of static
> > checking, we create things like lockdep and kmemcheck to dynamically
> > find trouble.
> > 
> > Can we instead of working around a problem, fix the actual problem?
> 
> finding all use-after-free bugs is not possible, as far as i know. the
> fundamental problem is that you'd have to find bugs with arbitrary read
> sideeffects (which is just as hard a problem as finding bugs with arbitrary
> write sideeffects which you'd also have to solve). if you solve these
> problems, you'll have solved the most important bug class in computer
> security that many decades of academic/industrial/etc research failed at.

If Peter can pull this off, I'll ring the red phone and get some VC
contacts going. We will be driving Camaros in no time, and I will
finally ditch my Spyder before it puts an end to my adventure.

	Larry

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-30 13:21                               ` Peter Zijlstra
@ 2009-05-30 14:08                                 ` Rik van Riel
  -1 siblings, 0 replies; 220+ messages in thread
From: Rik van Riel @ 2009-05-30 14:08 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: pageexec, Larry H.,
	Arjan van de Ven, Alan Cox, Ingo Molnar, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar

Peter Zijlstra wrote:
> On Sat, 2009-05-30 at 13:42 +0200, pageexec@freemail.hu wrote:
>>> Why waste time on this?
>> e.g., when userland executes a syscall, it 'can run kernel code'. if that kernel
>> code (note: already exists, isn't provided by the attacker) gives unintended
>> kernel memory back to userland, there is a problem. that problem is addressed
>> in part by early sanitizing of freed data.
> 
> Right, so the whole point is to minimize the impact of actual bugs,
> right? So why not focus on fixing those actual bugs? Can we create tools
> to help us find such bugs faster? We use sparse for a lot of static
> checking, we create things like lockdep and kmemcheck to dynamically
> find trouble.
> 
> Can we instead of working around a problem, fix the actual problem?

Do you drive without seatbelts, because the real fix
is to stay out of accidents?

No software is bug free.

Let me repeat that: no software is bug free.

This means your security strategy cannot rely on
software being bug free.

This is why every security strategy is a "belt and
suspenders" thing, where:
1) code is audited to remove as many bugs as possible, and
2) the system is configured in such a way that the impact
    of the remaining bugs is limited

For example, if you check your own system you will find
that system daemons like bind and httpd run with limited
privileges.  This is done because, again, no software is
bug free and you want to limit the damage that can be done
after a bug is exploited.


-- 
All rights reversed.

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-30 14:08                                 ` Rik van Riel
  0 siblings, 0 replies; 220+ messages in thread
From: Rik van Riel @ 2009-05-30 14:08 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: pageexec, Larry H.,
	Arjan van de Ven, Alan Cox, Ingo Molnar, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar

Peter Zijlstra wrote:
> On Sat, 2009-05-30 at 13:42 +0200, pageexec@freemail.hu wrote:
>>> Why waste time on this?
>> e.g., when userland executes a syscall, it 'can run kernel code'. if that kernel
>> code (note: already exists, isn't provided by the attacker) gives unintended
>> kernel memory back to userland, there is a problem. that problem is addressed
>> in part by early sanitizing of freed data.
> 
> Right, so the whole point is to minimize the impact of actual bugs,
> right? So why not focus on fixing those actual bugs? Can we create tools
> to help us find such bugs faster? We use sparse for a lot of static
> checking, we create things like lockdep and kmemcheck to dynamically
> find trouble.
> 
> Can we instead of working around a problem, fix the actual problem?

Do you drive without seatbelts, because the real fix
is to stay out of accidents?

No software is bug free.

Let me repeat that: no software is bug free.

This means your security strategy cannot rely on
software being bug free.

This is why every security strategy is a "belt and
suspenders" thing, where:
1) code is audited to remove as many bugs as possible, and
2) the system is configured in such a way that the impact
    of the remaining bugs is limited

For example, if you check your own system you will find
that system daemons like bind and httpd run with limited
privileges.  This is done because, again, no software is
bug free and you want to limit the damage that can be done
after a bug is exploited.


-- 
All rights reversed.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-30 13:54                                 ` pageexec
@ 2009-05-30 14:13                                   ` Rik van Riel
  -1 siblings, 0 replies; 220+ messages in thread
From: Rik van Riel @ 2009-05-30 14:13 UTC (permalink / raw)
  To: pageexec
  Cc: Peter Zijlstra, Larry H.,
	Arjan van de Ven, Alan Cox, Ingo Molnar, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar

pageexec@freemail.hu wrote:
> On 30 May 2009 at 15:21, Peter Zijlstra wrote:

>> Can we instead of working around a problem, fix the actual problem?
> 
> finding all use-after-free bugs is not possible, as far as i know.

Not only that, but new ones get introduced all the time.

-- 
All rights reversed.

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-30 14:13                                   ` Rik van Riel
  0 siblings, 0 replies; 220+ messages in thread
From: Rik van Riel @ 2009-05-30 14:13 UTC (permalink / raw)
  To: pageexec
  Cc: Peter Zijlstra, Larry H.,
	Arjan van de Ven, Alan Cox, Ingo Molnar, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar

pageexec@freemail.hu wrote:
> On 30 May 2009 at 15:21, Peter Zijlstra wrote:

>> Can we instead of working around a problem, fix the actual problem?
> 
> finding all use-after-free bugs is not possible, as far as i know.

Not only that, but new ones get introduced all the time.

-- 
All rights reversed.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-30 13:21                               ` Peter Zijlstra
                                                 ` (3 preceding siblings ...)
  (?)
@ 2009-05-30 14:30                               ` Alan Cox
  2009-05-30 14:45                                   ` Peter Zijlstra
  -1 siblings, 1 reply; 220+ messages in thread
From: Alan Cox @ 2009-05-30 14:30 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: pageexec, Larry H.,
	Arjan van de Ven, Ingo Molnar, Rik van Riel, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar

> Right, so the whole point is to minimize the impact of actual bugs,
> right? So why not focus on fixing those actual bugs? Can we create tools
> to help us find such bugs faster? We use sparse for a lot of static
> checking, we create things like lockdep and kmemcheck to dynamically
> find trouble.
> 
> Can we instead of working around a problem, fix the actual problem?

Why do cars have crashworthiness and seatbelts ? Why not fix the actual
problem (driving errors) ? I mean lets face it they make the vehicle
heavier, less fuel efficient, less fun and more annoying to use.

> Wiping everything because we're too 'lazy' to figure out what really
> matters otoh seems silly.

It isn't about being lazy. A program cannot deduce what is or is not
sensitive. Consider the simple case of writing a highly confidential
document, encrypting it with GPG and deleting (secure deleting even) the
document. 

The chances are you write it in openoffice, which has no idea
it is secure, copies of bits of it end up mashed around by the glibc
allocator and on the stack in pages that then get recycled into kernel
space on page frees. We then clear them later as they go back to user
space - assuming they don't leak or get copied.

For most of us the probabilities of that data not leaking are fine, but
not for all.

The kernel has no idea what data it touches may be confidential and the
user space often doesn't either. Even if it did in a highly secure
environment you want to enumerate what is safe not try and label what is
not.

Think about it this way - is it better to have a root password you guard
carefully, or to give everyone the root password except the list of bad
guys you maintain ?

Alan

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-30 14:30                               ` Alan Cox
@ 2009-05-30 14:45                                   ` Peter Zijlstra
  0 siblings, 0 replies; 220+ messages in thread
From: Peter Zijlstra @ 2009-05-30 14:45 UTC (permalink / raw)
  To: Alan Cox
  Cc: pageexec, Larry H.,
	Arjan van de Ven, Ingo Molnar, Rik van Riel, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar

On Sat, 2009-05-30 at 15:30 +0100, Alan Cox wrote:
> > Right, so the whole point is to minimize the impact of actual bugs,
> > right? So why not focus on fixing those actual bugs? Can we create tools
> > to help us find such bugs faster? We use sparse for a lot of static
> > checking, we create things like lockdep and kmemcheck to dynamically
> > find trouble.
> > 
> > Can we instead of working around a problem, fix the actual problem?
> 
> Why do cars have crashworthiness and seatbelts ? Why not fix the actual
> problem (driving errors) ? I mean lets face it they make the vehicle
> heavier, less fuel efficient, less fun and more annoying to use.

We can't find every crash bug either, yet we still ship the kernel and
people actually use it too.

What makes these security bugs so much more important than all the other
ones?

As to the kernel not knowing what might or might not be secure, that's
right; userspace proglets should take their bit of responsibility as
well. We can't fix this in the kernel alone.

As long as that openoffice or firefox instance keeps running, there's
nothing in the world the kernel can do to make it more secure.

If you really write documents that sekrit you simply shouldn't be using
such software but use an editor that is written by people as paranoid as
seems to be advocated here.




^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-30 14:45                                   ` Peter Zijlstra
  0 siblings, 0 replies; 220+ messages in thread
From: Peter Zijlstra @ 2009-05-30 14:45 UTC (permalink / raw)
  To: Alan Cox
  Cc: pageexec, Larry H.,
	Arjan van de Ven, Ingo Molnar, Rik van Riel, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar

On Sat, 2009-05-30 at 15:30 +0100, Alan Cox wrote:
> > Right, so the whole point is to minimize the impact of actual bugs,
> > right? So why not focus on fixing those actual bugs? Can we create tools
> > to help us find such bugs faster? We use sparse for a lot of static
> > checking, we create things like lockdep and kmemcheck to dynamically
> > find trouble.
> > 
> > Can we instead of working around a problem, fix the actual problem?
> 
> Why do cars have crashworthiness and seatbelts ? Why not fix the actual
> problem (driving errors) ? I mean lets face it they make the vehicle
> heavier, less fuel efficient, less fun and more annoying to use.

We can't find every crash bug either, yet we still ship the kernel and
people actually use it too.

What makes these security bugs so much more important than all the other
ones?

As to the kernel not knowing what might or might not be secure, that's
right; userspace proglets should take their bit of responsibility as
well. We can't fix this in the kernel alone.

As long as that openoffice or firefox instance keeps running, there's
nothing in the world the kernel can do to make it more secure.

If you really write documents that sekrit you simply shouldn't be using
such software but use an editor that is written by people as paranoid as
seems to be advocated here.



--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-30 14:45                                   ` Peter Zijlstra
@ 2009-05-30 14:48                                     ` Rik van Riel
  -1 siblings, 0 replies; 220+ messages in thread
From: Rik van Riel @ 2009-05-30 14:48 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Alan Cox, pageexec, Larry H.,
	Arjan van de Ven, Ingo Molnar, linux-kernel, Linus Torvalds,
	linux-mm, Ingo Molnar

Peter Zijlstra wrote:
> On Sat, 2009-05-30 at 15:30 +0100, Alan Cox wrote:
>>> Right, so the whole point is to minimize the impact of actual bugs,
>>> right? So why not focus on fixing those actual bugs? Can we create tools
>>> to help us find such bugs faster? We use sparse for a lot of static
>>> checking, we create things like lockdep and kmemcheck to dynamically
>>> find trouble.
>>>
>>> Can we instead of working around a problem, fix the actual problem?
>> Why do cars have crashworthiness and seatbelts ? Why not fix the actual
>> problem (driving errors) ? I mean lets face it they make the vehicle
>> heavier, less fuel efficient, less fun and more annoying to use.
> 
> We can't find every crash bug either, yet we still ship the kernel and
> people actually use it too.
> 
> What makes these security bugs so much more important than all the other
> ones?

The fact that we can do something about them.

-- 
All rights reversed.

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-30 14:48                                     ` Rik van Riel
  0 siblings, 0 replies; 220+ messages in thread
From: Rik van Riel @ 2009-05-30 14:48 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Alan Cox, pageexec, Larry H.,
	Arjan van de Ven, Ingo Molnar, linux-kernel, Linus Torvalds,
	linux-mm, Ingo Molnar

Peter Zijlstra wrote:
> On Sat, 2009-05-30 at 15:30 +0100, Alan Cox wrote:
>>> Right, so the whole point is to minimize the impact of actual bugs,
>>> right? So why not focus on fixing those actual bugs? Can we create tools
>>> to help us find such bugs faster? We use sparse for a lot of static
>>> checking, we create things like lockdep and kmemcheck to dynamically
>>> find trouble.
>>>
>>> Can we instead of working around a problem, fix the actual problem?
>> Why do cars have crashworthiness and seatbelts ? Why not fix the actual
>> problem (driving errors) ? I mean lets face it they make the vehicle
>> heavier, less fuel efficient, less fun and more annoying to use.
> 
> We can't find every crash bug either, yet we still ship the kernel and
> people actually use it too.
> 
> What makes these security bugs so much more important than all the other
> ones?

The fact that we can do something about them.

-- 
All rights reversed.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page  allocator
  2009-05-30  8:20                           ` Larry H.
@ 2009-05-30 15:05                             ` Ray Lee
  -1 siblings, 0 replies; 220+ messages in thread
From: Ray Lee @ 2009-05-30 15:05 UTC (permalink / raw)
  To: Larry H.
  Cc: Pekka Enberg, Alan Cox, Ingo Molnar, Rik van Riel, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar, pageexec, Linus Torvalds

On Sat, May 30, 2009 at 1:20 AM, Larry H. <research@subreption.com> wrote:
> On 10:53 Sat 30 May     , Pekka Enberg wrote:
>>> That's hopeless, and kzfree is broken. Like I said in my earlier reply,
>>> please test that yourself to see the results. Whoever wrote that ignored
>>> how SLAB/SLUB work and if kzfree had been used somewhere in the kernel
>>> before, it should have been noticed long time ago.
>>
>> An open-coded version of kzfree was being used in the kernel:
>>
>> http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff;h=00fcf2cb6f6bb421851c3ba062c0a36760ea6e53
>>
>> Can we now get to the part where you explain how it's broken because I
>> obviously "ignored how SLAB/SLUB works"?
>
> You can find the answer in the code of sanitize_obj, within my kfree
> patch. Besides, it would have taken less time for you to write a simple
> module that kmallocs and kzfrees a buffer, than writing these two
> emails.

How about, for the third time, just sharing that information with the
whole rest of us reading along? Do you really think it's useful for
dozens of us to go do that test, when you already obviously *have*,
and could just share the information?

Please, act like a member of the community and share what you know. If
you're unwilling to do so, that's a huge argument in favor of ignoring
your code, no matter how good or right it might be.

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-30 15:05                             ` Ray Lee
  0 siblings, 0 replies; 220+ messages in thread
From: Ray Lee @ 2009-05-30 15:05 UTC (permalink / raw)
  To: Larry H.
  Cc: Pekka Enberg, Alan Cox, Ingo Molnar, Rik van Riel, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar, pageexec, Linus Torvalds

On Sat, May 30, 2009 at 1:20 AM, Larry H. <research@subreption.com> wrote:
> On 10:53 Sat 30 May     , Pekka Enberg wrote:
>>> That's hopeless, and kzfree is broken. Like I said in my earlier reply,
>>> please test that yourself to see the results. Whoever wrote that ignored
>>> how SLAB/SLUB work and if kzfree had been used somewhere in the kernel
>>> before, it should have been noticed long time ago.
>>
>> An open-coded version of kzfree was being used in the kernel:
>>
>> http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff;h=00fcf2cb6f6bb421851c3ba062c0a36760ea6e53
>>
>> Can we now get to the part where you explain how it's broken because I
>> obviously "ignored how SLAB/SLUB works"?
>
> You can find the answer in the code of sanitize_obj, within my kfree
> patch. Besides, it would have taken less time for you to write a simple
> module that kmallocs and kzfrees a buffer, than writing these two
> emails.

How about, for the third time, just sharing that information with the
whole rest of us reading along? Do you really think it's useful for
dozens of us to go do that test, when you already obviously *have*,
and could just share the information?

Please, act like a member of the community and share what you know. If
you're unwilling to do so, that's a huge argument in favor of ignoring
your code, no matter how good or right it might be.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-30 14:48                                     ` Rik van Riel
@ 2009-05-30 17:00                                       ` Larry H.
  -1 siblings, 0 replies; 220+ messages in thread
From: Larry H. @ 2009-05-30 17:00 UTC (permalink / raw)
  To: Rik van Riel
  Cc: Peter Zijlstra, Alan Cox, pageexec, Arjan van de Ven,
	Ingo Molnar, linux-kernel, Linus Torvalds, linux-mm, Ingo Molnar

Alright, I wrote some small programs to test the data remanence on
memory. It works in a really simple way and functionality is provided by
three different tools:

	scanleak.c
	Scans physical memory (using /dev/mem) looking for the
	ulonglong pattern. Filtered /dev/mem access breaks this, so
	disable it temporarily in your kernel to use it.

	secretleak.c
	Writes the ulonglong pattern to memory (takes two arguments, a size
	in MB, and a number of seconds to delay after free).

	zeromem.c
	Zeroes all or given MBs of memory (allocating blocks of 100M which
	are contiguous). If no argument is given, it allocates more
	blocks to ensure we can zero past the used memory (might hit
	swap). This can be used to simulate memory workload.

I've done the following test (x86 vm, 600M RAM, 2.6.29.4):

	1. ./secretleak 600 1

Will allocate 629145600 bytes (600M).
Zeroing buffer at 0x926bb008... Done.
Writing pattern to 0x926bb008 (402A25246B61654C)... Done.
Freeing buffer at 0x926bb008... Done.
Sleeping for 1 seconds...

	2. (2 minutes afterwards) ./zeromem 700

Zeroing 734003200 bytes of memory
Zeroing block at 0xb1b01008 (104857600 bytes, 102400 kB)
Zeroing block at 0xab700008 (104857600 bytes, 102400 kB)
Zeroing block at 0xa52ff008 (104857600 bytes, 102400 kB)
Zeroing block at 0x9eefe008 (104857600 bytes, 102400 kB)
Zeroing block at 0x98afd008 (104857600 bytes, 102400 kB)
Zeroing block at 0x926fc008 (104857600 bytes, 102400 kB)
Zeroing block at 0x8c2fb008 (104857600 bytes, 102400 kB)
Freeing block at 0xb1b01008.
Freeing block at 0xab700008.
Freeing block at 0xa52ff008.
Freeing block at 0x9eefe008.
Freeing block at 0x98afd008.
Freeing block at 0x926fc008.
Freeing block at 0x8c2fb008.

	3. Immediately afterwards, sudo ./scanleak | grep Found | wc -l
	   Reports 142 occurrences.

	4. Re-issue ./zeromem 700

	5. Re-scan memory, only three occurrences:

Scanning 617398272 bytes of memory from /dev/mem...
 Found pattern at 0x70d41e8 (402A25246B61654C)
 Found pattern at 0xf5931f8 (402A25246B61654C)
 Found pattern at 0xf5e11e8 (402A25246B61654C)

The scanning is PAGE_SIZE aligned, and only one occurrence is considered
per page (otherwise the output would be hideous in kernels without
sanitization).

Those three occurrences will stay there indefinitely... like the PaX
team said, the remanence is proportional to the size of the allocations
and data itself. The more data allocated, the more time it takes for
that memory to be requested back by some other user (kernel or non
kernel). An even simpler test would be to load a file with vim or
nano, which contains a large (on the order of hundreds of megabytes) amount
of text patterns (an md5 hash works), and monitor RAM to see how long it
stays there after you have closed the editor. While most of it will
slowly disappear, at least there will be some gaps that will remain for a
long time, possibly days, even under irregular high workloads.

I'm compiling a kernel with my current patches and will report back with
the results of these tools on that one. It will be 2.6.29.4 as well.

I'll make the sources of the tools available somewhere. Because of
corporate policy, it might be wiser to find a place where I can make
this stuff available for longer time periods. Does anyone know the
process to request space under pub/linux/kernel/people/?

	Larry



^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-30 17:00                                       ` Larry H.
  0 siblings, 0 replies; 220+ messages in thread
From: Larry H. @ 2009-05-30 17:00 UTC (permalink / raw)
  To: Rik van Riel
  Cc: Peter Zijlstra, Alan Cox, pageexec, Arjan van de Ven,
	Ingo Molnar, linux-kernel, Linus Torvalds, linux-mm, Ingo Molnar

Alright, I wrote some small programs to test the data remanence on
memory. It works in a really simple way and functionality is provided by
three different tools:

	scanleak.c
	Scans physical memory (using /dev/mem) looking for the
	ulonglong pattern. Filtered /dev/mem access breaks this, so
	disable it temporarily in your kernel to use it.

	secretleak.c
	Writes the ulonglong pattern to memory (takes two arguments, a size
	in MB, and a number of seconds to delay after free).

	zeromem.c
	Zeroes all or given MBs of memory (allocating blocks of 100M which
	are contiguous). If no argument is given, it allocates more
	blocks to ensure we can zero past the used memory (might hit
	swap). This can be used to simulate memory workload.

I've done the following test (x86 vm, 600M RAM, 2.6.29.4):

	1. ./secretleak 600 1

Will allocate 629145600 bytes (600M).
Zeroing buffer at 0x926bb008... Done.
Writing pattern to 0x926bb008 (402A25246B61654C)... Done.
Freeing buffer at 0x926bb008... Done.
Sleeping for 1 seconds...

	2. (2 minutes afterwards) ./zeromem 700

Zeroing 734003200 bytes of memory
Zeroing block at 0xb1b01008 (104857600 bytes, 102400 kB)
Zeroing block at 0xab700008 (104857600 bytes, 102400 kB)
Zeroing block at 0xa52ff008 (104857600 bytes, 102400 kB)
Zeroing block at 0x9eefe008 (104857600 bytes, 102400 kB)
Zeroing block at 0x98afd008 (104857600 bytes, 102400 kB)
Zeroing block at 0x926fc008 (104857600 bytes, 102400 kB)
Zeroing block at 0x8c2fb008 (104857600 bytes, 102400 kB)
Freeing block at 0xb1b01008.
Freeing block at 0xab700008.
Freeing block at 0xa52ff008.
Freeing block at 0x9eefe008.
Freeing block at 0x98afd008.
Freeing block at 0x926fc008.
Freeing block at 0x8c2fb008.

	3. Immediately afterwards, sudo ./scanleak | grep Found | wc -l
	   Reports 142 occurrences.

	4. Re-issue ./zeromem 700

	5. Re-scan memory, only three occurrences:

Scanning 617398272 bytes of memory from /dev/mem...
 Found pattern at 0x70d41e8 (402A25246B61654C)
 Found pattern at 0xf5931f8 (402A25246B61654C)
 Found pattern at 0xf5e11e8 (402A25246B61654C)

The scanning is PAGE_SIZE aligned, and only one occurrence is considered
per page (otherwise the output would be hideous in kernels without
sanitization).

Those three occurrences will stay there indefinitely... like the PaX
team said, the remanence is proportional to the size of the allocations
and data itself. The more data allocated, the more time it takes for
that memory to be requested back by some other user (kernel or non
kernel). An even simpler test would be to load a file with vim or
nano, which contains a large (on the order of hundreds of megabytes) amount
of text patterns (an md5 hash works), and monitor RAM to see how long it
stays there after you have closed the editor. While most of it will
slowly disappear, at least there will be some gaps that will remain for a
long time, possibly days, even under irregular high workloads.

I'm compiling a kernel with my current patches and will report back with
the results of these tools on that one. It will be 2.6.29.4 as well.

I'll make the sources of the tools available somewhere. Because of
corporate policy, it might be wiser to find a place where I can make
this stuff available for longer time periods. Does anyone know the
process to request space under pub/linux/kernel/people/?

	Larry


--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-30 17:00                                       ` Larry H.
@ 2009-05-30 17:25                                         ` Larry H.
  -1 siblings, 0 replies; 220+ messages in thread
From: Larry H. @ 2009-05-30 17:25 UTC (permalink / raw)
  To: Rik van Riel
  Cc: Peter Zijlstra, Alan Cox, pageexec, Arjan van de Ven,
	Ingo Molnar, linux-kernel, Linus Torvalds, linux-mm, Ingo Molnar

Done. I just tested with different 'leak' sizes on a kernel patched with
the latest memory sanitization patch and the kfree/kmem_cache_free one:

	10M	- no occurrences with immediate scanmem
	40M	- no occurrences with immediate scanmem
	80M	- no occurrences with immediate scanmem
	160M	- no occurrences with immediate scanmem
	250M	- no occurrences with immediate scanmem
	300M	- no occurrences with immediate scanmem
	500M	- no occurrences with immediate scanmem
	600M	- with immediate zeromem 600 and scanmem afterwards,
		 no occurrences.

The results are satisfactory to me. With the patch applied but
sanitization disabled, a single megabyte test produces 54 occurrences
some time after we ran secretleak. With higher amounts of memory, it gets
ridiculous.

I tested out of curiosity how the number of occurrences evolved through
different intervals on a sanitize-disabled system for a 10M leak:

2145
2128
2121
2118
2055
2046
2046
2046
2046
2045

That's under relatively idle work load. Until a larger size allocation
is requested somewhere else, the data is still there. The sad thing
about this, is that a website could be able to force Firefox, for
example, into allocating large amounts of memory (using Javascript, some
plugin, etc) and ensure that any cryptographic secrets previously used
in the browser will remain there even after it has been closed. The
unlikeliness of having such data disappear is directly proportional to
the size of the memory used by the process during its runtime.

This applies to any application (OpenOffice, vim loading large files,
your IRC or silc client, your image editing software, etc).

	Larry

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-30 17:25                                         ` Larry H.
  0 siblings, 0 replies; 220+ messages in thread
From: Larry H. @ 2009-05-30 17:25 UTC (permalink / raw)
  To: Rik van Riel
  Cc: Peter Zijlstra, Alan Cox, pageexec, Arjan van de Ven,
	Ingo Molnar, linux-kernel, Linus Torvalds, linux-mm, Ingo Molnar

Done. I just tested with different 'leak' sizes on a kernel patched with
the latest memory sanitization patch and the kfree/kmem_cache_free one:

	10M	- no occurrences with immediate scanmem
	40M	- no occurrences with immediate scanmem
	80M	- no occurrences with immediate scanmem
	160M	- no occurrences with immediate scanmem
	250M	- no occurrences with immediate scanmem
	300M	- no occurrences with immediate scanmem
	500M	- no occurrences with immediate scanmem
	600M	- with immediate zeromem 600 and scanmem afterwards,
		 no occurrences.

The results are satisfactory to me. With the patch applied but
sanitization disabled, a single megabyte test produces 54 occurrences
some time after we ran secretleak. With higher amounts of memory, it gets
ridiculous.

I tested out of curiosity how the number of occurrences evolved through
different intervals on a sanitize-disabled system for a 10M leak:

2145
2128
2121
2118
2055
2046
2046
2046
2046
2045

That's under relatively idle work load. Until a larger size allocation
is requested somewhere else, the data is still there. The sad thing
about this, is that a website could be able to force Firefox, for
example, into allocating large amounts of memory (using Javascript, some
plugin, etc) and ensure that any cryptographic secrets previously used
in the browser will remain there even after it has been closed. The
unlikeliness of having such data disappear is directly proportional to
the size of the memory used by the process during its runtime.

This applies to any application (OpenOffice, vim loading large files,
your IRC or silc client, your image editing software, etc).

	Larry

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-30  8:20                           ` Larry H.
@ 2009-05-30 17:34                             ` Ingo Molnar
  -1 siblings, 0 replies; 220+ messages in thread
From: Ingo Molnar @ 2009-05-30 17:34 UTC (permalink / raw)
  To: Larry H.
  Cc: Pekka Enberg, Alan Cox, Rik van Riel, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar, pageexec, Linus Torvalds


* Larry H. <research@subreption.com> wrote:

> On 10:53 Sat 30 May     , Pekka Enberg wrote:
> >> That's hopeless, and kzfree is broken. Like I said in my earlier reply,
> >> please test that yourself to see the results. Whoever wrote that ignored
> >> how SLAB/SLUB work and if kzfree had been used somewhere in the kernel
> >> before, it should have been noticed long time ago.
> >
> > An open-coded version of kzfree was being used in the kernel:
> >
> > http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff;h=00fcf2cb6f6bb421851c3ba062c0a36760ea6e53
> >
> > Can we now get to the part where you explain how it's broken because I 
> > obviously "ignored how SLAB/SLUB works"?
> 
> You can find the answer in the code of sanitize_obj, within my 
> kfree patch. [...]

You need to provide a more sufficient and more constructive answer 
than that, if you propose upstream patches that impact the SLAB 
subsystem.

FYI Pekka is one of the SLAB subsystem maintainers so you need to 
convince him that your patches are the right approach. Trying to 
teach Pekka about SLAB internals in a condescending tone will only 
cause your patches to be ignored.

	Ingo

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-30 17:34                             ` Ingo Molnar
  0 siblings, 0 replies; 220+ messages in thread
From: Ingo Molnar @ 2009-05-30 17:34 UTC (permalink / raw)
  To: Larry H.
  Cc: Pekka Enberg, Alan Cox, Rik van Riel, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar, pageexec, Linus Torvalds


* Larry H. <research@subreption.com> wrote:

> On 10:53 Sat 30 May     , Pekka Enberg wrote:
> >> That's hopeless, and kzfree is broken. Like I said in my earlier reply,
> >> please test that yourself to see the results. Whoever wrote that ignored
> >> how SLAB/SLUB work and if kzfree had been used somewhere in the kernel
> >> before, it should have been noticed long time ago.
> >
> > An open-coded version of kzfree was being used in the kernel:
> >
> > http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff;h=00fcf2cb6f6bb421851c3ba062c0a36760ea6e53
> >
> > Can we now get to the part where you explain how it's broken because I 
> > obviously "ignored how SLAB/SLUB works"?
> 
> You can find the answer in the code of sanitize_obj, within my 
> kfree patch. [...]

You need to provide a more sufficient and more constructive answer 
than that, if you propose upstream patches that impact the SLAB 
subsystem.

FYI Pekka is one of the SLAB subsystem maintainers so you need to 
convince him that your patches are the right approach. Trying to 
teach Pekka about SLAB internals in a condescending tone will only 
cause your patches to be ignored.

	Ingo

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-30  7:53                         ` Pekka Enberg
@ 2009-05-30 17:39                           ` Ingo Molnar
  -1 siblings, 0 replies; 220+ messages in thread
From: Ingo Molnar @ 2009-05-30 17:39 UTC (permalink / raw)
  To: Pekka Enberg
  Cc: Larry H.,
	Alan Cox, Rik van Riel, linux-kernel, Linus Torvalds, linux-mm,
	Ingo Molnar, pageexec, Linus Torvalds


* Pekka Enberg <penberg@cs.helsinki.fi> wrote:

> Hi Larry,
>
> On 10:35 Sat 30 May, Pekka Enberg wrote:
>>> The GFP_SENSITIVE flag looks like a big hammer that we don't really
>>> need IMHO. It seems to me that most of the actual call-sites (crypto
>>> code, wireless keys, etc.) should probably just use kzfree()
>>> unconditionally to make sure we don't leak sensitive data. I did not
>>> look too closely but I don't think any of the sensitive kfree() calls
>>> are in fastpaths so the performance impact is negligible.
>
> Larry H. wrote:
>> That's hopeless, and kzfree is broken. Like I said in my earlier reply,
>> please test that yourself to see the results. Whoever wrote that ignored
>> how SLAB/SLUB work and if kzfree had been used somewhere in the kernel
>> before, it should have been noticed long time ago.
>
> An open-coded version of kzfree was being used in the kernel:
>
> http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff;h=00fcf2cb6f6bb421851c3ba062c0a36760ea6e53
>
> Can we now get to the part where you explain how it's broken 
> because I obviously "ignored how SLAB/SLUB works"?

Yeah, kzfree() sounds like the right approach for all places that
know for sure that they don't want information to persist.

	Ingo

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-30 17:39                           ` Ingo Molnar
  0 siblings, 0 replies; 220+ messages in thread
From: Ingo Molnar @ 2009-05-30 17:39 UTC (permalink / raw)
  To: Pekka Enberg
  Cc: Larry H.,
	Alan Cox, Rik van Riel, linux-kernel, Linus Torvalds, linux-mm,
	Ingo Molnar, pageexec, Linus Torvalds


* Pekka Enberg <penberg@cs.helsinki.fi> wrote:

> Hi Larry,
>
> On 10:35 Sat 30 May, Pekka Enberg wrote:
>>> The GFP_SENSITIVE flag looks like a big hammer that we don't really
>>> need IMHO. It seems to me that most of the actual call-sites (crypto
>>> code, wireless keys, etc.) should probably just use kzfree()
>>> unconditionally to make sure we don't leak sensitive data. I did not
>>> look too closely but I don't think any of the sensitive kfree() calls
>>> are in fastpaths so the performance impact is negligible.
>
> Larry H. wrote:
>> That's hopeless, and kzfree is broken. Like I said in my earlier reply,
>> please test that yourself to see the results. Whoever wrote that ignored
>> how SLAB/SLUB work and if kzfree had been used somewhere in the kernel
>> before, it should have been noticed long time ago.
>
> An open-coded version of kzfree was being used in the kernel:
>
> http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff;h=00fcf2cb6f6bb421851c3ba062c0a36760ea6e53
>
> Can we now get to the part where you explain how it's broken 
> because I obviously "ignored how SLAB/SLUB works"?

Yeah, kzfree() sounds like the right approach for all places that
know for sure that they don't want information to persist.

	Ingo

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-30  9:05                           ` Larry H.
@ 2009-05-30 17:46                             ` Ingo Molnar
  -1 siblings, 0 replies; 220+ messages in thread
From: Ingo Molnar @ 2009-05-30 17:46 UTC (permalink / raw)
  To: Larry H.
  Cc: Pekka Enberg, Alan Cox, Rik van Riel, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar, pageexec, Linus Torvalds


* Larry H. <research@subreption.com> wrote:

> On 10:57 Sat 30 May     , Pekka Enberg wrote:
> > Larry H. wrote:
> >> Furthermore, selective clearing doesn't solve the roots of the problem.
> >> It's just adding bandages to a wound which never stops bleeding. I
> >> proposed an initial page flag because we could use it later for
> >> unconditional page clearing doing a one line change in a header file.
> >> I see a lot of speculation on what works and what doesn't, but
> >> there isn't much on the practical side of things, yet. I provided test
> >> results that proved some of the comments wrong, and I've referenced
> >> literature which shows the reasoning behind all this. What else can I do
> >> to make you understand you are missing the point here?
> >
> > Hey, if you want to add a CONFIG_ZERO_ALL_MEMORY_PARANOIA thing that can be 
> > disabled, go for it! But you have to find someone else to take the merge 
> > the SLAB bits because, quite frankly, I am not convinced it's worth it. And 
> > the hand waving you're doing here isn't really helping your case, sorry.
> 
> For a second I thought it was Ingo who was writing this e-mail. 
> Apologies about the confusion.

btw., i find this rather hilarious: you thought it was me
writing the reply and you answered Pekka's arguments with contempt
and hand-waving.

Now that you realized that it's the SLAB maintainer you replied to, 
whom you cannot just hand-wave away, you apologize not for the 
bogosity of your argument and not for the concept - but you 
apologize for _thinking it was the wrong person_.

That is a rather dishonest style of discussion.

	Ingo

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-30 17:46                             ` Ingo Molnar
  0 siblings, 0 replies; 220+ messages in thread
From: Ingo Molnar @ 2009-05-30 17:46 UTC (permalink / raw)
  To: Larry H.
  Cc: Pekka Enberg, Alan Cox, Rik van Riel, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar, pageexec, Linus Torvalds


* Larry H. <research@subreption.com> wrote:

> On 10:57 Sat 30 May     , Pekka Enberg wrote:
> > Larry H. wrote:
> >> Furthermore, selective clearing doesn't solve the roots of the problem.
> >> It's just adding bandages to a wound which never stops bleeding. I
> >> proposed an initial page flag because we could use it later for
> >> unconditional page clearing doing a one line change in a header file.
> >> I see a lot of speculation on what works and what doesn't, but
> >> there isn't much on the practical side of things, yet. I provided test
> >> results that proved some of the comments wrong, and I've referenced
> >> literature which shows the reasoning behind all this. What else can I do
> >> to make you understand you are missing the point here?
> >
> > Hey, if you want to add a CONFIG_ZERO_ALL_MEMORY_PARANOIA thing that can be 
> > disabled, go for it! But you have to find someone else to merge 
> > the SLAB bits because, quite frankly, I am not convinced it's worth it. And 
> > the hand waving you're doing here isn't really helping your case, sorry.
> 
> For a second I thought it was Ingo who was writing this e-mail. 
> Apologies about the confusion.

btw., i find this rather hilarious: you thought it was me 
writing the reply and you answered Pekka's arguments with contempt 
and hand-waving.

Now that you realized that it's the SLAB maintainer you replied to, 
whom you cannot just hand-wave away, you apologize not for the 
bogosity of your argument and not for the concept - but you 
apologize for _thinking it was the wrong person_.

That is a rather dishonest style of discussion.

	Ingo

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-28 18:48                   ` pageexec
@ 2009-05-30 17:50                     ` Ingo Molnar
  -1 siblings, 0 replies; 220+ messages in thread
From: Ingo Molnar @ 2009-05-30 17:50 UTC (permalink / raw)
  To: pageexec
  Cc: Alan Cox, Rik van Riel, Larry H.,
	linux-kernel, Linus Torvalds, linux-mm, Ingo Molnar,
	Linus Torvalds


* pageexec@freemail.hu <pageexec@freemail.hu> wrote:

> On 28 May 2009 at 11:08, Ingo Molnar wrote:
> 
> > 
> > * Alan Cox <alan@lxorguk.ukuu.org.uk> wrote:
> > 
> > > > > As for being swapped out - I do not believe that kernel stacks can 
> > > > > ever be swapped out in Linux.
> > > > 
> > > > yes, i referred to that as an undesirable option - because it slows 
> > > > down pthread_create() quite substantially.
> > > > 
> > > > This needs before/after pthread_create() benchmark results.
> > > 
> > > kernel stacks can end up places you don't expect on hypervisor 
> > > based systems.
> > > 
> > > In most respects the benchmarks are pretty irrelevant - wiping 
> > > stuff has a performance cost, but it's the sort of thing you only 
> > > want to do when you have a security requirement that needs it. At 
> > > that point the performance is secondary.
> > 
> > Benchmarks, of course, are not irrelevant _at all_.
> > 
> > So i'm asking for this "clear kernel stacks on freeing" aspect to be 
> > benchmarked thoroughly, as i expect it to have a negative impact - 
> > otherwise i'm NAK-ing this. Please Cc: me to measurements results.
> 
> last year while developing/debugging something else i also ran some kernel
> compilation tests and managed to dig out this one for you ('all' refers to
> all of PaX):
> 
> ------------------------------------------------------------------------------------------
> make -j4 2.6.24-rc7-i386-pax compiling 2.6.24-rc7-i386-pax (all with SANITIZE, no PARAVIRT)
> 565.63user 68.52system 5:25.52elapsed 194%CPU (0avgtext+0avgdata 0maxresident)k
> 0inputs+0outputs (1major+12486066minor)pagefaults 0swaps
> 
> 565.10user 68.28system 5:24.72elapsed 195%CPU (0avgtext+0avgdata 0maxresident)k
> 0inputs+0outputs (0major+12485742minor)pagefaults 0swaps
> ------------------------------------------------------------------------------------------
> make -j4 2.6.24-rc5-i386-pax compiling 2.6.24-rc5-i386-pax (all but SANITIZE, no PARAVIRT)
> 559.74user 50.29system 5:12.79elapsed 195%CPU (0avgtext+0avgdata 0maxresident)k
> 0inputs+0outputs (0major+12397482minor)pagefaults 0swaps
> 
> 561.41user 51.91system 5:14.55elapsed 194%CPU (0avgtext+0avgdata 0maxresident)k
> 0inputs+0outputs (0major+12396877minor)pagefaults 0swaps
> ------------------------------------------------------------------------------------------
> 
> for the kernel times the overhead is about 68s vs. 51s, or 40% in 
> this particular case. while i don't know where this workload (the 
> kernel part) falls in the spectrum of real life workloads, it 
> definitely shows that if you're kernel bound, you should think 
> twice before using this in production (and there's the real-time 
> latency issue too).

Yes, clearing memory causes quite brutal overhead - as expected.

If only kernel stacks are cleared before reuse that will be less 
overhead - but still it has to be benchmarked (and the overhead has 
to be justified).

	Ingo

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-30 17:50                     ` Ingo Molnar
  0 siblings, 0 replies; 220+ messages in thread
From: Ingo Molnar @ 2009-05-30 17:50 UTC (permalink / raw)
  To: pageexec
  Cc: Alan Cox, Rik van Riel, Larry H.,
	linux-kernel, Linus Torvalds, linux-mm, Ingo Molnar,
	Linus Torvalds


* pageexec@freemail.hu <pageexec@freemail.hu> wrote:

> On 28 May 2009 at 11:08, Ingo Molnar wrote:
> 
> > 
> > * Alan Cox <alan@lxorguk.ukuu.org.uk> wrote:
> > 
> > > > > As for being swapped out - I do not believe that kernel stacks can 
> > > > > ever be swapped out in Linux.
> > > > 
> > > > yes, i referred to that as an undesirable option - because it slows 
> > > > down pthread_create() quite substantially.
> > > > 
> > > > This needs before/after pthread_create() benchmark results.
> > > 
> > > kernel stacks can end up places you don't expect on hypervisor 
> > > based systems.
> > > 
> > > In most respects the benchmarks are pretty irrelevant - wiping 
> > > stuff has a performance cost, but it's the sort of thing you only 
> > > want to do when you have a security requirement that needs it. At 
> > > that point the performance is secondary.
> > 
> > Benchmarks, of course, are not irrelevant _at all_.
> > 
> > So i'm asking for this "clear kernel stacks on freeing" aspect to be 
> > benchmarked thoroughly, as i expect it to have a negative impact - 
> > otherwise i'm NAK-ing this. Please Cc: me to measurements results.
> 
> last year while developing/debugging something else i also ran some kernel
> compilation tests and managed to dig out this one for you ('all' refers to
> all of PaX):
> 
> ------------------------------------------------------------------------------------------
> make -j4 2.6.24-rc7-i386-pax compiling 2.6.24-rc7-i386-pax (all with SANITIZE, no PARAVIRT)
> 565.63user 68.52system 5:25.52elapsed 194%CPU (0avgtext+0avgdata 0maxresident)k
> 0inputs+0outputs (1major+12486066minor)pagefaults 0swaps
> 
> 565.10user 68.28system 5:24.72elapsed 195%CPU (0avgtext+0avgdata 0maxresident)k
> 0inputs+0outputs (0major+12485742minor)pagefaults 0swaps
> ------------------------------------------------------------------------------------------
> make -j4 2.6.24-rc5-i386-pax compiling 2.6.24-rc5-i386-pax (all but SANITIZE, no PARAVIRT)
> 559.74user 50.29system 5:12.79elapsed 195%CPU (0avgtext+0avgdata 0maxresident)k
> 0inputs+0outputs (0major+12397482minor)pagefaults 0swaps
> 
> 561.41user 51.91system 5:14.55elapsed 194%CPU (0avgtext+0avgdata 0maxresident)k
> 0inputs+0outputs (0major+12396877minor)pagefaults 0swaps
> ------------------------------------------------------------------------------------------
> 
> for the kernel times the overhead is about 68s vs. 51s, or 40% in 
> this particular case. while i don't know where this workload (the 
> kernel part) falls in the spectrum of real life workloads, it 
> definitely shows that if you're kernel bound, you should think 
> twice before using this in production (and there's the real-time 
> latency issue too).

Yes, clearing memory causes quite brutal overhead - as expected.

If only kernel stacks are cleared before reuse that will be less 
overhead - but still it has to be benchmarked (and the overhead has 
to be justified).

	Ingo

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-30 17:34                             ` Ingo Molnar
@ 2009-05-30 18:03                               ` Larry H.
  -1 siblings, 0 replies; 220+ messages in thread
From: Larry H. @ 2009-05-30 18:03 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Pekka Enberg, Alan Cox, Rik van Riel, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar, pageexec, Linus Torvalds

On 19:34 Sat 30 May     , Ingo Molnar wrote:
> You need to provide a more sufficient and more constructive answer 
> than that, if you propose upstream patches that impact the SLAB 
> subsystem.

Impact? If you mean introducing changes, definitely. If the word has
negative connotations in this context, definitely not ;)

> FYI Pekka is one of the SLAB subsystem maintainers so you need to 
> convince him that your patches are the right approach. Trying to 
> teach Pekka about SLAB internals in a condescending tone will only 
> cause your patches to be ignored.

I've never tried to teach you anything but security matters, so far.
And I've been quite unsuccessful at it, apparently. That said, please
let me explain why kzfree was broken (as of 2.6.29.4, I've been told
30-rc2 already has users of it).

The first issue is that SLOB has a broken ksize, which won't take into
consideration compound pages AFAIK. To fix this you will need to
introduce some changes in the way the slob_page structure is handled,
and add real size tracking to it. You will find these problems if you
try to implement a reliable kmem_ptr_validate for SLOB, too.

The second is that I've experienced issues with kzfree on 2.6.29.4, in
which something (apparently the freelist pointer) is overwritten and
leads to a NULL pointer dereference in the next allocation in the affected
cache. I didn't fully analyze what was broken, besides that for
sanitizing the objects on kfree I needed to rely on the inuse size and
not the one reported by ksize, if I wanted to avoid hitting that
trailing meta-data.

I just noticed Johannes Weiner's patch from February 16.

BTW, talking about branches and call depth, you are proposing using
kzfree() which involves further test and call branches (including those
inside the specific ksize implementation of the allocator being used)
and it duplicates the check for ZERO_SIZE_PTR/NULL too. The function is
so simple that it should be a static inline declared in slab.h. It also
lacks any validation checks as performed in kfree (besides the zero
size/null ptr one).

Also, users of unconditional sanitization would see unnecessary
duplication of the clearing, causing a real performance hit (which would
be almost non existent otherwise). That will make kzfree unsuitable for
most hot spots like the crypto api and the mac80211 wep code.

Honestly your proposed approach seems a little weak.

	Larry


^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-30 18:03                               ` Larry H.
  0 siblings, 0 replies; 220+ messages in thread
From: Larry H. @ 2009-05-30 18:03 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Pekka Enberg, Alan Cox, Rik van Riel, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar, pageexec, Linus Torvalds

On 19:34 Sat 30 May     , Ingo Molnar wrote:
> You need to provide a more sufficient and more constructive answer 
> than that, if you propose upstream patches that impact the SLAB 
> subsystem.

Impact? If you mean introducing changes, definitely. If the word has
negative connotations in this context, definitely not ;)

> FYI Pekka is one of the SLAB subsystem maintainers so you need to 
> convince him that your patches are the right approach. Trying to 
> teach Pekka about SLAB internals in a condescending tone will only 
> cause your patches to be ignored.

I've never tried to teach you anything but security matters, so far.
And I've been quite unsuccessful at it, apparently. That said, please
let me explain why kzfree was broken (as of 2.6.29.4, I've been told
30-rc2 already has users of it).

The first issue is that SLOB has a broken ksize, which won't take into
consideration compound pages AFAIK. To fix this you will need to
introduce some changes in the way the slob_page structure is handled,
and add real size tracking to it. You will find these problems if you
try to implement a reliable kmem_ptr_validate for SLOB, too.

The second is that I've experienced issues with kzfree on 2.6.29.4, in
which something (apparently the freelist pointer) is overwritten and
leads to a NULL pointer dereference in the next allocation in the affected
cache. I didn't fully analyze what was broken, besides that for
sanitizing the objects on kfree I needed to rely on the inuse size and
not the one reported by ksize, if I wanted to avoid hitting that
trailing meta-data.

I just noticed Johannes Weiner's patch from February 16.

BTW, talking about branches and call depth, you are proposing using
kzfree() which involves further test and call branches (including those
inside the specific ksize implementation of the allocator being used)
and it duplicates the check for ZERO_SIZE_PTR/NULL too. The function is
so simple that it should be a static inline declared in slab.h. It also
lacks any validation checks as performed in kfree (besides the zero
size/null ptr one).

Also, users of unconditional sanitization would see unnecessary
duplication of the clearing, causing a real performance hit (which would
be almost non existent otherwise). That will make kzfree unsuitable for
most hot spots like the crypto api and the mac80211 wep code.

Honestly your proposed approach seems a little weak.

	Larry

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-30 17:46                             ` Ingo Molnar
@ 2009-05-30 18:09                               ` Larry H.
  -1 siblings, 0 replies; 220+ messages in thread
From: Larry H. @ 2009-05-30 18:09 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Pekka Enberg, Alan Cox, Rik van Riel, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar, pageexec, Linus Torvalds

On 19:46 Sat 30 May     , Ingo Molnar wrote:
> btw., i find this rather hilarious: you thought it was me 
> writing the reply and you answered Pekka's arguments with contempt 
> and hand-waving.
> 
> Now that you realized that it's the SLAB maintainer you replied to, 
> whom you cannot just hand-wave away, you apologize not for the 
> bogosity of your argument and not for the concept - but you 
> apologize for _thinking it was the wrong person_.
> 
> That is a rather dishonest style of discussion.

The hilarious thing is that I was being sarcastic. For a second it
seemed this attitude was about to come closer to yours, hence the joke.
I guess you aren't a fan of John Cleese as much as I am.

I wouldn't confuse Pekka with you from a mile away ;>

	Larry

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-30 18:09                               ` Larry H.
  0 siblings, 0 replies; 220+ messages in thread
From: Larry H. @ 2009-05-30 18:09 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Pekka Enberg, Alan Cox, Rik van Riel, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar, pageexec, Linus Torvalds

On 19:46 Sat 30 May     , Ingo Molnar wrote:
> btw., i find this rather hilarious: you thought it was me 
> writing the reply and you answered Pekka's arguments with contempt 
> and hand-waving.
> 
> Now that you realized that it's the SLAB maintainer you replied to, 
> whom you cannot just hand-wave away, you apologize not for the 
> bogosity of your argument and not for the concept - but you 
> apologize for _thinking it was the wrong person_.
> 
> That is a rather dishonest style of discussion.

The hilarious thing is that I was being sarcastic. For a second it
seemed this attitude was about to come closer to yours, hence the joke.
I guess you aren't a fan of John Cleese as much as I am.

I wouldn't confuse Pekka with you from a mile away ;>

	Larry

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-30 18:03                               ` Larry H.
@ 2009-05-30 18:21                                 ` Ingo Molnar
  -1 siblings, 0 replies; 220+ messages in thread
From: Ingo Molnar @ 2009-05-30 18:21 UTC (permalink / raw)
  To: Larry H.
  Cc: Pekka Enberg, Alan Cox, Rik van Riel, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar, pageexec, Linus Torvalds


* Larry H. <research@subreption.com> wrote:

> On 19:34 Sat 30 May     , Ingo Molnar wrote:
> > You need to provide a more sufficient and more constructive answer 
> > than that, if you propose upstream patches that impact the SLAB 
> > subsystem.
> 
> Impact? If you mean introducing changes, definitely. If the word 
> has negative connotations in this context, definitely not ;)

i mean its most obvious meaning: if you change the SLAB subsystem, 
it goes via Pekka. Also, changes to the page allocator impact the 
SLAB subsystem too (which is the most common front-end to the page 
allocator) so he has a say there too obviously ...

> > FYI Pekka is one of the SLAB subsystem maintainers so you need 
> > to convince him that your patches are the right approach. Trying 
> > to teach Pekka about SLAB internals in a condescending tone will 
> > only cause your patches to be ignored.
> 
> I've never tried to teach you anything but security matters, so 
> far. And I've been quite unsuccessful at it, apparently. That 
> said, please let me explain why kzfree was broken (as of 2.6.29.4, 
> I've been told 30-rc2 already has users of it).
> 
> The first issue is that SLOB has a broken ksize, which won't take 
> into consideration compound pages AFAIK. To fix this you will need 
> to introduce some changes in the way the slob_page structure is 
> handled, and add real size tracking to it. You will find these 
> problems if you try to implement a reliable kmem_ptr_validate for 
> SLOB, too.

SLOB is a rarely used (and high overhead) allocator. But the right 
answer there: fix kzalloc().

> The second is that I've experienced issues with kzfree on 
> 2.6.29.4, in which something (apparently the freelist pointer) is 
> overwritten and leads to a NULL pointer dereference in the next 
> allocation in the affected cache. I didn't fully analyze what was 
> broken, besides that for sanitizing the objects on kfree I needed 
> to rely on the inuse size and not the one reported by ksize, if I 
> wanted to avoid hitting that trailing meta-data.
> 
> I just noticed Johannes Weiner's patch from February 16.

if kzfree() is broken then a number of places in the kernel that 
currently rely on it are potentially broken as well.

So as far as i'm concerned, your patchset is best expressed in the 
following form: Crypto, WEP and other sensitive places should be 
updated to use kzfree() to free keys.

This can be done unconditionally (without any Kconfig flag), as it's 
all in slow-paths - and because there's a real security value in 
sanitizing buffers that held sensitive keys, when they are freed.

Regarding a whole-sale 'clear everything on free' approach - that's 
both pointless security wise (sensitive information can still leak 
indefinitely [if you disagree i can provide an example]) and has a 
very high cost so it's not acceptable to normal Linux distros.

> BTW, talking about branches and call depth, you are proposing 
> using kzfree() which involves further test and call branches 
> (including those inside the specific ksize implementation of the 
> allocator being used) and it duplicates the check for 
> ZERO_SIZE_PTR/NULL too. The function is so simple that it should 
> be a static inline declared in slab.h. It also lacks any 
> validation checks as performed in kfree (besides the zero 
> size/null ptr one).
> 
> Also, users of unconditional sanitization would see unnecessary 
> duplication of the clearing, causing a real performance hit (which 
> would be almost non existent otherwise). That will make kzfree 
> unsuitable for most hot spots like the crypto api and the mac80211 
> wep code.
> 
> Honestly your proposed approach seems a little weak.

Unconditional honesty is definitely welcome ;-)

Freeing keys is an utter slow-path (if not then the clearing is the 
least of our performance worries), so any clearing cost is in the 
noise. Furthermore, kzfree() is an existing facility already in use. 
If it's reused by your patches that brings further advantages: 
kzfree(), if it has any bugs, will be fixed. While if you add a 
parallel facility kzfree() stays broken.

So your examples about real or suspected kzfree() breakages only 
strengthen the point that your patches should be using it. Keeping a 
rarely used kernel facility (like kzfree) correct is hard - 
splintering it by creating a parallel facility is actively harmful 
for that reason.

	Ingo

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-30 18:21                                 ` Ingo Molnar
  0 siblings, 0 replies; 220+ messages in thread
From: Ingo Molnar @ 2009-05-30 18:21 UTC (permalink / raw)
  To: Larry H.
  Cc: Pekka Enberg, Alan Cox, Rik van Riel, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar, pageexec, Linus Torvalds


* Larry H. <research@subreption.com> wrote:

> On 19:34 Sat 30 May     , Ingo Molnar wrote:
> > You need to provide a more sufficient and more constructive answer 
> > than that, if you propose upstream patches that impact the SLAB 
> > subsystem.
> 
> Impact? If you mean introducing changes, definitely. If the word 
> has negative connotations in this context, definitely not ;)

i mean its most obvious meaning: if you change the SLAB subsystem, 
it goes via Pekka. Also, changes to the page allocator impact the 
SLAB subsystem too (which is the most common front-end to the page 
allocator) so he has a say there too obviously ...

> > FYI Pekka is one of the SLAB subsystem maintainers so you need 
> > to convince him that your patches are the right approach. Trying 
> > to teach Pekka about SLAB internals in a condescending tone will 
> > only cause your patches to be ignored.
> 
> I've never tried to teach you anything but security matters, so 
> far. And I've been quite unsuccessful at it, apparently. That 
> said, please let me explain why kzfree was broken (as of 2.6.29.4, 
> I've been told 30-rc2 already has users of it).
> 
> The first issue is that SLOB has a broken ksize, which won't take 
> into consideration compound pages AFAIK. To fix this you will need 
> to introduce some changes in the way the slob_page structure is 
> handled, and add real size tracking to it. You will find these 
> problems if you try to implement a reliable kmem_ptr_validate for 
> SLOB, too.

SLOB is a rarely used (and high overhead) allocator. But the right 
answer there: fix kzalloc().

> The second is that I've experienced issues with kzfree on 
> 2.6.29.4, in which something (apparently the freelist pointer) is 
> overwritten and leads to a NULL pointer dereference in the next 
> allocation in the affected cache. I didn't fully analyze what was 
> broken, besides that for sanitizing the objects on kfree I needed 
> to rely on the inuse size and not the one reported by ksize, if I 
> wanted to avoid hitting that trailing meta-data.
> 
> I just noticed Johannes Weiner's patch from February 16.

if kzfree() is broken then a number of places in the kernel that 
currently rely on it are potentially broken as well.

So as far as i'm concerned, your patchset is best expressed in the 
following form: Crypto, WEP and other sensitive places should be 
updated to use kzfree() to free keys.

This can be done unconditionally (without any Kconfig flag), as it's 
all in slow-paths - and because there's a real security value in 
sanitizing buffers that held sensitive keys, when they are freed.

Regarding a whole-sale 'clear everything on free' approach - that's 
both pointless security wise (sensitive information can still leak 
indefinitely [if you disagree i can provide an example]) and has a 
very high cost so it's not acceptable to normal Linux distros.

> BTW, talking about branches and call depth, you are proposing 
> using kzfree() which involves further test and call branches 
> (including those inside the specific ksize implementation of the 
> allocator being used) and it duplicates the check for 
> ZERO_SIZE_PTR/NULL too. The function is so simple that it should 
> be a static inline declared in slab.h. It also lacks any 
> validation checks as performed in kfree (besides the zero 
> size/null ptr one).
> 
> Also, users of unconditional sanitization would see unnecessary 
> duplication of the clearing, causing a real performance hit (which 
> would be almost non existent otherwise). That will make kzfree 
> unsuitable for most hot spots like the crypto api and the mac80211 
> wep code.
> 
> Honestly your proposed approach seems a little weak.

Unconditional honesty is definitely welcome ;-)

Freeing keys is an utter slow-path (if not then the clearing is the 
least of our performance worries), so any clearing cost is in the 
noise. Furthermore, kzfree() is an existing facility already in use. 
If it's reused by your patches that brings further advantages: 
kzfree(), if it has any bugs, will be fixed. While if you add a 
parallel facility kzfree() stays broken.

So your examples about real or suspected kzfree() breakages only 
strengthen the point that your patches should be using it. Keeping a 
rarely used kernel facility (like kzfree) correct is hard - 
splintering it by creating a parallel facility is actively harmful 
for that reason.

	Ingo

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-30 17:25                                         ` Larry H.
@ 2009-05-30 18:32                                           ` Ingo Molnar
  -1 siblings, 0 replies; 220+ messages in thread
From: Ingo Molnar @ 2009-05-30 18:32 UTC (permalink / raw)
  To: Larry H.
  Cc: Rik van Riel, Peter Zijlstra, Alan Cox, pageexec,
	Arjan van de Ven, linux-kernel, Linus Torvalds, linux-mm,
	Ingo Molnar


* Larry H. <research@subreption.com> wrote:

> Done. I just tested with different 'leak' sizes on a kernel 
> patched with the latest memory sanitization patch and the 
> kfree/kmem_cache_free one:
> 
> 	10M	- no occurrences with immediate scanmem
> 	40M	- no occurrences with immediate scanmem
> 	80M	- no occurrences with immediate scanmem
> 	160M	- no occurrences with immediate scanmem
> 	250M	- no occurrences with immediate scanmem
> 	300M	- no occurrences with immediate scanmem
> 	500M	- no occurrences with immediate scanmem
> 	600M	- with immediate zeromem 600 and scanmem afterwards,
> 		 no occurrences.

Is the sensitive data (or portions/transformations of it) copied to 
the kernel stack and used there?

If not then this isn't a complete/sufficient/fair test of how 
sensitive data like crypto keys gets used by the kernel.

In reality sensitive data, if it's relied upon by the kernel, can 
(and does) make it to the kernel stack. We see it happen every day 
with function return values. Let me quote the example i mentioned 
earlier today:

[   96.138788]  [<ffffffff810ab62e>] perf_counter_exit_task+0x10e/0x3f3
[   96.145464]  [<ffffffff8104cf46>] do_exit+0x2e7/0x722
[   96.150837]  [<ffffffff810630cf>] ? up_read+0x9/0xb
[   96.156036]  [<ffffffff8151cc0b>] ? do_page_fault+0x27d/0x2a5
[   96.162141]  [<ffffffff8104d3f4>] do_group_exit+0x73/0xa0
[   96.167860]  [<ffffffff8104d433>] sys_exit_group+0x12/0x16
[   96.173665]  [<ffffffff8100bb2b>] system_call_fastpath+0x16/0x1b

This is a real stackdump and the 'ffffffff8151cc0b' 64-bit word is 
actually a leftover from a previous system entry. ( And this is at 
the bottom of the stack that gets cleared all the time - the top of 
the kernel stack is a lot more persistent in practice and 
crypto calls tend to have a healthy stack footprint. )

Similarly, other sensitive data can be leaked via the kernel stack 
too.

So IMO the GFP_SENSITIVE facility (beyond being a technical misnomer 
- it should be something like GFP_NON_PERSISTENT instead) actually 
results in subtly _worse_ security in the end: because people (and 
organizations) 'think' that their keys are safe against information 
leaks via this space, while they are not.

The kernel stack can be freed, be reused by something else partially 
and then written out to disk (say as part of hibernation) where it's 
recoverable from the disk image.

Furthermore, there's no guarantee at all that a task won't stay 
around for a long time - with sensitive data still on its kernel 
stack.

	Ingo

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-30 18:32                                           ` Ingo Molnar
  0 siblings, 0 replies; 220+ messages in thread
From: Ingo Molnar @ 2009-05-30 18:32 UTC (permalink / raw)
  To: Larry H.
  Cc: Rik van Riel, Peter Zijlstra, Alan Cox, pageexec,
	Arjan van de Ven, linux-kernel, Linus Torvalds, linux-mm,
	Ingo Molnar


* Larry H. <research@subreption.com> wrote:

> Done. I just tested with different 'leak' sizes on a kernel 
> patched with the latest memory sanitization patch and the 
> kfree/kmem_cache_free one:
> 
> 	10M	- no occurrences with immediate scanmem
> 	40M	- no occurrences with immediate scanmem
> 	80M	- no occurrences with immediate scanmem
> 	160M	- no occurrences with immediate scanmem
> 	250M	- no occurrences with immediate scanmem
> 	300M	- no occurrences with immediate scanmem
> 	500M	- no occurrences with immediate scanmem
> 	600M	- with immediate zeromem 600 and scanmem afterwards,
> 		 no occurrences.

Is the sensitive data (or portions/transformations of it) copied to 
the kernel stack and used there?

If not then this isn't a complete/sufficient/fair test of how 
sensitive data like crypto keys gets used by the kernel.

In reality sensitive data, if it's relied upon by the kernel, can 
(and does) make it to the kernel stack. We see it happen every day 
with function return values. Let me quote the example i mentioned 
earlier today:

[   96.138788]  [<ffffffff810ab62e>] perf_counter_exit_task+0x10e/0x3f3
[   96.145464]  [<ffffffff8104cf46>] do_exit+0x2e7/0x722
[   96.150837]  [<ffffffff810630cf>] ? up_read+0x9/0xb
[   96.156036]  [<ffffffff8151cc0b>] ? do_page_fault+0x27d/0x2a5
[   96.162141]  [<ffffffff8104d3f4>] do_group_exit+0x73/0xa0
[   96.167860]  [<ffffffff8104d433>] sys_exit_group+0x12/0x16
[   96.173665]  [<ffffffff8100bb2b>] system_call_fastpath+0x16/0x1b

This is a real stackdump and the 'ffffffff8151cc0b' 64-bit word is 
actually a leftover from a previous system entry. ( And this is at 
the bottom of the stack that gets cleared all the time - the top of 
the kernel stack is a lot more persistent in practice and 
crypto calls tend to have a healthy stack footprint. )

Similarly, other sensitive data can be leaked via the kernel stack 
too.

So IMO the GFP_SENSITIVE facility (beyond being a technical misnomer 
- it should be something like GFP_NON_PERSISTENT instead) actually 
results in subtly _worse_ security in the end: because people (and 
organizations) 'think' that their keys are safe against information 
leaks via this space, while they are not.

The kernel stack can be freed, be reused by something else partially 
and then written out to disk (say as part of hibernation) where it's 
recoverable from the disk image.

Furthermore, there's no guarantee at all that a task won't stay 
around for a long time - with sensitive data still on its kernel 
stack.

	Ingo

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-30 18:21                                 ` Ingo Molnar
@ 2009-05-30 18:45                                   ` Larry H.
  -1 siblings, 0 replies; 220+ messages in thread
From: Larry H. @ 2009-05-30 18:45 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Pekka Enberg, Alan Cox, Rik van Riel, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar, pageexec, Linus Torvalds

On 20:21 Sat 30 May     , Ingo Molnar wrote:
> SLOB is a rarely used (and high overhead) allocator. But the right 
> answer there: fix kzalloc().

If it's rarely used and nobody cares, why has nobody removed it yet?
Seems like the very same argument Peter and you used at some point
against these patches. Later in your response here you state the same
for kzfree. Interesting.

> if kzfree() is broken then a number of places in the kernel that 
> currently rely on it are potentially broken as well.

Indeed, but it was sitting there unused up to 2.6.29.4. Apparently only
-30-rc2 introduces users of the patch. Someone didn't do his homework
signing off the patch without testing it properly.

> So as far as i'm concerned, your patchset is best expressed in the 
> following form: Cryto, WEP and other sensitive places should be 
> updated to use kzfree() to free keys.
> 
> This can be done unconditionally (without any Kconfig flag), as it's 
> all in slow-paths - and because there's a real security value in 
> sanitizing buffers that held sensitive keys, when they are freed.

And the tty buffers, and the audit buffers, and the crypto block alg
contexts, and the generic algorithm contexts, and the input buffers
contexts, and ... alright, I get the picture!

> Regarding a whole-sale 'clear everything on free' approach - that's 
> both pointless security wise (sensitive information can still leak 
> indefinitely [if you disagree i can provide an example]) and has a 
> very high cost so it's not acceptable to normal Linux distros.

Go ahead, I want to see your example.

I don't even know why I'm still wasting my time replying to you, it's
clearly hopeless to try to get you off your egotistical, red herring
argument fueled attitude, which is likely a burden beyond this list for
you and everyone around, sadly.

> > Honestly your proposed approach seems a little weak.
> 
> Unconditional honesty is definitely welcome ;-)

When it's people's security at stake, if your reasoning and logic is
flawed, I have the moral obligation to tell you.

I'm here to make the kernel more secure, not to deal with your inability
to work with others without continuous conflicts and attempts to fall
into ridicule, that backfire at you in the end.

> Freeing keys is an utter slow-path (if not then the clearing is the 
> least of our performance worries), so any clearing cost is in the 
> noise. Furthermore, kzfree() is an existing facility already in use. 
> If it's reused by your patches that brings further advantages: 
> kzfree(), if it has any bugs, will be fixed. While if you add a 
> parallel facility kzfree() stays broken.

Have you benchmarked the addition of these changes? I would like to see
benchmarks done for these (crypto api included), since you are proposing
them.

> So your examples about real or suspected kzfree() breakages only 
> strengthen the point that your patches should be using it. Keeping a 
> rarely used kernel facility (like kzfree) correct is hard - 
> splintering it by creating a parallel facility is actively harmful 
> for that reason.

Fallacy ad hitlerum delivered. Impressive.

	Larry

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-30 18:45                                   ` Larry H.
  0 siblings, 0 replies; 220+ messages in thread
From: Larry H. @ 2009-05-30 18:45 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Pekka Enberg, Alan Cox, Rik van Riel, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar, pageexec, Linus Torvalds

On 20:21 Sat 30 May     , Ingo Molnar wrote:
> SLOB is a rarely used (and high overhead) allocator. But the right 
> answer there: fix kzalloc().

If it's rarely used and nobody cares, why has nobody removed it yet?
Seems like the very same argument Peter and you used at some point
against these patches. Later in your response here you state the same
for kzfree. Interesting.

> if kzfree() is broken then a number of places in the kernel that 
> currently rely on it are potentially broken as well.

Indeed, but it was sitting there unused up to 2.6.29.4. Apparently only
-30-rc2 introduces users of the patch. Someone didn't do his homework
signing off the patch without testing it properly.

> So as far as i'm concerned, your patchset is best expressed in the 
> following form: Cryto, WEP and other sensitive places should be 
> updated to use kzfree() to free keys.
> 
> This can be done unconditionally (without any Kconfig flag), as it's 
> all in slow-paths - and because there's a real security value in 
> sanitizing buffers that held sensitive keys, when they are freed.

And the tty buffers, and the audit buffers, and the crypto block alg
contexts, and the generic algorithm contexts, and the input buffers
contexts, and ... alright, I get the picture!

> Regarding a whole-sale 'clear everything on free' approach - that's 
> both pointless security wise (sensitive information can still leak 
> indefinitely [if you disagree i can provide an example]) and has a 
> very high cost so it's not acceptable to normal Linux distros.

Go ahead, I want to see your example.

I don't even know why I'm still wasting my time replying to you, it's
clearly hopeless to try to get you off your egotistical, red herring
argument fueled attitude, which is likely a burden beyond this list for
you and everyone around, sadly.

> > Honestly your proposed approach seems a little weak.
> 
> Unconditional honesty is definitely welcome ;-)

When it's people's security at stake, if your reasoning and logic is
flawed, I have the moral obligation to tell you.

I'm here to make the kernel more secure, not to deal with your inability
to work with others without continuous conflicts and attempts to fall
into ridicule, that backfire at you in the end.

> Freeing keys is an utter slow-path (if not then the clearing is the 
> least of our performance worries), so any clearing cost is in the 
> noise. Furthermore, kzfree() is an existing facility already in use. 
> If it's reused by your patches that brings further advantages: 
> kzfree(), if it has any bugs, will be fixed. While if you add a 
> parallel facility kzfree() stays broken.

Have you benchmarked the addition of these changes? I would like to see
benchmarks done for these (crypto api included), since you are proposing
them.

> So your examples about real or suspected kzfree() breakages only 
> strengthen the point that your patches should be using it. Keeping a 
> rarely used kernel facility (like kzfree) correct is hard - 
> splintering it by creating a parallel facility is actively harmful 
> for that reason.

Fallacy ad hitlerum delivered. Impressive.

	Larry

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-30 18:45                                   ` Larry H.
@ 2009-05-30 19:08                                     ` Ingo Molnar
  -1 siblings, 0 replies; 220+ messages in thread
From: Ingo Molnar @ 2009-05-30 19:08 UTC (permalink / raw)
  To: Larry H.
  Cc: Pekka Enberg, Alan Cox, Rik van Riel, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar, pageexec, Linus Torvalds


* Larry H. <research@subreption.com> wrote:

> On 20:21 Sat 30 May     , Ingo Molnar wrote:
> > SLOB is a rarely used (and high overhead) allocator. But the right 
> > answer there: fix kzalloc().
> 
> If it's rarely used and nobody cares, why nobody has removed it yet?
> Sames like the very same argument Peter and you used at some point
> against these patches. Later in your response here you state the same
> for kzfree. Interesting.
> 
> > if kzfree() is broken then a number of places in the kernel that 
> > currently rely on it are potentially broken as well.
> 
> Indeed, but it was sitting there unused up to 2.6.29.4. Apparently only
> -30-rc2 introduces users of the patch. Someone didn't do his homework
> signing off the patch without testing it properly.
> 
> > So as far as i'm concerned, your patchset is best expressed in the 
> > following form: Cryto, WEP and other sensitive places should be 
> > updated to use kzfree() to free keys.
> > 
> > This can be done unconditionally (without any Kconfig flag), as it's 
> > all in slow-paths - and because there's a real security value in 
> > sanitizing buffers that held sensitive keys, when they are freed.
> 
> And the tty buffers, and the audit buffers, and the crypto block 
> alg contexts, and the generic algorithm contexts, and the input 
> buffers contexts, and ... alright, I get the picture!

Correct. Those are all either slowpaths or low-bandwidth paths. 

It's much better to help security in general for the cases where it 
can be done unconditionally - than to provide an option (that 
everyone really disables because it just tries to do too much) and 
_claim_ that we are more secure.

> > Regarding a whole-sale 'clear everything on free' approach - 
> > that's both pointless security wise (sensitive information can 
> > still leak indefinitely [if you disagree i can provide an 
> > example]) and has a very high cost so it's not acceptable to 
> > normal Linux distros.
> 
> Go ahead, I want to see your example.

Long-lived tasks that touched any crypto path (or other sensitive 
data in the kernel) and leaked it to the kernel stack can possibly 
keep sensitive information there indefinitely (especially if that 
information got there in an accidentally deep stack context) - up 
until the task exits. That information will outlive the freeing and 
sanitizing of the original sensitive data.

I gave a real example of how sensitive information (traces of 
previous execution) can survive on the kernel stack, via a real 
stack dump, in the previous mail i wrote.

> I don't even know why I'm still wasting my time replying to you, 
> it's clearly hopeless to try to get you off your egotistical, red 
> herring argument fueled attitude, which is likely a burden beyond 
> this list for you and everyone around, sadly.

Huh?

> > > Honestly your proposed approach seems a little weak.
> > 
> > Unconditional honesty is definitely welcome ;-)
> 
> When it's people's security at stake, if your reasoning and logic 
> is flawed, I have the moral obligation to tell you.
> 
> I'm here to make the kernel more secure, not to deal with your 
> inability to work with others without continuous conflicts and 
> attempts to fall into ridicule, that backfire at you in the end.
> 
> > Freeing keys is an utter slow-path (if not then the clearing is 
> > the least of our performance worries), so any clearing cost is 
> > in the noise. Furthermore, kzfree() is an existing facility 
> > already in use. If it's reused by your patches that brings 
> > further advantages: kzfree(), if it has any bugs, will be fixed. 
> > While if you add a parallel facility kzfree() stays broken.
> 
> Have you benchmarked the addition of these changes? I would like 
> to see benchmarks done for these (crypto api included), since you 
> are proposing them.

You have it the wrong way around. _You_ have the burden of proof 
here really, you are trying to get patches into the upstream kernel. 
I'm not obliged to do your homework for you. I might be wrong, and 
you can prove me wrong.

> > So your examples about real or suspected kzfree() breakages only 
> > strengthen the point that your patches should be using it. 
> > Keeping a rarely used kernel facility (like kzfree) correct is 
> > hard - splintering it by creating a parallel facility is 
> > actively harmful for that reason.
> 
> Fallacy ad hitlerum delivered. Impressive.

In what way is it a fallacy? It is a valid technical argument: more 
use of an existing facility is better than an overlapping parallel 
facility. It is a pretty much axiomatic argument.

	Ingo

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-30 19:08                                     ` Ingo Molnar
  0 siblings, 0 replies; 220+ messages in thread
From: Ingo Molnar @ 2009-05-30 19:08 UTC (permalink / raw)
  To: Larry H.
  Cc: Pekka Enberg, Alan Cox, Rik van Riel, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar, pageexec, Linus Torvalds


* Larry H. <research@subreption.com> wrote:

> On 20:21 Sat 30 May     , Ingo Molnar wrote:
> > SLOB is a rarely used (and high overhead) allocator. But the right 
> > answer there: fix kzalloc().
> 
> If it's rarely used and nobody cares, why nobody has removed it yet?
> Sames like the very same argument Peter and you used at some point
> against these patches. Later in your response here you state the same
> for kzfree. Interesting.
> 
> > if kzfree() is broken then a number of places in the kernel that 
> > currently rely on it are potentially broken as well.
> 
> Indeed, but it was sitting there unused up to 2.6.29.4. Apparently only
> -30-rc2 introduces users of the patch. Someone didn't do his homework
> signing off the patch without testing it properly.
> 
> > So as far as i'm concerned, your patchset is best expressed in the 
> > following form: Cryto, WEP and other sensitive places should be 
> > updated to use kzfree() to free keys.
> > 
> > This can be done unconditionally (without any Kconfig flag), as it's 
> > all in slow-paths - and because there's a real security value in 
> > sanitizing buffers that held sensitive keys, when they are freed.
> 
> And the tty buffers, and the audit buffers, and the crypto block 
> alg contexts, and the generic algorithm contexts, and the input 
> buffers contexts, and ... alright, I get the picture!

Correct. Those are all either slowpaths or low-bandwidth paths. 

It's much better to help security in general for the cases where it 
can be done unconditionally - than to provide an option (that 
everyone really disables because it just tries to do too much) and 
_claim_ that we are more secure.

> > Regarding a whole-sale 'clear everything on free' approach - 
> > that's both pointless security wise (sensitive information can 
> > still leak indefinitely [if you disagree i can provide an 
> > example]) and has a very high cost so it's not acceptable to 
> > normal Linux distros.
> 
> Go ahead, I want to see your example.

Long-lived tasks that touched any crypto path (or other sensitive 
data in the kernel) and leaked it to the kernel stack can possibly 
keep sensitive information there indefinitely (especially if that 
information got there in an accidentally deep stack context) - up 
until the task exits. That information will outlive the freeing and 
sanitizing of the original sensitive data.

I gave a real example of how sensitive information (traces of 
previous execution) can survive on the kernel stack, via a real 
stack dump, in the previous mail i wrote.

> I don't even know why I'm still wasting my time replying to you, 
> it's clearly hopeless to try to get you off your egotistical, red 
> herring argument fueled attitude, which is likely a burden beyond 
> this list for you and everyone around, sadly.

Huh?

> > > Honestly your proposed approach seems a little weak.
> > 
> > Unconditional honesty is definitely welcome ;-)
> 
> When it's people's security at stake, if your reasoning and logic 
> is flawed, I have the moral obligation to tell you.
> 
> I'm here to make the kernel more secure, not to deal with your 
> inability to work with others without continuous conflicts and 
> attempts to fall into ridicule, that backfire at you in the end.
> 
> > Freeing keys is an utter slow-path (if not then the clearing is 
> > the least of our performance worries), so any clearing cost is 
> > in the noise. Furthermore, kzfree() is an existing facility 
> > already in use. If it's reused by your patches that brings 
> > further advantages: kzfree(), if it has any bugs, will be fixed. 
> > While if you add a parallel facility kzfree() stays broken.
> 
> Have you benchmarked the addition of these changes? I would like 
> to see benchmarks done for these (crypto api included), since you 
> are proposing them.

You have it the wrong way around. _You_ have the burden of proof 
here really, you are trying to get patches into the upstream kernel. 
I'm not obliged to do your homework for you. I might be wrong, and 
you can prove me wrong.

> > So your examples about real or suspected kzfree() breakages only 
> > strengthen the point that your patches should be using it. 
> > Keeping a rarely used kernel facility (like kzfree) correct is 
> > hard - splintering it by creating a parallel facility is 
> > actively harmful for that reason.
> 
> Fallacy ad hitlerum delivered. Impressive.

In what way is it a fallacy? It is a valid technical argument: more 
use of an existing facility is better than an overlapping parallel 
facility. It is a pretty much axiomatic argument.

	Ingo

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page  allocator
  2009-05-30 18:03                               ` Larry H.
@ 2009-05-30 20:22                                 ` Pekka Enberg
  -1 siblings, 0 replies; 220+ messages in thread
From: Pekka Enberg @ 2009-05-30 20:22 UTC (permalink / raw)
  To: Larry H.
  Cc: Ingo Molnar, Alan Cox, Rik van Riel, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar, pageexec, Linus Torvalds,
	Matt Mackall

Hi Larry,

On Sat, May 30, 2009 at 9:03 PM, Larry H. <research@subreption.com> wrote:
> The first issue is that SLOB has a broken ksize, which won't take into
> consideration compound pages AFAIK. To fix this you will need to
> introduce some changes in the way the slob_page structure is handled,
> and add real size tracking to it. You will find these problems if you
> try to implement a reliable kmem_ptr_validate for SLOB, too.

Does this mean that kzfree() isn't broken for SLAB/SLUB? Maybe I read
your emails wrong but you seemed to imply that.

As for SLOB ksize(), I am sure Matt Mackall would love to hear the
details how ksize() is broken there. I am having a difficult time
understanding the bug you're pointing out here as SLOB does check for
is_slob_page() in ksize() and falls back to page.private if the page
is not PageSlobPage...

On Sat, May 30, 2009 at 9:03 PM, Larry H. <research@subreption.com> wrote:
> The second is that I've experienced issues with kzfree on 2.6.29.4, in
> which something (apparently the freelist pointer) is overwritten and
> leads to a NULL pointer dereference in the next allocation in the affected
> cache. I didn't fully analyze what was broken, besides that for
> sanitizing the objects on kfree I needed to rely on the inuse size and
> not the one reported by ksize, if I wanted to avoid hitting that
> trailing meta-data.

Which allocator are you talking about here?

On Sat, May 30, 2009 at 9:03 PM, Larry H. <research@subreption.com> wrote:
> BTW, talking about branches and call depth, you are proposing using
> kzfree() which involves further test and call branches (including those
> inside the specific ksize implementation of the allocator being used)
> and it duplicates the check for ZERO_SIZE_PTR/NULL too. The function is
> so simple that it should be a static inline declared in slab.h. It also
> lacks any validation checks as performed in kfree (besides the zero
> size/null ptr one).
>
> Also, users of unconditional sanitization would see unnecessary
> duplication of the clearing, causing a real performance hit (which would
> be almost non existent otherwise). That will make kzfree unsuitable for
> most hot spots like the crypto api and the mac80211 wep code.
>
> Honestly your proposed approach seems a little weak.

Honestly, this seems like more hand-waving to me.

                                       Pekka

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-30 20:22                                 ` Pekka Enberg
  0 siblings, 0 replies; 220+ messages in thread
From: Pekka Enberg @ 2009-05-30 20:22 UTC (permalink / raw)
  To: Larry H.
  Cc: Ingo Molnar, Alan Cox, Rik van Riel, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar, pageexec, Linus Torvalds,
	Matt Mackall

Hi Larry,

On Sat, May 30, 2009 at 9:03 PM, Larry H. <research@subreption.com> wrote:
> The first issue is that SLOB has a broken ksize, which won't take into
> consideration compound pages AFAIK. To fix this you will need to
> introduce some changes in the way the slob_page structure is handled,
> and add real size tracking to it. You will find these problems if you
> try to implement a reliable kmem_ptr_validate for SLOB, too.

Does this mean that kzfree() isn't broken for SLAB/SLUB? Maybe I read
your emails wrong but you seemed to imply that.

As for SLOB ksize(), I am sure Matt Mackall would love to hear the
details how ksize() is broken there. I am having a difficult time
understanding the bug you're pointing out here as SLOB does check for
is_slob_page() in ksize() and falls back to page.private if the page
is not PageSlobPage...

On Sat, May 30, 2009 at 9:03 PM, Larry H. <research@subreption.com> wrote:
> The second is that I've experienced issues with kzfree on 2.6.29.4, in
> which something (apparently the freelist pointer) is overwritten and
> leads to a NULL pointer dereference in the next allocation in the affected
> cache. I didn't fully analyze what was broken, besides that for
> sanitizing the objects on kfree I needed to rely on the inuse size and
> not the one reported by ksize, if I wanted to avoid hitting that
> trailing meta-data.

Which allocator are you talking about here?

On Sat, May 30, 2009 at 9:03 PM, Larry H. <research@subreption.com> wrote:
> BTW, talking about branches and call depth, you are proposing using
> kzfree() which involves further test and call branches (including those
> inside the specific ksize implementation of the allocator being used)
> and it duplicates the check for ZERO_SIZE_PTR/NULL too. The function is
> so simple that it should be a static inline declared in slab.h. It also
> lacks any validation checks as performed in kfree (besides the zero
> size/null ptr one).
>
> Also, users of unconditional sanitization would see unnecessary
> duplication of the clearing, causing a real performance hit (which would
> be almost non existent otherwise). That will make kzfree unsuitable for
> most hot spots like the crypto api and the mac80211 wep code.
>
> Honestly your proposed approach seems a little weak.

Honestly, this seems like more hand-waving to me.

                                       Pekka

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-30 19:08                                     ` Ingo Molnar
@ 2009-05-30 20:39                                       ` Rik van Riel
  -1 siblings, 0 replies; 220+ messages in thread
From: Rik van Riel @ 2009-05-30 20:39 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Larry H.,
	Pekka Enberg, Alan Cox, linux-kernel, Linus Torvalds, linux-mm,
	Ingo Molnar, pageexec, Linus Torvalds

Ingo Molnar wrote:
> * Larry H. <research@subreption.com> wrote:
>> On 20:21 Sat 30 May     , Ingo Molnar wrote:

>>> Freeing keys is an utter slow-path (if not then the clearing is 
>>> the least of our performance worries), so any clearing cost is 
>>> in the noise. Furthermore, kzfree() is an existing facility 
>>> already in use. If it's reused by your patches that brings 
>>> further advantages: kzfree(), if it has any bugs, will be fixed. 
>>> While if you add a parallel facility kzfree() stays broken.
>> Have you benchmarked the addition of these changes? I would like 
>> to see benchmarks done for these (crypto api included), since you 
>> are proposing them.
> 
> You have it the wrong way around. _You_ have the burden of proof 
> here really, you are trying to get patches into the upstream kernel. 
> I'm not obliged to do your homework for you. I might be wrong, and 
> you can prove me wrong.

Larry's patches do not do what you propose they
should do, so why would he have to benchmark your
idea?

-- 
All rights reversed.

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-30 20:39                                       ` Rik van Riel
  0 siblings, 0 replies; 220+ messages in thread
From: Rik van Riel @ 2009-05-30 20:39 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Larry H.,
	Pekka Enberg, Alan Cox, linux-kernel, Linus Torvalds, linux-mm,
	Ingo Molnar, pageexec, Linus Torvalds

Ingo Molnar wrote:
> * Larry H. <research@subreption.com> wrote:
>> On 20:21 Sat 30 May     , Ingo Molnar wrote:

>>> Freeing keys is an utter slow-path (if not then the clearing is 
>>> the least of our performance worries), so any clearing cost is 
>>> in the noise. Furthermore, kzfree() is an existing facility 
>>> already in use. If it's reused by your patches that brings 
>>> further advantages: kzfree(), if it has any bugs, will be fixed. 
>>> While if you add a parallel facility kzfree() stays broken.
>> Have you benchmarked the addition of these changes? I would like 
>> to see benchmarks done for these (crypto api included), since you 
>> are proposing them.
> 
> You have it the wrong way around. _You_ have the burden of proof 
> here really, you are trying to get patches into the upstream kernel. 
> I'm not obliged to do your homework for you. I might be wrong, and 
> you can prove me wrong.

Larry's patches do not do what you propose they
should do, so why would he have to benchmark your
idea?

-- 
All rights reversed.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page  allocator
  2009-05-30 20:39                                       ` Rik van Riel
@ 2009-05-30 20:53                                         ` Pekka Enberg
  -1 siblings, 0 replies; 220+ messages in thread
From: Pekka Enberg @ 2009-05-30 20:53 UTC (permalink / raw)
  To: Rik van Riel
  Cc: Ingo Molnar, Larry H.,
	Alan Cox, linux-kernel, Linus Torvalds, linux-mm, Ingo Molnar,
	pageexec, Linus Torvalds

Hi Rik,

On Sat, May 30, 2009 at 11:39 PM, Rik van Riel <riel@redhat.com> wrote:
>>> Have you benchmarked the addition of these changes? I would like to see
>>> benchmarks done for these (crypto api included), since you are proposing
>>> them.
>>
>> You have it the wrong way around. _You_ have the burden of proof here
>> really, you are trying to get patches into the upstream kernel. I'm not
>> obliged to do your homework for you. I might be wrong, and you can prove me
>> wrong.
>
> Larry's patches do not do what you propose they
> should do, so why would he have to benchmark your
> idea?

It's pretty damn obvious that Larry's patches have a much bigger
performance impact than using kzfree() for selected parts of the
kernel. So yes, I do expect him to benchmark and demonstrate that
kzfree() has _performance problems_ before we can look into merging
his patches.

                                     Pekka

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-30 20:53                                         ` Pekka Enberg
  0 siblings, 0 replies; 220+ messages in thread
From: Pekka Enberg @ 2009-05-30 20:53 UTC (permalink / raw)
  To: Rik van Riel
  Cc: Ingo Molnar, Larry H.,
	Alan Cox, linux-kernel, Linus Torvalds, linux-mm, Ingo Molnar,
	pageexec, Linus Torvalds

Hi Rik,

On Sat, May 30, 2009 at 11:39 PM, Rik van Riel <riel@redhat.com> wrote:
>>> Have you benchmarked the addition of these changes? I would like to see
>>> benchmarks done for these (crypto api included), since you are proposing
>>> them.
>>
>> You have it the wrong way around. _You_ have the burden of proof here
>> really, you are trying to get patches into the upstream kernel. I'm not
>> obliged to do your homework for you. I might be wrong, and you can prove me
>> wrong.
>
> Larry's patches do not do what you propose they
> should do, so why would he have to benchmark your
> idea?

It's pretty damn obvious that Larry's patches have a much bigger
performance impact than using kzfree() for selected parts of the
kernel. So yes, I do expect him to benchmark and demonstrate that
kzfree() has _performance problems_ before we can look into merging
his patches.

                                     Pekka

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-30 20:53                                         ` Pekka Enberg
@ 2009-05-30 21:33                                           ` Larry H.
  -1 siblings, 0 replies; 220+ messages in thread
From: Larry H. @ 2009-05-30 21:33 UTC (permalink / raw)
  To: Pekka Enberg
  Cc: Rik van Riel, Ingo Molnar, Alan Cox, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar, pageexec, Linus Torvalds

On 23:53 Sat 30 May     , Pekka Enberg wrote:
> Hi Rik,
> 
> On Sat, May 30, 2009 at 11:39 PM, Rik van Riel <riel@redhat.com> wrote:
> >>> Have you benchmarked the addition of these changes? I would like to see
> >>> benchmarks done for these (crypto api included), since you are proposing
> >>> them.
> >>
> >> You have it the wrong way around. _You_ have the burden of proof here
> >> really, you are trying to get patches into the upstream kernel. I'm not
> >> obliged to do your homework for you. I might be wrong, and you can prove me
> >> wrong.
> >
> > Larry's patches do not do what you propose they
> > should do, so why would he have to benchmark your
> > idea?
> 
> It's pretty damn obvious that Larry's patches have a much bigger
> performance impact than using kzfree() for selected parts of the
> kernel. So yes, I do expect him to benchmark and demonstrate that
> kzfree() has _performance problems_ before we can look into merging
> his patches.

I was pointing out that the 'those test and jump/call branches have
performance hits' argument, while nonsensical, applies to kzfree and
with even more negative connotations (deeper call depth, more test
branches used in ksize and kfree, lack of pointer validation).

Also there's no kmem_cache_kzfree, either. There are some caches you
might want to look at.

Regarding the 'damn obvious much bigger performance impact': they have
none. You don't like it? Don't use the boot time option. And the next
version using a Kconfig option to disable it altogether is coming. Plus
I'll remove the sanitize_obj function altogether. Guess why I'm doing
that? Because there might be some benefit in trying to keep you happy
regarding that specific aspect of the patch.

Alan already pointed out this very clearly. Alan and I initially had
conflicting opinions about the first patches, we came to a point of
agreement. Rik also proposed changes, which I agreed upon and followed
up. They provided constructive criticism and suggestions.

But you and the other cabal of vagueness have only sent mostly useless
comments, outright uncivil responses, obvious misdirection attempts,
unfounded criticism, etc. I haven't seen more fallacies put together since
the last time I read an unreleased film script by Jerry Lewis.

If you think you have the power to decide when to cripple the kernel,
and what goes in or out by your own will, you missed the point about how
the Linux kernel became what it is today.

While we are at it, did any of you (Pekka, Ingo, Peter) bother reading
the very first paper I referenced in the very first patch?:

http://www.stanford.edu/~blp/papers/shredding.html/#kernel-appendix

Could you _please_ bother your highness with an earthly five minutes
read of that paper? If you don't have other magnificent obligations to
attend to. _Please_.

	Larry

PS: I'm still thanking myself for not implementing the kthread /
multiple page pool based approach. Lord, what could have happened if I
did.
> 

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-30 21:33                                           ` Larry H.
  0 siblings, 0 replies; 220+ messages in thread
From: Larry H. @ 2009-05-30 21:33 UTC (permalink / raw)
  To: Pekka Enberg
  Cc: Rik van Riel, Ingo Molnar, Alan Cox, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar, pageexec, Linus Torvalds

On 23:53 Sat 30 May     , Pekka Enberg wrote:
> Hi Rik,
> 
> On Sat, May 30, 2009 at 11:39 PM, Rik van Riel <riel@redhat.com> wrote:
> >>> Have you benchmarked the addition of these changes? I would like to see
> >>> benchmarks done for these (crypto api included), since you are proposing
> >>> them.
> >>
> >> You have it the wrong way around. _You_ have the burden of proof here
> >> really, you are trying to get patches into the upstream kernel. I'm not
> >> obliged to do your homework for you. I might be wrong, and you can prove me
> >> wrong.
> >
> > Larry's patches do not do what you propose they
> > should do, so why would he have to benchmark your
> > idea?
> 
> It's pretty damn obvious that Larry's patches have a much bigger
> performance impact than using kzfree() for selected parts of the
> kernel. So yes, I do expect him to benchmark and demonstrate that
> kzfree() has _performance problems_ before we can look into merging
> his patches.

I was pointing out that the 'those test and jump/call branches have
performance hits' argument, while nonsensical, applies to kzfree and
with even more negative connotations (deeper call depth, more test
branches used in ksize and kfree, lack of pointer validation).

Also there's no kmem_cache_kzfree, either. There are some caches you
might want to look at.

Regarding the 'damn obvious much bigger performance impact': they have
none. You don't like it? Don't use the boot time option. And the next
version using a Kconfig option to disable it altogether is coming. Plus
I'll remove the sanitize_obj function altogether. Guess why I'm doing
that? Because there might be some benefit in trying to keep you happy
regarding that specific aspect of the patch.

Alan already pointed out this very clearly. Alan and I initially had
conflicting opinions about the first patches, we came to a point of
agreement. Rik also proposed changes, which I agreed upon and followed
up. They provided constructive criticism and suggestions.

But you and the other cabal of vagueness have only sent mostly useless
comments, outright uncivil responses, obvious misdirection attempts,
unfounded criticism, etc. I haven't seen more fallacies put together since
the last time I read an unreleased film script by Jerry Lewis.

If you think you have the power to decide when to cripple the kernel,
and what goes in or out by your own will, you missed the point about how
the Linux kernel became what it is today.

While we are at it, did any of you (Pekka, Ingo, Peter) bother reading
the very first paper I referenced in the very first patch?:

http://www.stanford.edu/~blp/papers/shredding.html/#kernel-appendix

Could you _please_ bother your highness with an earthly five minutes
read of that paper? If you don't have other magnificent obligations to
attend to. _Please_.

	Larry

PS: I'm still thanking myself for not implementing the kthread /
multiple page pool based approach. Lord, what could have happened if I
did.
> 

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-30 20:39                                       ` Rik van Riel
@ 2009-05-30 22:10                                         ` Ingo Molnar
  -1 siblings, 0 replies; 220+ messages in thread
From: Ingo Molnar @ 2009-05-30 22:10 UTC (permalink / raw)
  To: Rik van Riel
  Cc: Larry H.,
	Pekka Enberg, Alan Cox, linux-kernel, Linus Torvalds, linux-mm,
	Ingo Molnar, pageexec, Linus Torvalds


* Rik van Riel <riel@redhat.com> wrote:

> Ingo Molnar wrote:
>> * Larry H. <research@subreption.com> wrote:
>>> On 20:21 Sat 30 May     , Ingo Molnar wrote:
>
>>>> Freeing keys is an utter slow-path (if not then the clearing is  
>>>> the least of our performance worries), so any clearing cost is in 
>>>> the noise. Furthermore, kzfree() is an existing facility already in 
>>>> use. If it's reused by your patches that brings further advantages: 
>>>> kzfree(), if it has any bugs, will be fixed. While if you add a 
>>>> parallel facility kzfree() stays broken.
>>> Have you benchmarked the addition of these changes? I would like to 
>>> see benchmarks done for these (crypto api included), since you are 
>>> proposing them.
>>
>> You have it the wrong way around. _You_ have the burden of proof here 
>> really, you are trying to get patches into the upstream kernel. I'm not 
>> obliged to do your homework for you. I might be wrong, and you can 
>> prove me wrong.
>
> Larry's patches do not do what you propose they should do, so why 
> would he have to benchmark your idea?

My (and AFAICT Pekka's) suggestion was to use unconditional kzfree() 
in the few places where it matters: crypto/WEP key and input stream 
freeing.

His counter-argument was that it is unacceptable overhead - without 
any supporting data. I don't think the overhead is a problem in those 
cases (without any supporting data either).

Obviously the argument is best settled by measurements. Done by 
whoever wants to push this code.

	Ingo

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-30 22:10                                         ` Ingo Molnar
  0 siblings, 0 replies; 220+ messages in thread
From: Ingo Molnar @ 2009-05-30 22:10 UTC (permalink / raw)
  To: Rik van Riel
  Cc: Larry H.,
	Pekka Enberg, Alan Cox, linux-kernel, Linus Torvalds, linux-mm,
	Ingo Molnar, pageexec, Linus Torvalds


* Rik van Riel <riel@redhat.com> wrote:

> Ingo Molnar wrote:
>> * Larry H. <research@subreption.com> wrote:
>>> On 20:21 Sat 30 May     , Ingo Molnar wrote:
>
>>>> Freeing keys is an utter slow-path (if not then the clearing is  
>>>> the least of our performance worries), so any clearing cost is in 
>>>> the noise. Furthermore, kzfree() is an existing facility already in 
>>>> use. If it's reused by your patches that brings further advantages: 
>>>> kzfree(), if it has any bugs, will be fixed. While if you add a 
>>>> parallel facility kzfree() stays broken.
>>> Have you benchmarked the addition of these changes? I would like to 
>>> see benchmarks done for these (crypto api included), since you are 
>>> proposing them.
>>
>> You have it the wrong way around. _You_ have the burden of proof here 
>> really, you are trying to get patches into the upstream kernel. I'm not 
>> obliged to do your homework for you. I might be wrong, and you can 
>> prove me wrong.
>
> Larry's patches do not do what you propose they should do, so why 
> would he have to benchmark your idea?

My (and AFAICT Pekka's) suggestion was to use unconditional kzfree() 
in the few places where it matters: crypto/WEP key and input stream 
freeing.

His counter-argument was that it is unacceptable overhead - without 
any supporting data. I don't think the overhead is a problem in those 
cases (without any supporting data either).

Obviously the argument is best settled by measurements. Done by 
whoever wants to push this code.

	Ingo

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-30 20:22                                 ` Pekka Enberg
@ 2009-05-30 22:14                                   ` Ingo Molnar
  -1 siblings, 0 replies; 220+ messages in thread
From: Ingo Molnar @ 2009-05-30 22:14 UTC (permalink / raw)
  To: Pekka Enberg
  Cc: Larry H.,
	Alan Cox, Rik van Riel, linux-kernel, Linus Torvalds, linux-mm,
	Ingo Molnar, pageexec, Linus Torvalds, Matt Mackall


* Pekka Enberg <penberg@cs.helsinki.fi> wrote:

> Hi Larry,
> 
> On Sat, May 30, 2009 at 9:03 PM, Larry H. <research@subreption.com> wrote:
> > The first issue is that SLOB has a broken ksize, which won't take into
> > consideration compound pages AFAIK. To fix this you will need to
> > introduce some changes in the way the slob_page structure is handled,
> > and add real size tracking to it. You will find these problems if you
> > try to implement a reliable kmem_ptr_validate for SLOB, too.
> 
> Does this mean that kzfree() isn't broken for SLAB/SLUB? Maybe I 
> read your emails wrong but you seemed to imply that.

Yep, he definitely wrote that:

    http://lkml.org/lkml/2009/5/30/30

 [...]
 |
 | That's hopeless, and kzfree is broken. Like I said in my earlier 
 | reply, please test that yourself to see the results. Whoever 
 | wrote that ignored how SLAB/SLUB work and if kzfree had been used 
 | somewhere in the kernel before, it should have been noticed long 
 | time ago.
 |
 [...]

Very puzzling claims, I have to say.

	Ingo

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-30 22:14                                   ` Ingo Molnar
  0 siblings, 0 replies; 220+ messages in thread
From: Ingo Molnar @ 2009-05-30 22:14 UTC (permalink / raw)
  To: Pekka Enberg
  Cc: Larry H.,
	Alan Cox, Rik van Riel, linux-kernel, Linus Torvalds, linux-mm,
	Ingo Molnar, pageexec, Linus Torvalds, Matt Mackall


* Pekka Enberg <penberg@cs.helsinki.fi> wrote:

> Hi Larry,
> 
> On Sat, May 30, 2009 at 9:03 PM, Larry H. <research@subreption.com> wrote:
> > The first issue is that SLOB has a broken ksize, which won't take into
> > consideration compound pages AFAIK. To fix this you will need to
> > introduce some changes in the way the slob_page structure is handled,
> > and add real size tracking to it. You will find these problems if you
> > try to implement a reliable kmem_ptr_validate for SLOB, too.
> 
> Does this mean that kzfree() isn't broken for SLAB/SLUB? Maybe I 
> read your emails wrong but you seemed to imply that.

Yep, he definitely wrote that:

    http://lkml.org/lkml/2009/5/30/30

 [...]
 |
 | That's hopeless, and kzfree is broken. Like I said in my earlier 
 | reply, please test that yourself to see the results. Whoever 
 | wrote that ignored how SLAB/SLUB work and if kzfree had been used 
 | somewhere in the kernel before, it should have been noticed long 
 | time ago.
 |
 [...]

Very puzzling claims, I have to say.

	Ingo

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page  allocator
  2009-05-30 20:53                                         ` Pekka Enberg
@ 2009-05-30 23:10                                           ` Alan Cox
  -1 siblings, 0 replies; 220+ messages in thread
From: Alan Cox @ 2009-05-30 23:10 UTC (permalink / raw)
  To: Pekka Enberg
  Cc: Rik van Riel, Ingo Molnar, Larry H.,
	linux-kernel, Linus Torvalds, linux-mm, Ingo Molnar, pageexec,
	Linus Torvalds

> It's pretty damn obvious that Larry's patches have a much bigger
> performance impact than using kzfree() for selected parts of the
> kernel. So yes, I do expect him to benchmark and demonstrate that
> kzfree() has _performance problems_ before we can look into merging
> his patches.

We seem to be muddling up multiple things here which is not helpful.

There are three things going on

#1 Is ksize() buggy ?

#2 Using kzfree() to clear specific bits of memory (and I question the
kzfree implementation as it seems ksize can return numbers much much
bigger than the allocated space you need to clear - correct but oversize)
or using other flags. I'd favour kzfree personally (and fixing it to work
properly)

#3 People wanting to be able to select for more security *irrespective*
of performance cost. Which is no different to SELinux for example.


Conflating them all into one mess is causing confusion


^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-30 23:10                                           ` Alan Cox
  0 siblings, 0 replies; 220+ messages in thread
From: Alan Cox @ 2009-05-30 23:10 UTC (permalink / raw)
  To: Pekka Enberg
  Cc: Rik van Riel, Ingo Molnar, Larry H.,
	linux-kernel, Linus Torvalds, linux-mm, Ingo Molnar, pageexec,
	Linus Torvalds

> It's pretty damn obvious that Larry's patches have a much bigger
> performance impact than using kzfree() for selected parts of the
> kernel. So yes, I do expect him to benchmark and demonstrate that
> kzfree() has _performance problems_ before we can look into merging
> his patches.

We seem to be muddling up multiple things here which is not helpful.

There are three things going on

#1 Is ksize() buggy ?

#2 Using kzfree() to clear specific bits of memory (and I question the
kzfree implementation as it seems ksize can return numbers much much
bigger than the allocated space you need to clear - correct but oversize)
or using other flags. I'd favour kzfree personally (and fixing it to work
properly)

#3 People wanting to be able to select for more security *irrespective*
of performance cost. Which is no different to SELinux for example.


Conflating them all into one mess is causing confusion

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-30 21:33                                           ` Larry H.
@ 2009-05-30 23:13                                             ` Alan Cox
  -1 siblings, 0 replies; 220+ messages in thread
From: Alan Cox @ 2009-05-30 23:13 UTC (permalink / raw)
  To: Larry H.
  Cc: Pekka Enberg, Rik van Riel, Ingo Molnar, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar, pageexec, Linus Torvalds

> I was pointing out that the 'those test and jump/call branches have
> performance hits' argument, while nonsensical, applies to kzfree and
> with even more negative connotations (deeper call depth, more test
> branches used in ksize and kfree, lack of pointer validation).

But they only apply to kzfree - there isn't a cost to anyone else. You've
moved the decision to compile time which for the fast path stuff when you
just want to clear keys and other oddments is a big win.

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-30 23:13                                             ` Alan Cox
  0 siblings, 0 replies; 220+ messages in thread
From: Alan Cox @ 2009-05-30 23:13 UTC (permalink / raw)
  To: Larry H.
  Cc: Pekka Enberg, Rik van Riel, Ingo Molnar, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar, pageexec, Linus Torvalds

> I was pointing out that the 'those test and jump/call branches have
> performance hits' argument, while nonsensical, applies to kzfree and
> with even more negative connotations (deeper call depth, more test
> branches used in ksize and kfree, lack of pointer validation).

But they only apply to kzfree - there isn't a cost to anyone else. You've
moved the decision to compile time which for the fast path stuff when you
just want to clear keys and other oddments is a big win.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-30 22:10                                         ` Ingo Molnar
@ 2009-05-30 23:15                                           ` Alan Cox
  -1 siblings, 0 replies; 220+ messages in thread
From: Alan Cox @ 2009-05-30 23:15 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Rik van Riel, Larry H.,
	Pekka Enberg, linux-kernel, Linus Torvalds, linux-mm,
	Ingo Molnar, pageexec, Linus Torvalds

> Obviously the argument is best settled by measurements. Done by 
> whoever wants to push this code.

How do you measure security as a mathematical quantity in a benchtest ?

In terms of performance it's pretty easy to stick a counter in kzfree in
2.6.30-rc. The number that comes up is very very low.

Alan

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-30 23:15                                           ` Alan Cox
  0 siblings, 0 replies; 220+ messages in thread
From: Alan Cox @ 2009-05-30 23:15 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Rik van Riel, Larry H.,
	Pekka Enberg, linux-kernel, Linus Torvalds, linux-mm,
	Ingo Molnar, pageexec, Linus Torvalds

> Obviously the argument is best settled by measurements. Done by 
> whoever wants to push this code.

How do you measure security as a mathematical quantity in a benchtest ?

In terms of performance it's pretty easy to stick a counter in kzfree in
2.6.30-rc. The number that comes up is very very low.

Alan

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-30 23:13                                             ` Alan Cox
@ 2009-05-30 23:18                                               ` Larry H.
  -1 siblings, 0 replies; 220+ messages in thread
From: Larry H. @ 2009-05-30 23:18 UTC (permalink / raw)
  To: Alan Cox
  Cc: Pekka Enberg, Rik van Riel, Ingo Molnar, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar, pageexec, Linus Torvalds

On 00:13 Sun 31 May     , Alan Cox wrote:
> > I was pointing out that the 'those test and jump/call branches have
> > performance hits' argument, while nonsensical, applies to kzfree and
> > with even more negative connotations (deeper call depth, more test
> > branches used in ksize and kfree, lack of pointer validation).
> 
> But they only apply to kzfree - there isn't a cost to anyone else. You've
> move the decision to compile time which for the fast path stuff when you
> just want to clear keys and other oddments is a big win.

OK, I'm going to squeeze some time and provide patches that perform the
same as my original page bit ones did, but using kzfree. Behold code like
in the tty buffer management, which uses the page allocator directly for
allocations greater than PAGE_SIZE in length. That needs special
treatment, and is exactly the reason I've proposed unconditional
sanitization since the original patches were rejected.

	Larry


^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-30 23:18                                               ` Larry H.
  0 siblings, 0 replies; 220+ messages in thread
From: Larry H. @ 2009-05-30 23:18 UTC (permalink / raw)
  To: Alan Cox
  Cc: Pekka Enberg, Rik van Riel, Ingo Molnar, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar, pageexec, Linus Torvalds

On 00:13 Sun 31 May     , Alan Cox wrote:
> > I was pointing out that the 'those test and jump/call branches have
> > performance hits' argument, while nonsensical, applies to kzfree and
> > with even more negative connotations (deeper call depth, more test
> > branches used in ksize and kfree, lack of pointer validation).
> 
> But they only apply to kzfree - there isn't a cost to anyone else. You've
> move the decision to compile time which for the fast path stuff when you
> just want to clear keys and other oddments is a big win.

OK, I'm going to squeeze some time and provide patches that perform the
same as my original page bit ones did, but using kzfree. Behold code like
in the tty buffer management, which uses the page allocator directly for
allocations greater than PAGE_SIZE in length. That needs special
treatment, and is exactly the reason I've proposed unconditional
sanitization since the original patches were rejected.

	Larry

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page  allocator
  2009-05-30 23:10                                           ` Alan Cox
@ 2009-05-31  6:14                                             ` Pekka Enberg
  -1 siblings, 0 replies; 220+ messages in thread
From: Pekka Enberg @ 2009-05-31  6:14 UTC (permalink / raw)
  To: Alan Cox
  Cc: Rik van Riel, Ingo Molnar, Larry H.,
	linux-kernel, Linus Torvalds, linux-mm, Ingo Molnar, pageexec,
	Linus Torvalds

Hi Alan,

On Sun, May 31, 2009 at 2:10 AM, Alan Cox <alan@lxorguk.ukuu.org.uk> wrote:
>> It's pretty damn obvious that Larry's patches have a much bigger
>> performance impact than using kzfree() for selected parts of the
>> kernel. So yes, I do expect him to benchmark and demonstrate that
>> kzfree() has _performance problems_ before we can look into merging
>> his patches.
>
> We seem to be muddling up multiple things here which is not helpful.

Yup.

On Sun, May 31, 2009 at 2:10 AM, Alan Cox <alan@lxorguk.ukuu.org.uk> wrote:
> There are three things going on
>
> #1 Is ksize() buggy ?

No, there's nothing wrong with ksize() I am aware of. Yes, Larry has
been saying it is but hasn't provided any evidence so far.

On Sun, May 31, 2009 at 2:10 AM, Alan Cox <alan@lxorguk.ukuu.org.uk> wrote:
> #2 Using kzfree() to clear specific bits of memory (and I question the
> kzfree implementation as it seems ksize can return numbers much much
> bigger than the allocated space you need to clear - correct but oversize)
> or using other flags. I'd favour kzfree personally (and fixing it to work
> properly)

Well, yes, that's what kzfree() needs to do given the current API. I
am not sure why you think it's a problem, though. Adding a size
argument to the function will make it more error prone.

On Sun, May 31, 2009 at 2:10 AM, Alan Cox <alan@lxorguk.ukuu.org.uk> wrote:
> #3 People wanting to be able to select for more security *irrespective*
> of performance cost. Which is no different to SELinux for example.

Yeah, as I said before, I really don't have any objections to this. I
just think nobody is going to enable it so memset() or kzfree() in
relevant places is probably a good idea.

                                   Pekka

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-31  6:14                                             ` Pekka Enberg
  0 siblings, 0 replies; 220+ messages in thread
From: Pekka Enberg @ 2009-05-31  6:14 UTC (permalink / raw)
  To: Alan Cox
  Cc: Rik van Riel, Ingo Molnar, Larry H.,
	linux-kernel, Linus Torvalds, linux-mm, Ingo Molnar, pageexec,
	Linus Torvalds

Hi Alan,

On Sun, May 31, 2009 at 2:10 AM, Alan Cox <alan@lxorguk.ukuu.org.uk> wrote:
>> It's pretty damn obvious that Larry's patches have a much bigger
>> performance impact than using kzfree() for selected parts of the
>> kernel. So yes, I do expect him to benchmark and demonstrate that
>> kzfree() has _performance problems_ before we can look into merging
>> his patches.
>
> We seem to be muddling up multiple things here which is not helpful.

Yup.

On Sun, May 31, 2009 at 2:10 AM, Alan Cox <alan@lxorguk.ukuu.org.uk> wrote:
> There are three things going on
>
> #1 Is ksize() buggy ?

No, there's nothing wrong with ksize() I am aware of. Yes, Larry has
been saying it is but hasn't provided any evidence so far.

On Sun, May 31, 2009 at 2:10 AM, Alan Cox <alan@lxorguk.ukuu.org.uk> wrote:
> #2 Using kzfree() to clear specific bits of memory (and I question the
> kzfree implementation as it seems ksize can return numbers much much
> bigger than the allocated space you need to clear - correct but oversize)
> or using other flags. I'd favour kzfree personally (and fixing it to work
> properly)

Well, yes, that's what kzfree() needs to do given the current API. I
am not sure why you think it's a problem, though. Adding a size
argument to the function will make it more error prone.

On Sun, May 31, 2009 at 2:10 AM, Alan Cox <alan@lxorguk.ukuu.org.uk> wrote:
> #3 People wanting to be able to select for more security *irrespective*
> of performance cost. Which is no different to SELinux for example.

Yeah, as I said before, I really don't have any objections to this. I
just think nobody is going to enable it so memset() or kzfree() in
relevant places is probably a good idea.

                                   Pekka

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-30 23:18                                               ` Larry H.
@ 2009-05-31  6:30                                                 ` Pekka Enberg
  -1 siblings, 0 replies; 220+ messages in thread
From: Pekka Enberg @ 2009-05-31  6:30 UTC (permalink / raw)
  To: Larry H.
  Cc: Alan Cox, Rik van Riel, Ingo Molnar, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar, pageexec, Linus Torvalds

Larry H. wrote:
> OK, I'm going to squeeze some time and provide patches that perform the
> same my original page bit ones did, but using kzfree. Behold code like
> in the tty buffer management, which uses the page allocator directly for
> allocations greater than PAGE_SIZE in length. That needs special
> treatment, and is exactly the reason I've proposed unconditional
> sanitization since the original patches were rejected.

You might want to also do the patch Alan suggested for the security 
conscious people. That is, do a memset() in every page free and wrap 
that under CONFIG_SECURITY_PARANOIA or something. There's no reason the 
kzfree() patches and that can't co-exist.

			Pekka

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-31  6:30                                                 ` Pekka Enberg
  0 siblings, 0 replies; 220+ messages in thread
From: Pekka Enberg @ 2009-05-31  6:30 UTC (permalink / raw)
  To: Larry H.
  Cc: Alan Cox, Rik van Riel, Ingo Molnar, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar, pageexec, Linus Torvalds

Larry H. wrote:
> OK, I'm going to squeeze some time and provide patches that perform the
> same my original page bit ones did, but using kzfree. Behold code like
> in the tty buffer management, which uses the page allocator directly for
> allocations greater than PAGE_SIZE in length. That needs special
> treatment, and is exactly the reason I've proposed unconditional
> sanitization since the original patches were rejected.

You might want to also do the patch Alan suggested for the security 
conscious people. That is, do a memset() in every page free and wrap 
that under CONFIG_SECURITY_PARANOIA or something. There's no reason the 
kzfree() patches and that can't co-exist.

			Pekka

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page  allocator
  2009-05-30 21:33                                           ` Larry H.
@ 2009-05-31  7:17                                             ` Pekka Enberg
  -1 siblings, 0 replies; 220+ messages in thread
From: Pekka Enberg @ 2009-05-31  7:17 UTC (permalink / raw)
  To: Larry H.
  Cc: Rik van Riel, Ingo Molnar, Alan Cox, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar, pageexec, Linus Torvalds

On Sun, May 31, 2009 at 12:33 AM, Larry H. <research@subreption.com> wrote:
> While we are at it, did any of you (Pekka, Ingo, Peter) bother reading
> the very first paper I referenced in the very first patch?:
>
> http://www.stanford.edu/~blp/papers/shredding.html/#kernel-appendix
>
> Could you _please_ bother your highness with an earthly five minutes
> read of that paper? If you don't have other magnificent obligations to
> attend to. _Please_.
>
> PS: I'm still thanking myself for not implementing the kthread /
> multiple page pool based approach. Lord, what could have happened if I
> did.

Something like that might make sense for fast-path code.

I think we could make GFP_SENSITIVE mean that allocations using it
force the actual slab pages to be cleaned up before they're returned
to the page allocator. As far as I can tell, we could then recycle
those slab pages to GFP_SENSITIVE allocations without any clearing
whatsoever as long as they're managed by slab. This ensures critical
data in kmalloc()'d memory is never leaked to userspace.

This doesn't fix all the cases Alan pointed out (unconditional
memset() in page free is clearly superior from security pov) but
should allow us to use GFP_SENSITIVE in fast-path cases where the
overhead of kzfree() is unacceptable.

                                      Pekka

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-31  7:17                                             ` Pekka Enberg
  0 siblings, 0 replies; 220+ messages in thread
From: Pekka Enberg @ 2009-05-31  7:17 UTC (permalink / raw)
  To: Larry H.
  Cc: Rik van Riel, Ingo Molnar, Alan Cox, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar, pageexec, Linus Torvalds

On Sun, May 31, 2009 at 12:33 AM, Larry H. <research@subreption.com> wrote:
> While we are at it, did any of you (Pekka, Ingo, Peter) bother reading
> the very first paper I referenced in the very first patch?:
>
> http://www.stanford.edu/~blp/papers/shredding.html/#kernel-appendix
>
> Could you _please_ bother your highness with an earthly five minutes
> read of that paper? If you don't have other magnificent obligations to
> attend to. _Please_.
>
> PS: I'm still thanking myself for not implementing the kthread /
> multiple page pool based approach. Lord, what could have happened if I
> did.

Something like that might make sense for fast-path code.

I think we could make GFP_SENSITIVE mean that allocations using it
force the actual slab pages to be cleaned up before they're returned
to the page allocator. As far as I can tell, we could then recycle
those slab pages to GFP_SENSITIVE allocations without any clearing
whatsoever as long as they're managed by slab. This ensures critical
data in kmalloc()'d memory is never leaked to userspace.

This doesn't fix all the cases Alan pointed out (unconditional
memset() in page free is clearly superior from security pov) but
should allow us to use GFP_SENSITIVE in fast-path cases where the
overhead of kzfree() is unacceptable.

                                      Pekka

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page  allocator
  2009-05-31 10:24                                               ` Alan Cox
@ 2009-05-31 10:24                                                 ` Pekka Enberg
  -1 siblings, 0 replies; 220+ messages in thread
From: Pekka Enberg @ 2009-05-31 10:24 UTC (permalink / raw)
  To: Alan Cox
  Cc: Rik van Riel, Ingo Molnar, Larry H.,
	linux-kernel, Linus Torvalds, linux-mm, Ingo Molnar, pageexec,
	Linus Torvalds

Hi Alan,

On Sun, May 31, 2009 at 2:10 AM, Alan Cox <alan@lxorguk.ukuu.org.uk> wrote:
>>> #2 Using kzfree() to clear specific bits of memory (and I question the
>>> kzfree implementation as it seems ksize can return numbers much much
>>> bigger than the allocated space you need to clear - correct but oversize)
>>> or using other flags. I'd favour kzfree personally (and fixing it to work
>>> properly)
>> Well, yes, that's what kzfree() needs to do given the current API. I
>> am not sure why you think it's a problem, though. Adding a size
>> argument to the function will make it more error prone.

Alan Cox wrote:
> Definitely - am I right however that 
> 
> 	x = kzalloc(size, flags)
> 	blah
> 	kzfree(x)
> 
> can memset a good deal more memory (still safely) than "size" to zero ?

Yes, because we actually _allocate_ more than the requested 'size' and 
the generic allocator has no way of knowing how much of the 
allocated region was actually used by the caller.

Alan Cox wrote:
> That has performance relevance if so and it ought to at least be
> documented.

Makes sense.

		Pekka

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-31 10:24                                                 ` Pekka Enberg
  0 siblings, 0 replies; 220+ messages in thread
From: Pekka Enberg @ 2009-05-31 10:24 UTC (permalink / raw)
  To: Alan Cox
  Cc: Rik van Riel, Ingo Molnar, Larry H.,
	linux-kernel, Linus Torvalds, linux-mm, Ingo Molnar, pageexec,
	Linus Torvalds

Hi Alan,

On Sun, May 31, 2009 at 2:10 AM, Alan Cox <alan@lxorguk.ukuu.org.uk> wrote:
>>> #2 Using kzfree() to clear specific bits of memory (and I question the
>>> kzfree implementation as it seems ksize can return numbers much much
>>> bigger than the allocated space you need to clear - correct but oversize)
>>> or using other flags. I'd favour kzfree personally (and fixing it to work
>>> properly)
>> Well, yes, that's what kzfree() needs to do given the current API. I
>> am not sure why you think it's a problem, though. Adding a size
>> argument to the function will make it more error prone.

Alan Cox wrote:
> Definitely - am I right however that 
> 
> 	x = kzalloc(size, flags)
> 	blah
> 	kzfree(x)
> 
> can memset a good deal more memory (still safely) than "size" to zero ?

Yes, because we actually _allocate_ more than the requested 'size' and 
the generic allocator has no way of knowing how much of the 
allocated region was actually used by the caller.

Alan Cox wrote:
> That has performance relevance if so and it ought to at least be
> documented.

Makes sense.

		Pekka

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page  allocator
  2009-05-31  6:14                                             ` Pekka Enberg
@ 2009-05-31 10:24                                               ` Alan Cox
  -1 siblings, 0 replies; 220+ messages in thread
From: Alan Cox @ 2009-05-31 10:24 UTC (permalink / raw)
  To: Pekka Enberg
  Cc: Rik van Riel, Ingo Molnar, Larry H.,
	linux-kernel, Linus Torvalds, linux-mm, Ingo Molnar, pageexec,
	Linus Torvalds

> On Sun, May 31, 2009 at 2:10 AM, Alan Cox <alan@lxorguk.ukuu.org.uk> wrote:
> > #2 Using kzfree() to clear specific bits of memory (and I question the
> > kzfree implementation as it seems ksize can return numbers much much
> > bigger than the allocated space you need to clear - correct but oversize)
> > or using other flags. I'd favour kzfree personally (and fixing it to work
> > properly)
> 
> Well, yes, that's what kzfree() needs to do given the current API. I
> am not sure why you think it's a problem, though. Adding a size
> argument to the function will make it more error prone.

Definitely - am I right however that 

	x = kzalloc(size, flags)
	blah
	kzfree(x)

can memset a good deal more memory (still safely) than "size" to zero ?
That has performance relevance if so and it ought to at least be
documented.

> On Sun, May 31, 2009 at 2:10 AM, Alan Cox <alan@lxorguk.ukuu.org.uk> wrote:
> > #3 People wanting to be able to select for more security *irrespective*
> > of performance cost. Which is no different to SELinux for example.
> 
> Yeah, as I said before, I really don't have any objections to this. I
> just think nobody is going to enable it so memset() or kzfree() in
> relevant places is probably a good idea.

Agreed entirely.

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-31 10:24                                               ` Alan Cox
  0 siblings, 0 replies; 220+ messages in thread
From: Alan Cox @ 2009-05-31 10:24 UTC (permalink / raw)
  To: Pekka Enberg
  Cc: Rik van Riel, Ingo Molnar, Larry H.,
	linux-kernel, Linus Torvalds, linux-mm, Ingo Molnar, pageexec,
	Linus Torvalds

> On Sun, May 31, 2009 at 2:10 AM, Alan Cox <alan@lxorguk.ukuu.org.uk> wrote:
> > #2 Using kzfree() to clear specific bits of memory (and I question the
> > kzfree implementation as it seems ksize can return numbers much much
> > bigger than the allocated space you need to clear - correct but oversize)
> > or using other flags. I'd favour kzfree personally (and fixing it to work
> > properly)
> 
> Well, yes, that's what kzfree() needs to do given the current API. I
> am not sure why you think it's a problem, though. Adding a size
> argument to the function will make it more error prone.

Definitely - am I right however that 

	x = kzalloc(size, flags)
	blah
	kzfree(x)

can memset a good deal more memory (still safely) than "size" to zero ?
That has performance relevance if so and it ought to at least be
documented.

> On Sun, May 31, 2009 at 2:10 AM, Alan Cox <alan@lxorguk.ukuu.org.uk> wrote:
> > #3 People wanting to be able to select for more security *irrespective*
> > of performance cost. Which is no different to SELinux for example.
> 
> Yeah, as I said before, I really don't have any objections to this. I
> just think nobody is going to enable it so memset() or kzfree() in
> relevant places is probably a good idea.

Agreed entirely.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-31  6:30                                                 ` Pekka Enberg
@ 2009-05-31 11:49                                                   ` Larry H.
  -1 siblings, 0 replies; 220+ messages in thread
From: Larry H. @ 2009-05-31 11:49 UTC (permalink / raw)
  To: Pekka Enberg
  Cc: Alan Cox, Rik van Riel, Ingo Molnar, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar, pageexec, Linus Torvalds

On 09:30 Sun 31 May     , Pekka Enberg wrote:
> Larry H. wrote:
>> OK, I'm going to squeeze some time and provide patches that perform the
>> same my original page bit ones did, but using kzfree. Behold code like
>> in the tty buffer management, which uses the page allocator directly for
>> allocations greater than PAGE_SIZE in length. That needs special
>> treatment, and is exactly the reason I've proposed unconditional
>> sanitization since the original patches were rejected.
>
> You might want to also do the patch Alan suggested for the security 
> conscious people. That is, do a memset() in every page free and wrap that 
> under CONFIG_SECURITY_PARANOIA or something. There's no reason the kzfree() 
> patches and that can't co-exist.

I know you came late into the discussion, but if you had invested a
minute checking the thread, you would have spotted a patch in which a
Kconfig option was used to disable the sensitive page flag effects as a whole.
The very first one.

I'm not wasting my time re-working a patch which has been already
rejected. Go ahead and do it in your own time if you wish, it's GPL
anyway.

	Larry


^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-31 11:49                                                   ` Larry H.
  0 siblings, 0 replies; 220+ messages in thread
From: Larry H. @ 2009-05-31 11:49 UTC (permalink / raw)
  To: Pekka Enberg
  Cc: Alan Cox, Rik van Riel, Ingo Molnar, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar, pageexec, Linus Torvalds

On 09:30 Sun 31 May     , Pekka Enberg wrote:
> Larry H. wrote:
>> OK, I'm going to squeeze some time and provide patches that perform the
>> same my original page bit ones did, but using kzfree. Behold code like
>> in the tty buffer management, which uses the page allocator directly for
>> allocations greater than PAGE_SIZE in length. That needs special
>> treatment, and is exactly the reason I've proposed unconditional
>> sanitization since the original patches were rejected.
>
> You might want to also do the patch Alan suggested for the security 
> conscious people. That is, do a memset() in every page free and wrap that 
> under CONFIG_SECURITY_PARANOIA or something. There's no reason the kzfree() 
> patches and that can't co-exist.

I know you came late into the discussion, but if you had invested a
minute checking the thread, you would have spotted a patch in which a
Kconfig option was used to disable the sensitive page flag effects as a whole.
The very first one.

I'm not wasting my time re-working a patch which has been already
rejected. Go ahead and do it in your own time if you wish, it's GPL
anyway.

	Larry

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-31  7:17                                             ` Pekka Enberg
@ 2009-05-31 11:58                                               ` Larry H.
  -1 siblings, 0 replies; 220+ messages in thread
From: Larry H. @ 2009-05-31 11:58 UTC (permalink / raw)
  To: Pekka Enberg
  Cc: Rik van Riel, Ingo Molnar, Alan Cox, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar, pageexec, Linus Torvalds

On 10:17 Sun 31 May     , Pekka Enberg wrote:
> On Sun, May 31, 2009 at 12:33 AM, Larry H. <research@subreption.com> wrote:
> > While we are at it, did any of you (Pekka, Ingo, Peter) bother reading
> > the very first paper I referenced in the very first patch?:
> >
> > http://www.stanford.edu/~blp/papers/shredding.html/#kernel-appendix
> >
> > Could you _please_ bother your highness with an earthly five minutes
> > read of that paper? If you don't have other magnificent obligations to
> > attend to. _Please_.
> >
> > PS: I'm still thanking myself for not implementing the kthread /
> > multiple page pool based approach. Lord, what could have happened if I
> > did.
> 
> Something like that might make sense for fast-path code.
> 
> I think we could make GFP_SENSITIVE mean that allocations using it
> force the actual slab pages to be cleaned up before they're returned
> to the page allocator. As far as I can tell, we could then recycle
> those slab pages to GFP_SENSITIVE allocations without any clearing
> whatsoever as long as they're managed by slab. This ensures critical
> data in kmalloc()'d memory is never leaked to userspace.
> 
> This doesn't fix all the cases Alan pointed out (unconditional
> memset() in page free is clearly superior from security pov) but
> should allow us to use GFP_SENSITIVE in fast-path cases where the
> overhead of kzfree() is unacceptable.

Thanks for coming to the conclusion that unconditional memory
sanitization is the correct approach.

I thought this had been stated numerous times before in this thread. Are
you serious about your responses or you are just clowning around? It's
amusing, I give you that much.


^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-31 11:58                                               ` Larry H.
  0 siblings, 0 replies; 220+ messages in thread
From: Larry H. @ 2009-05-31 11:58 UTC (permalink / raw)
  To: Pekka Enberg
  Cc: Rik van Riel, Ingo Molnar, Alan Cox, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar, pageexec, Linus Torvalds

On 10:17 Sun 31 May     , Pekka Enberg wrote:
> On Sun, May 31, 2009 at 12:33 AM, Larry H. <research@subreption.com> wrote:
> > While we are at it, did any of you (Pekka, Ingo, Peter) bother reading
> > the very first paper I referenced in the very first patch?:
> >
> > http://www.stanford.edu/~blp/papers/shredding.html/#kernel-appendix
> >
> > Could you _please_ bother your highness with an earthly five minutes
> > read of that paper? If you don't have other magnificent obligations to
> > attend to. _Please_.
> >
> > PS: I'm still thanking myself for not implementing the kthread /
> > multiple page pool based approach. Lord, what could have happened if I
> > did.
> 
> Something like that might make sense for fast-path code.
> 
> I think we could make GFP_SENSITIVE mean that allocations using it
> force the actual slab pages to be cleaned up before they're returned
> to the page allocator. As far as I can tell, we could then recycle
> those slab pages to GFP_SENSITIVE allocations without any clearing
> whatsoever as long as they're managed by slab. This ensures critical
> data in kmalloc()'d memory is never leaked to userspace.
> 
> This doesn't fix all the cases Alan pointed out (unconditional
> memset() in page free is clearly superior from security pov) but
> should allow us to use GFP_SENSITIVE in fast-path cases where the
> overhead of kzfree() is unacceptable.

Thanks for coming to the conclusion that unconditional memory
sanitization is the correct approach.

I thought this had been stated numerous times before in this thread. Are
you serious about your responses or you are just clowning around? It's
amusing, I give you that much.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-31  6:14                                             ` Pekka Enberg
@ 2009-05-31 12:16                                               ` Larry H.
  -1 siblings, 0 replies; 220+ messages in thread
From: Larry H. @ 2009-05-31 12:16 UTC (permalink / raw)
  To: Pekka Enberg
  Cc: Alan Cox, Rik van Riel, Ingo Molnar, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar, pageexec, Linus Torvalds

On 09:14 Sun 31 May     , Pekka Enberg wrote:
> Hi Alan,
> 
> On Sun, May 31, 2009 at 2:10 AM, Alan Cox <alan@lxorguk.ukuu.org.uk> wrote:
> >> It's pretty damn obvious that Larry's patches have a much bigger
> >> performance impact than using kzfree() for selected parts of the
> >> kernel. So yes, I do expect him to benchmark and demonstrate that
> >> kzfree() has _performance problems_ before we can look into merging
> >> his patches.
> >
> > We seem to be muddling up multiple things here which is not helpful.
> 
> Yup.
> 
> On Sun, May 31, 2009 at 2:10 AM, Alan Cox <alan@lxorguk.ukuu.org.uk> wrote:
> > There are three things going on
> >
> > #1 Is ksize() buggy ?
> 
> No, there's nothing wrong with ksize() I am aware of. Yes, Larry has
> been saying it is but hasn't provided any evidence so far.

Excuse me, do you have an attention or reading disorder? Compound pages
and SLOB anyone? Duplication of test branches for pointer validation?

What are you trying to accomplish by claiming I've never provided
information which I sent to a public channel (this list)? You realize
someone who really cares can just navigate through the cesspool of
messages this thread became, and see the ones where I'm actually trying
to explain the situation to you?

It's amusing that at the expense of your egos, kernel security is ten years
lagging behind for Linux. And it's all to your (and well known others') credit.
Congratulations, and thank you for keeping it that way.

> On Sun, May 31, 2009 at 2:10 AM, Alan Cox <alan@lxorguk.ukuu.org.uk> wrote:
> > #2 Using kzfree() to clear specific bits of memory (and I question the
> > kzfree implementation as it seems ksize can return numbers much much
> > bigger than the allocated space you need to clear - correct but oversize)
> > or using other flags. I'd favour kzfree personally (and fixing it to work
> > properly)
> 
> Well, yes, that's what kzfree() needs to do given the current API. I
> am not sure why you think it's a problem, though. Adding a size
> argument to the function will make it more error prone.

ksize is not designed to be used extensively at all. It's not the
intention of that API.

You should be implementing kzfree_skb and so forth. Just make sure the
definitions stay in header files I can ifdef 0 away when I patch my
kernel at the commodity of my own home, and use a solution which isn't
broken (or PaX itself). Removing all these calls will be quite a burden.

> On Sun, May 31, 2009 at 2:10 AM, Alan Cox <alan@lxorguk.ukuu.org.uk> wrote:
> > #3 People wanting to be able to select for more security *irrespective*
> > of performance cost. Which is no different to SELinux for example.
> 
> Yeah, as I said before, I really don't have any objections to this. I
> just think nobody is going to enable it so memset() or kzfree() in
> relevant places is probably a good idea.

Fallacy man cometh! Let's assume everyone has the same exact lacking and
irresponsible security requirements you have, and try to make it look
like it's the real world. I know you are not alone there.

	Larry

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-31 12:16                                               ` Larry H.
  0 siblings, 0 replies; 220+ messages in thread
From: Larry H. @ 2009-05-31 12:16 UTC (permalink / raw)
  To: Pekka Enberg
  Cc: Alan Cox, Rik van Riel, Ingo Molnar, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar, pageexec, Linus Torvalds

On 09:14 Sun 31 May     , Pekka Enberg wrote:
> Hi Alan,
> 
> On Sun, May 31, 2009 at 2:10 AM, Alan Cox <alan@lxorguk.ukuu.org.uk> wrote:
> >> It's pretty damn obvious that Larry's patches have a much bigger
> >> performance impact than using kzfree() for selected parts of the
> >> kernel. So yes, I do expect him to benchmark and demonstrate that
> >> kzfree() has _performance problems_ before we can look into merging
> >> his patches.
> >
> > We seem to be muddling up multiple things here which is not helpful.
> 
> Yup.
> 
> On Sun, May 31, 2009 at 2:10 AM, Alan Cox <alan@lxorguk.ukuu.org.uk> wrote:
> > There are three things going on
> >
> > #1 Is ksize() buggy ?
> 
> No, there's nothing wrong with ksize() I am aware of. Yes, Larry has
> been saying it is but hasn't provided any evidence so far.

Excuse me, do you have an attention or reading disorder? Compound pages
and SLOB anyone? Duplication of test branches for pointer validation?

What are you trying to accomplish by claiming I've never provided
information which I sent to a public channel (this list)? You realize
someone who really cares can just navigate through the cesspool of
messages this thread became, and see the ones where I'm actually trying
to explain the situation to you?

It's amusing that at the expense of your egos, kernel security is ten years
lagging behind for Linux. And it's all to your (and well known others') credit.
Congratulations, and thank you for keeping it that way.

> On Sun, May 31, 2009 at 2:10 AM, Alan Cox <alan@lxorguk.ukuu.org.uk> wrote:
> > #2 Using kzfree() to clear specific bits of memory (and I question the
> > kzfree implementation as it seems ksize can return numbers much much
> > bigger than the allocated space you need to clear - correct but oversize)
> > or using other flags. I'd favour kzfree personally (and fixing it to work
> > properly)
> 
> Well, yes, that's what kzfree() needs to do given the current API. I
> am not sure why you think it's a problem, though. Adding a size
> argument to the function will make it more error prone.

ksize is not designed to be used extensively at all. It's not the
intention of that API.

You should be implementing kzfree_skb and so forth. Just make sure the
definitions stay in header files I can ifdef 0 away when I patch my
kernel at the commodity of my own home, and use a solution which isn't
broken (or PaX itself). Removing all these calls will be quite a burden.

> On Sun, May 31, 2009 at 2:10 AM, Alan Cox <alan@lxorguk.ukuu.org.uk> wrote:
> > #3 People wanting to be able to select for more security *irrespective*
> > of performance cost. Which is no different to SELinux for example.
> 
> Yeah, as I said before, I really don't have any objections to this. I
> just think nobody is going to enable it so memset() or kzfree() in
> relevant places is probably a good idea.

Fallacy man cometh! Let's assume everyone has the same exact lacking and
irresponsible security requirements you have, and try to make it look
like it's the real world. I know you are not alone there.

	Larry

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page  allocator
  2009-05-31 11:58                                               ` Larry H.
@ 2009-05-31 12:16                                                 ` Pekka Enberg
  -1 siblings, 0 replies; 220+ messages in thread
From: Pekka Enberg @ 2009-05-31 12:16 UTC (permalink / raw)
  To: Larry H.
  Cc: Rik van Riel, Ingo Molnar, Alan Cox, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar, pageexec, Linus Torvalds

On Sun, May 31, 2009 at 2:58 PM, Larry H. <research@subreption.com> wrote:
> Thanks for coming to the conclusion that unconditional memory
> sanitization is the correct approach.
>
> I thought this had been stated numerous times before in this thread. Are
> you serious about your responses or you are just clowning around? It's
> amusing, I give you that much.

So is this the same Larry that was able to have a productive and civil
discussion on #mm on IRC where he wanted me to ACK his patches? Or did
his evil identical twin take over the keyboard?

But anyway, enough is enough, and I really am not interested in this
discussion. I wish you the best of luck getting your patches merged. I
suspect you're gonna need it.

                                 Pekka

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-31 12:16                                                 ` Pekka Enberg
  0 siblings, 0 replies; 220+ messages in thread
From: Pekka Enberg @ 2009-05-31 12:16 UTC (permalink / raw)
  To: Larry H.
  Cc: Rik van Riel, Ingo Molnar, Alan Cox, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar, pageexec, Linus Torvalds

On Sun, May 31, 2009 at 2:58 PM, Larry H. <research@subreption.com> wrote:
> Thanks for coming to the conclusion that unconditional memory
> sanitization is the correct approach.
>
> I thought this had been stated numerous times before in this thread. Are
> you serious about your responses or you are just clowning around? It's
> amusing, I give you that much.

So is this the same Larry that was able to have a productive and civil
discussion on #mm on IRC where he wanted me to ACK his patches? Or did
his evil identical twin take over the keyboard?

But anyway, enough is enough, and I really am not interested in this
discussion. I wish you the best of luck getting your patches merged. I
suspect you're gonna need it.

                                 Pekka

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-31 12:16                                               ` Larry H.
@ 2009-05-31 12:19                                                 ` Pekka Enberg
  -1 siblings, 0 replies; 220+ messages in thread
From: Pekka Enberg @ 2009-05-31 12:19 UTC (permalink / raw)
  To: Larry H.
  Cc: Alan Cox, Rik van Riel, Ingo Molnar, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar, pageexec, Linus Torvalds

Larry H. wrote:
>> No, there's nothing wrong with ksize() I am aware of. Yes, Larry has
>> been saying it is but hasn't provided any evidence so far.
> 
> Excuse me, do you have an attention or reading disorder? Compound pages
> and SLOB anyone? Duplication of test branches for pointer validation?

I don't see a bug there. I cc'd Matt Mackall who is the author of SLOB. 
I am sure he will be able to spot the bug if it in fact exists (which I 
seriously doubt).

Feel free to prove me wrong by sending a patch to fix SLOB. But until 
then, please stop spamming my inbox. Thanks!

			Pekka

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-31 12:19                                                 ` Pekka Enberg
  0 siblings, 0 replies; 220+ messages in thread
From: Pekka Enberg @ 2009-05-31 12:19 UTC (permalink / raw)
  To: Larry H.
  Cc: Alan Cox, Rik van Riel, Ingo Molnar, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar, pageexec, Linus Torvalds

Larry H. wrote:
>> No, there's nothing wrong with ksize() I am aware of. Yes, Larry has
>> been saying it is but hasn't provided any evidence so far.
> 
> Excuse me, do you have an attention or reading disorder? Compound pages
> and SLOB anyone? Duplication of test branches for pointer validation?

I don't see a bug there. I cc'd Matt Mackall who is the author of SLOB. 
I am sure he will be able to spot the bug if it in fact exists (which I 
seriously doubt).

Feel free to prove me wrong by sending a patch to fix SLOB. But until 
then, please stop spamming my inbox. Thanks!

			Pekka

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-31 12:16                                                 ` Pekka Enberg
@ 2009-05-31 12:30                                                   ` Larry H.
  -1 siblings, 0 replies; 220+ messages in thread
From: Larry H. @ 2009-05-31 12:30 UTC (permalink / raw)
  To: Pekka Enberg
  Cc: Rik van Riel, Ingo Molnar, Alan Cox, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar, pageexec, Linus Torvalds

On 15:16 Sun 31 May     , Pekka Enberg wrote:
> On Sun, May 31, 2009 at 2:58 PM, Larry H. <research@subreption.com> wrote:
> > Thanks for coming to the conclusion that unconditional memory
> > sanitization is the correct approach.
> >
> > I thought this had been stated numerous times before in this thread. Are
> > you serious about your responses or you are just clowning around? It's
> > amusing, I give you that much.
> 
> So is this the same Larry that was able to have a productive and civil
> discussion on #mm on IRC where he wanted me to ACK his patches? Or did
> his evil identical twin take over the keyboard?

I was merely making sure what your intentions were regarding the
patches. And it's clear you have utter disregard for them, and security
as a whole. In your idea of the world 'there are no secrets'.

Good luck with that.

> But anyway, enough is enough, and I really am not interested in this
> discussion. I wish you the best of luck getting your patches merged. I
> suspect you're gonna need it.

I submitted them so other people could benefit from it. In the end I
could care less about what you do with them, or if they are merged.
Now that it is clear that trying to do so is a waste of time and energy,
I'm free to go with a clean conscience, knowing that these patches
didn't help others because I didn't try to, but because some council of
vagueness decided it doesn't fit their particularly flawed view of the
world.

See you in the next commit fixing a kernel vulnerability silently that
puts the infrastructure of several organizations at risk for your own
disregard.

	Larry.


^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-31 12:30                                                   ` Larry H.
  0 siblings, 0 replies; 220+ messages in thread
From: Larry H. @ 2009-05-31 12:30 UTC (permalink / raw)
  To: Pekka Enberg
  Cc: Rik van Riel, Ingo Molnar, Alan Cox, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar, pageexec, Linus Torvalds

On 15:16 Sun 31 May     , Pekka Enberg wrote:
> On Sun, May 31, 2009 at 2:58 PM, Larry H. <research@subreption.com> wrote:
> > Thanks for coming to the conclusion that unconditional memory
> > sanitization is the correct approach.
> >
> > I thought this had been stated numerous times before in this thread. Are
> > you serious about your responses or you are just clowning around? It's
> > amusing, I give you that much.
> 
> So is this the same Larry that was able to have a productive and civil
> discussion on #mm on IRC where he wanted me to ACK his patches? Or did
> his evil identical twin take over the keyboard?

I was merely making sure what your intentions were regarding the
patches. And it's clear you have utter disregard for them, and security
as a whole. In your idea of the world 'there are no secrets'.

Good luck with that.

> But anyway, enough is enough, and I really am not interested in this
> discussion. I wish you the best of luck getting your patches merged. I
> suspect you're gonna need it.

I submitted them so other people could benefit from it. In the end I
could care less about what you do with them, or if they are merged.
Now that it is clear that trying to do so is a waste of time and energy,
I'm free to go with a clean conscience, knowing that these patches
didn't help others because I didn't try to, but because some council of
vagueness decided it doesn't fit their particularly flawed view of the
world.

See you in the next commit fixing a kernel vulnerability silently that
puts the infrastructure of several organizations at risk for your own
disregard.

	Larry.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page  allocator
  2009-05-31 12:30                                                   ` Larry H.
@ 2009-05-31 12:35                                                     ` Pekka Enberg
  -1 siblings, 0 replies; 220+ messages in thread
From: Pekka Enberg @ 2009-05-31 12:35 UTC (permalink / raw)
  To: Larry H.
  Cc: Rik van Riel, Ingo Molnar, Alan Cox, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar, pageexec, Linus Torvalds

On Sun, May 31, 2009 at 3:30 PM, Larry H. <research@subreption.com> wrote:
> I was merely making sure what your intentions were regarding the
> patches. And it's clear you have utter disregard for them, and security
> as a whole. In your idea of the world 'there are no secrets'.

Hey, stop putting words in my mouth. Enjoy your 15 minutes of fame on
LKML as much as you want but please leave me out of it.

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-31 12:35                                                     ` Pekka Enberg
  0 siblings, 0 replies; 220+ messages in thread
From: Pekka Enberg @ 2009-05-31 12:35 UTC (permalink / raw)
  To: Larry H.
  Cc: Rik van Riel, Ingo Molnar, Alan Cox, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar, pageexec, Linus Torvalds

On Sun, May 31, 2009 at 3:30 PM, Larry H. <research@subreption.com> wrote:
> I was merely making sure what your intentions were regarding the
> patches. And it's clear you have utter disregard for them, and security
> as a whole. In your idea of the world 'there are no secrets'.

Hey, stop putting words in my mouth. Enjoy your 15 minutes of fame on
LKML as much as you want but please leave me out of it.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-30 10:39                           ` Peter Zijlstra
@ 2009-05-31 14:38                             ` Arjan van de Ven
  -1 siblings, 0 replies; 220+ messages in thread
From: Arjan van de Ven @ 2009-05-31 14:38 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Larry H.,
	Alan Cox, Ingo Molnar, Rik van Riel, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar, pageexec

On Sat, 30 May 2009 12:39:33 +0200
Peter Zijlstra <peterz@infradead.org> wrote:

> > > So if you zero on free, the next allocation will reuse the zeroed
> > > page. And due to LIFO that is not too far out "often", which
> > > makes it likely the page is still in L2 cache.
> > 
> > Thanks for pointing this out clearly, Arjan.
> 
> Thing is, the time between allocation and use is typically orders of
> magnitude less than between free and use. 
> 
> 
> Really, get a life, go fix real bugs. Don't make our kernel slower 

the "make it slower" is an assumption on your part.
I'm not convinced. Would like to see data!

You're balancing a few things in your assumption
* The %age of pages that get zeroed on free, but not used in time and
  get flushed from L2 before they are used
* The %age of pages that today don't get zeroed
versus
* The %age of the page that you are not going to read if you zero on use
  but does wipe a portion of L1 cache

add to that
* Reading a just allocated page is much more rare than writing to it.
  It's just zeros after all ;-)
  it is unclear (and cpu dependent) if writing makes it matter if the
  old (zero) data is in the cache or not, reducing the value of your
  "but it's now in the cache" value argument.
* My assumption is that allocations are more latency sensitive than
  free. After all, on allocate, you're going to use it, while on free
  you're done with what you wanted to do, and performance of that on
  average is assumed by me to matter less.
* We "need" to zero-on-allocate while holding the mmap semaphore,
  on free we clearly don't. We know this gives lock contention in 
  highly threaded workloads... and zero-on-free gets rid of that
  entirely.


-- 
Arjan van de Ven 	Intel Open Source Technology Centre
For development, discussion and tips for power savings, 
visit http://www.lesswatts.org

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-31 14:38                             ` Arjan van de Ven
  0 siblings, 0 replies; 220+ messages in thread
From: Arjan van de Ven @ 2009-05-31 14:38 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Larry H.,
	Alan Cox, Ingo Molnar, Rik van Riel, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar, pageexec

On Sat, 30 May 2009 12:39:33 +0200
Peter Zijlstra <peterz@infradead.org> wrote:

> > > So if you zero on free, the next allocation will reuse the zeroed
> > > page. And due to LIFO that is not too far out "often", which
> > > makes it likely the page is still in L2 cache.
> > 
> > Thanks for pointing this out clearly, Arjan.
> 
> Thing is, the time between allocation and use is typically orders of
> magnitude less than between free and use. 
> 
> 
> Really, get a life, go fix real bugs. Don't make our kernel slower 

the "make it slower" is an assumption on your part.
I'm not convinced. Would like to see data!

You're balancing a few things in your assumption
* The %age of pages that get zeroed on free, but not used in time and
  get flushed from L2 before they are used
* The %age of pages that today don't get zeroed
versus
* The %age of the page that you are not going to read if you zero on use
  but does wipe a portion of L1 cache

add to that
* Reading a just allocated page is much more rare than writing to it.
  It's just zeros after all ;-)
  it is unclear (and cpu dependent) if writing makes it matter if the
  old (zero) data is in the cache or not, reducing the value of your
  "but it's now in the cache" value argument.
* My assumption is that allocations are more latency sensitive than
  free. After all, on allocate, you're going to use it, while on free
  you're done with what you wanted to do, and performance of that on
  average is assumed by me to matter less.
* We "need" to zero-on-allocate while holding the mmap semaphore,
  on free we clearly don't. We know this gives lock contention in 
  highly threaded workloads... and zero-on-free gets rid of that
  entirely.


-- 
Arjan van de Ven 	Intel Open Source Technology Centre
For development, discussion and tips for power savings, 
visit http://www.lesswatts.org

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-31 14:38                             ` Arjan van de Ven
@ 2009-05-31 15:03                               ` Arjan van de Ven
  -1 siblings, 0 replies; 220+ messages in thread
From: Arjan van de Ven @ 2009-05-31 15:03 UTC (permalink / raw)
  To: Arjan van de Ven
  Cc: Peter Zijlstra, Larry H.,
	Alan Cox, Ingo Molnar, Rik van Riel, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar, pageexec

On Sun, 31 May 2009 07:38:26 -0700
Arjan van de Ven <arjan@infradead.org> wrote:

> > 
> > 
> > Really, get a life, go fix real bugs. Don't make our kernel slower 
> 
> the "make it slower" is an assumption on your part.
> I'm not convinced. Would like to see data!
> 


btw if the performance difference is basically a wash (as I'm
suspecting), then we SHOULD do zero-on-free, just out of general
principles. 

Ingo mentioned the kernel stack, and that's a good point, we ought
to have a way to zero the rest of the stack inside the kernel, at
which point you could do things like providing a command line option
(or sysctl?) to call that from the munmap codepath or so...
(after all there you do a tlb flush and other expensive things as well)

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-31 15:03                               ` Arjan van de Ven
  0 siblings, 0 replies; 220+ messages in thread
From: Arjan van de Ven @ 2009-05-31 15:03 UTC (permalink / raw)
  To: Arjan van de Ven
  Cc: Peter Zijlstra, Larry H.,
	Alan Cox, Ingo Molnar, Rik van Riel, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar, pageexec

On Sun, 31 May 2009 07:38:26 -0700
Arjan van de Ven <arjan@infradead.org> wrote:

> > 
> > 
> > Really, get a life, go fix real bugs. Don't make our kernel slower 
> 
> the "make it slower" is an assumption on your part.
> I'm not convinced. Would like to see data!
> 


btw if the performance difference is basically a wash (as I'm
suspecting), then we SHOULD do zero-on-free, just out of general
principles. 

Ingo mentioned the kernel stack, and that's a good point, we ought
to have a way to zero the rest of the stack inside the kernel, at
which point you could do things like providing a command line option
(or sysctl?) to call that from the munmap codepath or so...
(after all there you do a tlb flush and other expensive things as well)

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-31 12:16                                               ` Larry H.
@ 2009-05-31 16:25                                                 ` Alan Cox
  -1 siblings, 0 replies; 220+ messages in thread
From: Alan Cox @ 2009-05-31 16:25 UTC (permalink / raw)
  To: Larry H.
  Cc: Pekka Enberg, Rik van Riel, Ingo Molnar, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar, pageexec, Linus Torvalds

I am happy to help people who have trouble working with the community but
have something useful to offer and are willing to learn.

For the others I have a kill file, welcome to it.

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-31 16:25                                                 ` Alan Cox
  0 siblings, 0 replies; 220+ messages in thread
From: Alan Cox @ 2009-05-31 16:25 UTC (permalink / raw)
  To: Larry H.
  Cc: Pekka Enberg, Rik van Riel, Ingo Molnar, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar, pageexec, Linus Torvalds

I am happy to help people who have trouble working with the community but
have something useful to offer and are willing to learn.

For the others I have a kill file, welcome to it.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
  2009-05-30 14:45                                   ` Peter Zijlstra
  (?)
  (?)
@ 2009-06-05 13:15                                   ` Pavel Machek
  -1 siblings, 0 replies; 220+ messages in thread
From: Pavel Machek @ 2009-06-05 13:15 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Alan Cox, pageexec, Larry H.,
	Arjan van de Ven, Ingo Molnar, Rik van Riel, linux-kernel,
	Linus Torvalds, linux-mm, Ingo Molnar

Hi!

> > > Right, so the whole point is to minimize the impact of actual bugs,
> > > right? So why not focus on fixing those actual bugs? Can we create tools
> > > to help us find such bugs faster? We use sparse for a lot of static
> > > checking, we create things like lockdep and kmemcheck to dynamically
> > > find trouble.
> > > 
> > > Can we instead of working around a problem, fix the actual problem?
> > 
> > Why do cars have crashworthiness and seatbelts ? Why not fix the actual
> > problem (driving errors) ? I mean lets face it they make the vehicle
> > heavier, less fuel efficient, less fun and more annoying to use.
> 
> We can't find every crash bug either, yet we still ship the kernel and
> people actually use it too.
> 
> What makes these security bugs so much more important than all the other
> ones?

Impact of normal bug is crash -- solved by reboot.

Impact of nasty bug is data corruption -- very rare, solved by
reinstall.

Impact of security bug is 'it is not your machine any more' (or worse,
as in 'it is not your bank account any more') -- reinstall needed,
too, and maybe worse.

So yes, I believe we should do some memory clearing.

> As long as that openoffice or firefox instance keeps running, there's
> nothing in the world the kernel can do to make it more secure.

True.

> If you really write documents that sekrit you simply shouldn't be using
> such software but use an editor that is written by people as paranoid as
> seems to be advocated here.

I may avoid openoffice but I'd still like vi on a linux system.
								Pavel
-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-28 18:56 ` pageexec
  0 siblings, 0 replies; 220+ messages in thread
From: pageexec @ 2009-05-28 18:56 UTC (permalink / raw)
  To: Alan Cox, Ingo Molnar
  Cc: Rik van Riel, Larry H.,
	linux-kernel, Linus Torvalds, linux-mm, Ingo Molnar,
	Linus Torvalds

On 28 May 2009 at 20:48, Alan Cox , Ingo Molnar wrote:

> last year while developing/debugging something else i also ran some kernel
> compilation tests and managed to dig out this one for you ('all' refers to
> all of PaX):
> 
> ------------------------------------------------------------------------------------------
> make -j4 2.6.24-rc7-i386-pax compiling 2.6.24-rc7-i386-pax (all with SANITIZE, no PARAVIRT)

addendum: i just checked and that version didn't omit GFP_ZERO handling therefore
the current version should have better performance, at least on this kind of workload
where lots of anonymous userland pages are instantiated.


^ permalink raw reply	[flat|nested] 220+ messages in thread

* Re: [patch 0/5] Support for sanitization flag in low-level page allocator
@ 2009-05-28 18:56 ` pageexec
  0 siblings, 0 replies; 220+ messages in thread
From: pageexec @ 2009-05-28 18:56 UTC (permalink / raw)
  To: Alan Cox, Ingo Molnar
  Cc: Rik van Riel, Larry H.,
	linux-kernel, Linus Torvalds, linux-mm, Ingo Molnar,
	Linus Torvalds

On 28 May 2009 at 20:48, Alan Cox , Ingo Molnar wrote:

> last year while developing/debugging something else i also ran some kernel
> compilation tests and managed to dig out this one for you ('all' refers to
> all of PaX):
> 
> ------------------------------------------------------------------------------------------
> make -j4 2.6.24-rc7-i386-pax compiling 2.6.24-rc7-i386-pax (all with SANITIZE, no PARAVIRT)

addendum: i just checked and that version didn't omit GFP_ZERO handling therefore
the current version should have better performance, at least on this kind of workload
where lots of anonymous userland pages are instantiated.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 220+ messages in thread

end of thread, other threads:[~2009-06-06  6:45 UTC | newest]

Thread overview: 220+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-05-20 18:30 [patch 0/5] Support for sanitization flag in low-level page allocator Larry H.
2009-05-20 18:30 ` Larry H.
2009-05-20 20:42 ` Peter Zijlstra
2009-05-20 20:42   ` Peter Zijlstra
2009-05-20 21:24   ` Larry H.
2009-05-20 21:24     ` Larry H.
2009-05-21 15:21     ` Robin Holt
2009-05-21 15:21       ` Robin Holt
2009-05-21 18:43       ` Larry H.
2009-05-21 18:43         ` Larry H.
2009-05-29 22:58     ` Andrew Morton
2009-05-29 22:58       ` Andrew Morton
2009-05-30  7:00       ` Larry H.
2009-05-30  7:12       ` Pekka Enberg
2009-05-30  7:12         ` Pekka Enberg
2009-05-30  7:35         ` Larry H.
2009-05-30  7:35           ` Larry H.
2009-05-30  7:39           ` Pekka Enberg
2009-05-30  7:39             ` Pekka Enberg
2009-05-21 19:08   ` Rik van Riel
2009-05-21 19:08     ` Rik van Riel
2009-05-21 19:26     ` Alan Cox
2009-05-21 19:26       ` Alan Cox
2009-05-21 19:56       ` Larry H.
2009-05-21 19:56         ` Larry H.
2009-05-21 20:47         ` Alan Cox
2009-05-21 20:47           ` Alan Cox
2009-05-21 21:46           ` Larry H.
2009-05-21 22:47             ` Alan Cox
2009-05-21 22:47               ` Alan Cox
2009-05-22 11:22               ` Larry H.
2009-05-22 11:22                 ` Larry H.
2009-05-22 13:37                 ` Alan Cox
2009-05-22 13:37                   ` Alan Cox
2009-05-26 19:02       ` Pavel Machek
2009-05-26 19:02         ` Pavel Machek
2009-05-21 19:17 ` Rik van Riel
2009-05-21 19:30   ` Larry H.
2009-05-22  7:34   ` Ingo Molnar
2009-05-22 11:38     ` Larry H.
2009-05-22 11:38       ` Larry H.
2009-05-22 13:39       ` Alan Cox
2009-05-22 13:39         ` Alan Cox
2009-05-22 18:03         ` Larry H.
2009-05-22 18:03           ` Larry H.
2009-05-22 18:21           ` Alan Cox
2009-05-22 18:21             ` Alan Cox
2009-05-22 23:25             ` [PATCH] Support for kernel memory sanitization Larry H.
2009-05-22 23:52               ` Randy Dunlap
2009-05-22 23:40             ` [patch 0/5] Support for sanitization flag in low-level page allocator Larry H.
2009-05-23  8:09               ` Alan Cox
2009-05-23  8:09                 ` Alan Cox
2009-05-23 15:56                 ` Arjan van de Ven
2009-05-23 15:56                   ` Arjan van de Ven
2009-05-23 18:21                   ` [PATCH] Support for unconditional page sanitization Larry H.
2009-05-23 18:21                     ` Larry H.
2009-05-23 21:05                     ` Arjan van de Ven
2009-05-23 21:05                       ` Arjan van de Ven
2009-05-24 10:19                       ` pageexec
2009-05-24 10:19                         ` pageexec
2009-05-24 16:38                         ` Arjan van de Ven
2009-05-24 16:38                           ` Arjan van de Ven
2009-05-28 19:36                   ` [patch 0/5] Support for sanitization flag in low-level page allocator Peter Zijlstra
2009-05-28 19:36                     ` Peter Zijlstra
2009-05-29 14:32                     ` Arjan van de Ven
2009-05-29 14:32                       ` Arjan van de Ven
2009-05-30  5:48                       ` Larry H.
2009-05-30  5:48                         ` Larry H.
2009-05-30 10:39                         ` Peter Zijlstra
2009-05-30 10:39                           ` Peter Zijlstra
2009-05-30 10:43                           ` Larry H.
2009-05-30 10:43                             ` Larry H.
2009-05-30 11:42                           ` pageexec
2009-05-30 11:42                             ` pageexec
2009-05-30 13:21                             ` Peter Zijlstra
2009-05-30 13:21                               ` Peter Zijlstra
2009-05-30 13:24                               ` Peter Zijlstra
2009-05-30 13:24                                 ` Peter Zijlstra
2009-05-30 13:54                               ` pageexec
2009-05-30 13:54                                 ` pageexec
2009-05-30 14:04                                 ` Larry H.
2009-05-30 14:04                                   ` Larry H.
2009-05-30 14:13                                 ` Rik van Riel
2009-05-30 14:13                                   ` Rik van Riel
2009-05-30 14:08                               ` Rik van Riel
2009-05-30 14:08                                 ` Rik van Riel
2009-05-30 14:30                               ` Alan Cox
2009-05-30 14:45                                 ` Peter Zijlstra
2009-05-30 14:45                                   ` Peter Zijlstra
2009-05-30 14:48                                   ` Rik van Riel
2009-05-30 14:48                                     ` Rik van Riel
2009-05-30 17:00                                     ` Larry H.
2009-05-30 17:00                                       ` Larry H.
2009-05-30 17:25                                       ` Larry H.
2009-05-30 17:25                                         ` Larry H.
2009-05-30 18:32                                         ` Ingo Molnar
2009-05-30 18:32                                           ` Ingo Molnar
2009-06-05 13:15                                   ` Pavel Machek
2009-05-31 14:38                           ` Arjan van de Ven
2009-05-31 14:38                             ` Arjan van de Ven
2009-05-31 15:03                             ` Arjan van de Ven
2009-05-31 15:03                               ` Arjan van de Ven
2009-05-22 18:37           ` Nai Xia
2009-05-22 18:37             ` Nai Xia
2009-05-22 19:18           ` Nai Xia
2009-05-22 19:18             ` Nai Xia
2009-05-23 12:49       ` Ingo Molnar
2009-05-23 12:49         ` Ingo Molnar
2009-05-23 22:28         ` Larry H.
2009-05-23 22:28           ` Larry H.
2009-05-23 22:42         ` Rik van Riel
2009-05-23 22:42           ` Rik van Riel
2009-05-25  1:17           ` [PATCH] Sanitize memory on kfree() and kmem_cache_free() Larry H.
2009-05-25  1:17             ` Larry H.
2009-05-27 22:34           ` [patch 0/5] Support for sanitization flag in low-level page allocator Ingo Molnar
2009-05-27 22:34             ` Ingo Molnar
2009-05-28  6:27             ` Alan Cox
2009-05-28  6:27               ` Alan Cox
2009-05-28  7:00               ` Larry H.
2009-05-28  7:00                 ` Larry H.
2009-05-28  9:08               ` Ingo Molnar
2009-05-28  9:08                 ` Ingo Molnar
2009-05-28 11:50                 ` Alan Cox
2009-05-28 11:50                   ` Alan Cox
2009-05-28 19:44                   ` Peter Zijlstra
2009-05-28 19:44                     ` Peter Zijlstra
2009-05-30  7:35                   ` Pekka Enberg
2009-05-30  7:35                     ` Pekka Enberg
2009-05-30  7:50                     ` Larry H.
2009-05-30  7:50                       ` Larry H.
2009-05-30  7:53                       ` Pekka Enberg
2009-05-30  7:53                         ` Pekka Enberg
2009-05-30  8:20                         ` Larry H.
2009-05-30  8:20                           ` Larry H.
2009-05-30  8:33                           ` Pekka Enberg
2009-05-30  8:33                             ` Pekka Enberg
2009-05-30 15:05                           ` Ray Lee
2009-05-30 15:05                             ` Ray Lee
2009-05-30 17:34                           ` Ingo Molnar
2009-05-30 17:34                             ` Ingo Molnar
2009-05-30 18:03                             ` Larry H.
2009-05-30 18:03                               ` Larry H.
2009-05-30 18:21                               ` Ingo Molnar
2009-05-30 18:21                                 ` Ingo Molnar
2009-05-30 18:45                                 ` Larry H.
2009-05-30 18:45                                   ` Larry H.
2009-05-30 19:08                                   ` Ingo Molnar
2009-05-30 19:08                                     ` Ingo Molnar
2009-05-30 20:39                                     ` Rik van Riel
2009-05-30 20:39                                       ` Rik van Riel
2009-05-30 20:53                                       ` Pekka Enberg
2009-05-30 20:53                                         ` Pekka Enberg
2009-05-30 21:33                                         ` Larry H.
2009-05-30 21:33                                           ` Larry H.
2009-05-30 23:13                                           ` Alan Cox
2009-05-30 23:13                                             ` Alan Cox
2009-05-30 23:18                                             ` Larry H.
2009-05-30 23:18                                               ` Larry H.
2009-05-31  6:30                                               ` Pekka Enberg
2009-05-31  6:30                                                 ` Pekka Enberg
2009-05-31 11:49                                                 ` Larry H.
2009-05-31 11:49                                                   ` Larry H.
2009-05-31  7:17                                           ` Pekka Enberg
2009-05-31  7:17                                             ` Pekka Enberg
2009-05-31 11:58                                             ` Larry H.
2009-05-31 11:58                                               ` Larry H.
2009-05-31 12:16                                               ` Pekka Enberg
2009-05-31 12:16                                                 ` Pekka Enberg
2009-05-31 12:30                                                 ` Larry H.
2009-05-31 12:30                                                   ` Larry H.
2009-05-31 12:35                                                   ` Pekka Enberg
2009-05-31 12:35                                                     ` Pekka Enberg
2009-05-30 23:10                                         ` Alan Cox
2009-05-30 23:10                                           ` Alan Cox
2009-05-31  6:14                                           ` Pekka Enberg
2009-05-31  6:14                                             ` Pekka Enberg
2009-05-31 10:24                                             ` Alan Cox
2009-05-31 10:24                                               ` Alan Cox
2009-05-31 10:24                                               ` Pekka Enberg
2009-05-31 10:24                                                 ` Pekka Enberg
2009-05-31 12:16                                             ` Larry H.
2009-05-31 12:16                                               ` Larry H.
2009-05-31 12:19                                               ` Pekka Enberg
2009-05-31 12:19                                                 ` Pekka Enberg
2009-05-31 16:25                                               ` Alan Cox
2009-05-31 16:25                                                 ` Alan Cox
2009-05-30 22:10                                       ` Ingo Molnar
2009-05-30 22:10                                         ` Ingo Molnar
2009-05-30 23:15                                         ` Alan Cox
2009-05-30 23:15                                           ` Alan Cox
2009-05-30 20:22                               ` Pekka Enberg
2009-05-30 20:22                                 ` Pekka Enberg
2009-05-30 22:14                                 ` Ingo Molnar
2009-05-30 22:14                                   ` Ingo Molnar
2009-05-30 17:39                         ` Ingo Molnar
2009-05-30 17:39                           ` Ingo Molnar
2009-05-30  7:57                       ` Pekka Enberg
2009-05-30  7:57                         ` Pekka Enberg
2009-05-30  9:05                         ` Larry H.
2009-05-30  9:05                           ` Larry H.
2009-05-30 17:46                           ` Ingo Molnar
2009-05-30 17:46                             ` Ingo Molnar
2009-05-30 18:09                             ` Larry H.
2009-05-30 18:09                               ` Larry H.
2009-05-30  8:31                     ` Alan Cox
2009-05-30  8:31                       ` Alan Cox
2009-05-30  8:35                       ` Pekka Enberg
2009-05-30  8:35                         ` Pekka Enberg
2009-05-30  9:27                         ` Larry H.
2009-05-30  9:27                           ` Larry H.
2009-05-28 18:48                 ` pageexec
2009-05-28 18:48                   ` pageexec
2009-05-30 17:50                   ` Ingo Molnar
2009-05-30 17:50                     ` Ingo Molnar
2009-05-28 12:48 ` Pavel Machek
2009-05-28 12:48   ` Pavel Machek
2009-05-28 12:55   ` Larry H.
2009-05-28 12:55     ` Larry H.
2009-05-28 18:56 pageexec
2009-05-28 18:56 ` pageexec

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.