LKML Archive on lore.kernel.org
 help / color / Atom feed
* [PATCH v4] zram: extend zero pages to same element pages
@ 2017-02-05 15:16 Minchan Kim
  2017-02-06 14:49 ` memfill Matthew Wilcox
                   ` (3 more replies)
  0 siblings, 4 replies; 15+ messages in thread
From: Minchan Kim @ 2017-02-05 15:16 UTC (permalink / raw)
  To: Andrew Morton
  Cc: linux-kernel, sergey.senozhatsky, willy, iamjoonsoo.kim, ngupta,
	zhouxianrong, zhouxiyu, weidu.du, zhangshiming5, Mi.Sophia.Wang,
	won.ho.park, Minchan Kim

From: zhouxianrong <zhouxianrong@huawei.com>

the idea is that without doing more calculations we extend zero pages
to same element pages for zram. zero page is special case of
same element page with zero element.

1. the test is done under android 7.0
2. startup too many applications circularly
3. sample the zero pages, same pages (non-zero element)
   and total pages in function page_zero_filled

the result is listed as below:

ZERO	SAME	TOTAL
36214	17842	598196

		ZERO/TOTAL	 SAME/TOTAL	  (ZERO+SAME)/TOTAL ZERO/SAME
AVERAGE	0.060631909	 0.024990816  0.085622726		2.663825038
STDEV	0.00674612	 0.005887625  0.009707034		2.115881328
MAX		0.069698422	 0.030046087  0.094975336		7.56043956
MIN		0.03959586	 0.007332205  0.056055193		1.928985507

from above data, the benefit is about 2.5% and up to 3% of total
swapout pages.

the defect of the patch is that when we recover a page from a
non-zero element page, the operations are inefficient for partial
read.

This patch extends zero_page to same_page, so if any user has been
monitoring zero_pages, he may be surprised that the number is increased,
but it's not harmful, I believe.

Link: http://lkml.kernel.org/r/1483692145-75357-1-git-send-email-zhouxianrong@huawei.com
Signed-off-by: zhouxianrong <zhouxianrong@huawei.com>
Signed-off-by: Minchan Kim <minchan@kernel.org>
---
 Documentation/blockdev/zram.txt |  6 ++--
 drivers/block/zram/zram_drv.c   | 80 ++++++++++++++++++++++++++++-------------
 drivers/block/zram/zram_drv.h   |  9 +++--
 3 files changed, 64 insertions(+), 31 deletions(-)

diff --git a/Documentation/blockdev/zram.txt b/Documentation/blockdev/zram.txt
index 1c0c08d..4fced8a 100644
--- a/Documentation/blockdev/zram.txt
+++ b/Documentation/blockdev/zram.txt
@@ -201,8 +201,8 @@ File /sys/block/zram<id>/mm_stat
 The stat file represents device's mm statistics. It consists of a single
 line of text and contains the following stats separated by whitespace:
  orig_data_size   uncompressed size of data stored in this disk.
-                  This excludes zero-filled pages (zero_pages) since no
-                  memory is allocated for them.
+		  This excludes same-element-filled pages (same_pages) since
+		  no memory is allocated for them.
                   Unit: bytes
  compr_data_size  compressed size of data stored in this disk
  mem_used_total   the amount of memory allocated for this disk. This
@@ -214,7 +214,7 @@ The stat file represents device's mm statistics. It consists of a single
                   the compressed data
  mem_used_max     the maximum amount of memory zram have consumed to
                   store the data
- zero_pages       the number of zero filled pages written to this disk.
+ same_pages       the number of same element filled pages written to this disk.
                   No memory is allocated for such pages.
  pages_compacted  the number of pages freed during compaction
 
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index eaeff06..c20b05a 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -74,6 +74,17 @@ static void zram_clear_flag(struct zram_meta *meta, u32 index,
 	meta->table[index].value &= ~BIT(flag);
 }
 
+static inline void zram_set_element(struct zram_meta *meta, u32 index,
+			unsigned long element)
+{
+	meta->table[index].element = element;
+}
+
+static inline void zram_clear_element(struct zram_meta *meta, u32 index)
+{
+	meta->table[index].element = 0;
+}
+
 static size_t zram_get_obj_size(struct zram_meta *meta, u32 index)
 {
 	return meta->table[index].value & (BIT(ZRAM_FLAG_SHIFT) - 1);
@@ -146,31 +157,46 @@ static inline void update_used_max(struct zram *zram,
 	} while (old_max != cur_max);
 }
 
-static bool page_zero_filled(void *ptr)
+static inline void zram_fill_page(char *ptr, unsigned long len,
+					unsigned long value)
+{
+	int i;
+	unsigned long *page = (unsigned long *)ptr;
+
+	WARN_ON_ONCE(!IS_ALIGNED(len, sizeof(unsigned long)));
+
+	if (likely(value == 0)) {
+		memset(ptr, 0, len);
+	} else {
+		for (i = 0; i < len / sizeof(*page); i++)
+			page[i] = value;
+	}
+}
+
+static bool page_same_filled(void *ptr, unsigned long *element)
 {
 	unsigned int pos;
 	unsigned long *page;
 
 	page = (unsigned long *)ptr;
 
-	for (pos = 0; pos != PAGE_SIZE / sizeof(*page); pos++) {
-		if (page[pos])
+	for (pos = 0; pos < PAGE_SIZE / sizeof(*page) - 1; pos++) {
+		if (page[pos] != page[pos + 1])
 			return false;
 	}
 
+	*element = page[pos];
+
 	return true;
 }
 
-static void handle_zero_page(struct bio_vec *bvec)
+static void handle_same_page(struct bio_vec *bvec, unsigned long element)
 {
 	struct page *page = bvec->bv_page;
 	void *user_mem;
 
 	user_mem = kmap_atomic(page);
-	if (is_partial_io(bvec))
-		memset(user_mem + bvec->bv_offset, 0, bvec->bv_len);
-	else
-		clear_page(user_mem);
+	zram_fill_page(user_mem + bvec->bv_offset, bvec->bv_len, element);
 	kunmap_atomic(user_mem);
 
 	flush_dcache_page(page);
@@ -363,7 +389,7 @@ static ssize_t mm_stat_show(struct device *dev,
 			mem_used << PAGE_SHIFT,
 			zram->limit_pages << PAGE_SHIFT,
 			max_used << PAGE_SHIFT,
-			(u64)atomic64_read(&zram->stats.zero_pages),
+			(u64)atomic64_read(&zram->stats.same_pages),
 			pool_stats.pages_compacted);
 	up_read(&zram->init_lock);
 
@@ -450,18 +476,20 @@ static void zram_free_page(struct zram *zram, size_t index)
 	struct zram_meta *meta = zram->meta;
 	unsigned long handle = meta->table[index].handle;
 
-	if (unlikely(!handle)) {
-		/*
-		 * No memory is allocated for zero filled pages.
-		 * Simply clear zero page flag.
-		 */
-		if (zram_test_flag(meta, index, ZRAM_ZERO)) {
-			zram_clear_flag(meta, index, ZRAM_ZERO);
-			atomic64_dec(&zram->stats.zero_pages);
-		}
+	/*
+	 * No memory is allocated for same element filled pages.
+	 * Simply clear same page flag.
+	 */
+	if (zram_test_flag(meta, index, ZRAM_SAME)) {
+		zram_clear_flag(meta, index, ZRAM_SAME);
+		zram_clear_element(meta, index);
+		atomic64_dec(&zram->stats.same_pages);
 		return;
 	}
 
+	if (!handle)
+		return;
+
 	zs_free(meta->mem_pool, handle);
 
 	atomic64_sub(zram_get_obj_size(meta, index),
@@ -484,9 +512,9 @@ static int zram_decompress_page(struct zram *zram, char *mem, u32 index)
 	handle = meta->table[index].handle;
 	size = zram_get_obj_size(meta, index);
 
-	if (!handle || zram_test_flag(meta, index, ZRAM_ZERO)) {
+	if (!handle || zram_test_flag(meta, index, ZRAM_SAME)) {
 		bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
-		clear_page(mem);
+		zram_fill_page(mem, PAGE_SIZE, meta->table[index].element);
 		return 0;
 	}
 
@@ -522,9 +550,9 @@ static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
 
 	bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
 	if (unlikely(!meta->table[index].handle) ||
-			zram_test_flag(meta, index, ZRAM_ZERO)) {
+			zram_test_flag(meta, index, ZRAM_SAME)) {
 		bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
-		handle_zero_page(bvec);
+		handle_same_page(bvec, meta->table[index].element);
 		return 0;
 	}
 	bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
@@ -572,6 +600,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
 	struct zram_meta *meta = zram->meta;
 	struct zcomp_strm *zstrm = NULL;
 	unsigned long alloced_pages;
+	unsigned long element;
 
 	page = bvec->bv_page;
 	if (is_partial_io(bvec)) {
@@ -600,16 +629,17 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
 		uncmem = user_mem;
 	}
 
-	if (page_zero_filled(uncmem)) {
+	if (page_same_filled(uncmem, &element)) {
 		if (user_mem)
 			kunmap_atomic(user_mem);
 		/* Free memory associated with this sector now. */
 		bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
 		zram_free_page(zram, index);
-		zram_set_flag(meta, index, ZRAM_ZERO);
+		zram_set_flag(meta, index, ZRAM_SAME);
+		zram_set_element(meta, index, element);
 		bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
 
-		atomic64_inc(&zram->stats.zero_pages);
+		atomic64_inc(&zram->stats.same_pages);
 		ret = 0;
 		goto out;
 	}
diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h
index 2692554..caeff51 100644
--- a/drivers/block/zram/zram_drv.h
+++ b/drivers/block/zram/zram_drv.h
@@ -61,7 +61,7 @@ static const size_t max_zpage_size = PAGE_SIZE / 4 * 3;
 /* Flags for zram pages (table[page_no].value) */
 enum zram_pageflags {
 	/* Page consists entirely of zeros */
-	ZRAM_ZERO = ZRAM_FLAG_SHIFT,
+	ZRAM_SAME = ZRAM_FLAG_SHIFT,
 	ZRAM_ACCESS,	/* page is now accessed */
 
 	__NR_ZRAM_PAGEFLAGS,
@@ -71,7 +71,10 @@ enum zram_pageflags {
 
 /* Allocated for each disk page */
 struct zram_table_entry {
-	unsigned long handle;
+	union {
+		unsigned long handle;
+		unsigned long element;
+	};
 	unsigned long value;
 };
 
@@ -83,7 +86,7 @@ struct zram_stats {
 	atomic64_t failed_writes;	/* can happen when memory is too low */
 	atomic64_t invalid_io;	/* non-page-aligned I/O requests */
 	atomic64_t notify_free;	/* no. of swap slot free notifications */
-	atomic64_t zero_pages;		/* no. of zero filled pages */
+	atomic64_t same_pages;		/* no. of same element filled pages */
 	atomic64_t pages_stored;	/* no. of pages currently stored */
 	atomic_long_t max_used_pages;	/* no. of maximum pages stored */
 	atomic64_t writestall;		/* no. of write slow paths */
-- 
2.7.4

^ permalink raw reply	[flat|nested] 15+ messages in thread

* memfill
  2017-02-05 15:16 [PATCH v4] zram: extend zero pages to same element pages Minchan Kim
@ 2017-02-06 14:49 ` Matthew Wilcox
  2017-02-07  2:47   ` memfill zhouxianrong
                     ` (2 more replies)
  2017-02-07  6:57 ` [PATCH v4] zram: extend zero pages to same element pages Minchan Kim
                   ` (2 subsequent siblings)
  3 siblings, 3 replies; 15+ messages in thread
From: Matthew Wilcox @ 2017-02-06 14:49 UTC (permalink / raw)
  To: Minchan Kim
  Cc: Andrew Morton, linux-kernel, linux-arch, sergey.senozhatsky,
	iamjoonsoo.kim, ngupta, zhouxianrong, zhouxiyu, weidu.du,
	zhangshiming5, Mi.Sophia.Wang, won.ho.park


[adding linux-arch to see if anyone there wants to do an optimised
version of memfill for their CPU]

On Mon, Feb 06, 2017 at 12:16:44AM +0900, Minchan Kim wrote:
> +static inline void zram_fill_page(char *ptr, unsigned long len,
> +					unsigned long value)
> +{
> +	int i;
> +	unsigned long *page = (unsigned long *)ptr;
> +
> +	WARN_ON_ONCE(!IS_ALIGNED(len, sizeof(unsigned long)));
> +
> +	if (likely(value == 0)) {
> +		memset(ptr, 0, len);
> +	} else {
> +		for (i = 0; i < len / sizeof(*page); i++)
> +			page[i] = value;
> +	}
> +}

I would suggest:

#ifndef __HAVE_ARCH_MEMFILL
/** 
 * memfill - Fill a region of memory with the given value
 * @s: Pointer to the start of the region.
 * @v: The word to fill the region with.
 * @n: The size of the region.
 * 
 * Differs from memset() in that it fills with an unsigned long instead of 
 * a byte.  The pointer and the size must be aligned to unsigned long.
 */
void memfill(unsigned long *s, unsigned long v, size_t n)
{
	WARN_ON_ONCE(!IS_ALIGNED(n, sizeof(v)));

	if (likely(v == 0)) {
		memset(s, 0, n);
	} else {
		while (n) {
			*s++ = v;
			n -= sizeof(v);
		}
	}
}
EXPORT_SYMBOL(memfill);
#endif

(I would also suggest this move to lib/string.c and architectures be
given the opportunity to provide an optimised version of memfill).

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: memfill
  2017-02-06 14:49 ` memfill Matthew Wilcox
@ 2017-02-07  2:47   ` zhouxianrong
  2017-02-07  4:59   ` memfill Minchan Kim
  2017-02-07 19:07   ` memfill James Bottomley
  2 siblings, 0 replies; 15+ messages in thread
From: zhouxianrong @ 2017-02-07  2:47 UTC (permalink / raw)
  To: Matthew Wilcox, Minchan Kim
  Cc: Andrew Morton, linux-kernel, linux-arch, sergey.senozhatsky,
	iamjoonsoo.kim, ngupta, zhouxiyu, weidu.du, zhangshiming5,
	Mi.Sophia.Wang, won.ho.park



On 2017/2/6 22:49, Matthew Wilcox wrote:
>
> [adding linux-arch to see if anyone there wants to do an optimised
> version of memfill for their CPU]
>
> On Mon, Feb 06, 2017 at 12:16:44AM +0900, Minchan Kim wrote:
>> +static inline void zram_fill_page(char *ptr, unsigned long len,
>> +					unsigned long value)
>> +{
>> +	int i;
>> +	unsigned long *page = (unsigned long *)ptr;
>> +
>> +	WARN_ON_ONCE(!IS_ALIGNED(len, sizeof(unsigned long)));
>> +
>> +	if (likely(value == 0)) {
>> +		memset(ptr, 0, len);
>> +	} else {
>> +		for (i = 0; i < len / sizeof(*page); i++)
>> +			page[i] = value;
>> +	}
>> +}
>
> I would suggest:
>
> #ifndef __HAVE_ARCH_MEMFILL
> /**
>  * memfill - Fill a region of memory with the given value
>  * @s: Pointer to the start of the region.
>  * @v: The word to fill the region with.
>  * @n: The size of the region.
>  *
>  * Differs from memset() in that it fills with an unsigned long instead of
>  * a byte.  The pointer and the size must be aligned to unsigned long.
>  */
> void memfill(unsigned long *s, unsigned long v, size_t n)
> {
> 	WARN_ON_ONCE(!IS_ALIGNED(n, sizeof(v)));
>
> 	if (likely(v == 0)) {
> 		memset(s, 0, n);
> 	} else {
> 		while (n) {
> 			*s++ = v;
> 			n -= sizeof(v);
> 		}
> 	}
> }
> EXPORT_SYMBOL(memfill);
> #endif
>
> (I would also suggest this move to lib/string.c and architectures be
> given the opportunity to provide an optimised version of memfill).
>

good idea, i hope kernel could support family functions like memfill/memset_long
in lib. but this is beyond zram scope.
>
> .
>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: memfill
  2017-02-06 14:49 ` memfill Matthew Wilcox
  2017-02-07  2:47   ` memfill zhouxianrong
@ 2017-02-07  4:59   ` Minchan Kim
  2017-02-07 19:07   ` memfill James Bottomley
  2 siblings, 0 replies; 15+ messages in thread
From: Minchan Kim @ 2017-02-07  4:59 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Andrew Morton, linux-kernel, linux-arch, sergey.senozhatsky,
	iamjoonsoo.kim, ngupta, zhouxianrong, zhouxiyu, weidu.du,
	zhangshiming5, Mi.Sophia.Wang, won.ho.park

Hi Matthew,

On Mon, Feb 06, 2017 at 06:49:02AM -0800, Matthew Wilcox wrote:
> 
> [adding linux-arch to see if anyone there wants to do an optimised
> version of memfill for their CPU]
> 
> On Mon, Feb 06, 2017 at 12:16:44AM +0900, Minchan Kim wrote:
> > +static inline void zram_fill_page(char *ptr, unsigned long len,
> > +					unsigned long value)
> > +{
> > +	int i;
> > +	unsigned long *page = (unsigned long *)ptr;
> > +
> > +	WARN_ON_ONCE(!IS_ALIGNED(len, sizeof(unsigned long)));
> > +
> > +	if (likely(value == 0)) {
> > +		memset(ptr, 0, len);
> > +	} else {
> > +		for (i = 0; i < len / sizeof(*page); i++)
> > +			page[i] = value;
> > +	}
> > +}
> 
> I would suggest:
> 
> #ifndef __HAVE_ARCH_MEMFILL
> /** 
>  * memfill - Fill a region of memory with the given value
>  * @s: Pointer to the start of the region.
>  * @v: The word to fill the region with.
>  * @n: The size of the region.
>  * 
>  * Differs from memset() in that it fills with an unsigned long instead of 
>  * a byte.  The pointer and the size must be aligned to unsigned long.
>  */
> void memfill(unsigned long *s, unsigned long v, size_t n)
> {
> 	WARN_ON_ONCE(!IS_ALIGNED(n, sizeof(v)));
> 
> 	if (likely(v == 0)) {
> 		memset(s, 0, n);
> 	} else {
> 		while (n) {
> 			*s++ = v;
> 			n -= sizeof(v);
> 		}
> 	}
> }
> EXPORT_SYMBOL(memfill);
> #endif
> 
> (I would also suggest this move to lib/string.c and architectures be
> given the opportunity to provide an optimised version of memfill).

Thanks for suggestion, Matthew!

However, I want to mention zram's performance wouldn't be sensitive
for that function because non-zero memfill would be rare.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v4] zram: extend zero pages to same element pages
  2017-02-05 15:16 [PATCH v4] zram: extend zero pages to same element pages Minchan Kim
  2017-02-06 14:49 ` memfill Matthew Wilcox
@ 2017-02-07  6:57 ` Minchan Kim
  2017-02-07  9:40 ` memfill David Howells
  2017-03-11 14:56 ` memfill v2 now with ARM and x86 implementations Matthew Wilcox
  3 siblings, 0 replies; 15+ messages in thread
From: Minchan Kim @ 2017-02-07  6:57 UTC (permalink / raw)
  To: Andrew Morton
  Cc: linux-kernel, sergey.senozhatsky, willy, iamjoonsoo.kim, ngupta,
	zhouxianrong, zhouxiyu, weidu.du, zhangshiming5, Mi.Sophia.Wang,
	won.ho.park

Hi Andrew,

On Mon, Feb 06, 2017 at 12:16:44AM +0900, Minchan Kim wrote:
> From: zhouxianrong <zhouxianrong@huawei.com>
> 
> the idea is that without doing more calculations we extend zero pages
> to same element pages for zram. zero page is special case of
> same element page with zero element.
> 
> 1. the test is done under android 7.0
> 2. startup too many applications circularly
> 3. sample the zero pages, same pages (non-zero element)
>    and total pages in function page_zero_filled
> 
> the result is listed as below:
> 
> ZERO	SAME	TOTAL
> 36214	17842	598196
> 
> 		ZERO/TOTAL	 SAME/TOTAL	  (ZERO+SAME)/TOTAL ZERO/SAME
> AVERAGE	0.060631909	 0.024990816  0.085622726		2.663825038
> STDEV	0.00674612	 0.005887625  0.009707034		2.115881328
> MAX		0.069698422	 0.030046087  0.094975336		7.56043956
> MIN		0.03959586	 0.007332205  0.056055193		1.928985507
> 
> from above data, the benefit is about 2.5% and up to 3% of total
> swapout pages.
> 
> the defect of the patch is that when we recover a page from a
> non-zero element page, the operations are inefficient for partial
> read.
> 
> This patch extends zero_page to same_page, so if any user has been
> monitoring zero_pages, he may be surprised that the number is increased,
> but it's not harmful, I believe.
> 
> Link: http://lkml.kernel.org/r/1483692145-75357-1-git-send-email-zhouxianrong@huawei.com
> Signed-off-by: zhouxianrong <zhouxianrong@huawei.com>
> Signed-off-by: Minchan Kim <minchan@kernel.org>

zhouxianrong pointed out we shouldn't free same element pages in zram_meta_free.
Otherwise, it can encounter oops due to invalid handle passed into zram_free.
This patch fixes it. Please fold this patch into zram-extend-zero-pages-to-same-element-pages.patch.

Thanks.


>From e5a2a1dac4783f29d028170724578c0d11c80975 Mon Sep 17 00:00:00 2001
From: Minchan Kim <minchan@kernel.org>
Date: Tue, 7 Feb 2017 12:00:05 +0900
Subject: [PATCH] zram: do not free same element pages in zram_meta_free

zhouxianrong pointed out that we shouldn't free same element pages
in zram_meta_free. Otherwise, it will encounter oops due to invalid
handle value.

Signed-off-by: Minchan Kim <minchan@kernel.org>
---
Andrew,

Please fold it to zram-extend-zero-pages-to-same-element-pages.patch.

Thanks.

 drivers/block/zram/zram_drv.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index c20b05a84f21..a25d34a8af19 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -425,8 +425,11 @@ static void zram_meta_free(struct zram_meta *meta, u64 disksize)
 	/* Free all pages that are still in this zram device */
 	for (index = 0; index < num_pages; index++) {
 		unsigned long handle = meta->table[index].handle;
-
-		if (!handle)
+		/*
+		 * No memory is allocated for same element filled pages.
+		 * Simply clear same page flag.
+		 */
+		if (!handle || zram_test_flag(meta, index, ZRAM_SAME))
 			continue;
 
 		zs_free(meta->mem_pool, handle);
-- 
2.7.4

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: memfill
  2017-02-05 15:16 [PATCH v4] zram: extend zero pages to same element pages Minchan Kim
  2017-02-06 14:49 ` memfill Matthew Wilcox
  2017-02-07  6:57 ` [PATCH v4] zram: extend zero pages to same element pages Minchan Kim
@ 2017-02-07  9:40 ` David Howells
  2017-02-07 17:22   ` memfill Matthew Wilcox
  2017-02-07 17:29   ` memfill David Howells
  2017-03-11 14:56 ` memfill v2 now with ARM and x86 implementations Matthew Wilcox
  3 siblings, 2 replies; 15+ messages in thread
From: David Howells @ 2017-02-07  9:40 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: dhowells, Minchan Kim, Andrew Morton, linux-kernel, linux-arch,
	sergey.senozhatsky, iamjoonsoo.kim, ngupta, zhouxianrong,
	zhouxiyu, weidu.du, zhangshiming5, Mi.Sophia.Wang, won.ho.park

Matthew Wilcox <willy@infradead.org> wrote:

> [adding linux-arch to see if anyone there wants to do an optimised
> version of memfill for their CPU]

For mn10300, this is superfluous since the memset() implementation will do
optimised filling of up to 8 x 4 bytes per loop if the alignments suit.

This is also superfluous for frv as that will do up to 8 x 8 bytes per loop.

So on both those arches, memfill() should probably just wrap memset().

David

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: memfill
  2017-02-07  9:40 ` memfill David Howells
@ 2017-02-07 17:22   ` Matthew Wilcox
  2017-02-07 17:29   ` memfill David Howells
  1 sibling, 0 replies; 15+ messages in thread
From: Matthew Wilcox @ 2017-02-07 17:22 UTC (permalink / raw)
  To: David Howells
  Cc: Minchan Kim, Andrew Morton, linux-kernel, linux-arch,
	sergey.senozhatsky, iamjoonsoo.kim, ngupta, zhouxianrong,
	zhouxiyu, weidu.du, zhangshiming5, Mi.Sophia.Wang, won.ho.park

On Tue, Feb 07, 2017 at 09:40:04AM +0000, David Howells wrote:
> Matthew Wilcox <willy@infradead.org> wrote:
> 
> > [adding linux-arch to see if anyone there wants to do an optimised
> > version of memfill for their CPU]
> 
> For mn10300, this is superfluous since the memset() implementation will do
> optimised filling of up to 8 x 4 bytes per loop if the alignments suit.
> 
> This is also superfluous for frv as that will do up to 8 x 8 bytes per loop.
> 
> So on both those arches, memfill() should probably just wrap memset().

You've misunderstood the purpose of memfill.  memfill allows the caller
to specify a pattern which is not a single byte in size, eg memfill(addr,
0x12345678, 64) would result in 0x12345678 being reproduced 16 times.
memset(addr, 0x12345678, 64) would result in 0x78 being reproduced
64 times.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: memfill
  2017-02-07  9:40 ` memfill David Howells
  2017-02-07 17:22   ` memfill Matthew Wilcox
@ 2017-02-07 17:29   ` David Howells
  2017-02-07 19:03     ` memfill Matthew Wilcox
  1 sibling, 1 reply; 15+ messages in thread
From: David Howells @ 2017-02-07 17:29 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: dhowells, Minchan Kim, Andrew Morton, linux-kernel, linux-arch,
	sergey.senozhatsky, iamjoonsoo.kim, ngupta, zhouxianrong,
	zhouxiyu, weidu.du, zhangshiming5, Mi.Sophia.Wang, won.ho.park

Matthew Wilcox <willy@infradead.org> wrote:

> You've misunderstood the purpose of memfill.  memfill allows the caller
> to specify a pattern which is not a single byte in size, eg memfill(addr,
> 0x12345678, 64) would result in 0x12345678 being reproduced 16 times.
> memset(addr, 0x12345678, 64) would result in 0x78 being reproduced
> 64 times.

Ah.  Should it take a unsigned int rather than an unsigned long?

David

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: memfill
  2017-02-07 17:29   ` memfill David Howells
@ 2017-02-07 19:03     ` Matthew Wilcox
  0 siblings, 0 replies; 15+ messages in thread
From: Matthew Wilcox @ 2017-02-07 19:03 UTC (permalink / raw)
  To: David Howells
  Cc: Minchan Kim, Andrew Morton, linux-kernel, linux-arch,
	sergey.senozhatsky, iamjoonsoo.kim, ngupta, zhouxianrong,
	zhouxiyu, weidu.du, zhangshiming5, Mi.Sophia.Wang, won.ho.park

On Tue, Feb 07, 2017 at 05:29:15PM +0000, David Howells wrote:
> Matthew Wilcox <willy@infradead.org> wrote:
> 
> > You've misunderstood the purpose of memfill.  memfill allows the caller
> > to specify a pattern which is not a single byte in size, eg memfill(addr,
> > 0x12345678, 64) would result in 0x12345678 being reproduced 16 times.
> > memset(addr, 0x12345678, 64) would result in 0x78 being reproduced
> > 64 times.
> 
> Ah.  Should it take a unsigned int rather than an unsigned long?

Well, our one user so far is looking to use an unsigned long (indeed,
wouldn't be able to use it if it took an unsigned int).  If we have users
who want to memfill with an unsigned int, they can do something like:

void memfill_int(int *a, unsigned int v, size_t n)
{
	if ((unsigned long)a & (sizeof(unsigned long) - 1)) {
		*a++ = v;
		n -= sizeof(unsigned int);
	}
	memfill((unsigned long *)a, v | ((v << 16) << 16), n);
	if (n & (sizeof(unsigned long) - 1))
		a[n / sizeof(v)] = v;
}

... but since we know of no users today, I don't want to put that in.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: memfill
  2017-02-06 14:49 ` memfill Matthew Wilcox
  2017-02-07  2:47   ` memfill zhouxianrong
  2017-02-07  4:59   ` memfill Minchan Kim
@ 2017-02-07 19:07   ` James Bottomley
  2017-02-08 18:04     ` memfill Matthew Wilcox
  2 siblings, 1 reply; 15+ messages in thread
From: James Bottomley @ 2017-02-07 19:07 UTC (permalink / raw)
  To: Matthew Wilcox, Minchan Kim
  Cc: Andrew Morton, linux-kernel, linux-arch, sergey.senozhatsky,
	iamjoonsoo.kim, ngupta, zhouxianrong, zhouxiyu, weidu.du,
	zhangshiming5, Mi.Sophia.Wang, won.ho.park

On Mon, 2017-02-06 at 06:49 -0800, Matthew Wilcox wrote:
> [adding linux-arch to see if anyone there wants to do an optimised
> version of memfill for their CPU]
> 
> On Mon, Feb 06, 2017 at 12:16:44AM +0900, Minchan Kim wrote:
> > +static inline void zram_fill_page(char *ptr, unsigned long len,
> > +					unsigned long value)
> > +{
> > +	int i;
> > +	unsigned long *page = (unsigned long *)ptr;
> > +
> > +	WARN_ON_ONCE(!IS_ALIGNED(len, sizeof(unsigned long)));
> > +
> > +	if (likely(value == 0)) {
> > +		memset(ptr, 0, len);
> > +	} else {
> > +		for (i = 0; i < len / sizeof(*page); i++)
> > +			page[i] = value;
> > +	}
> > +}
> 
> I would suggest:
> 
> #ifndef __HAVE_ARCH_MEMFILL
> /** 
>  * memfill - Fill a region of memory with the given value
>  * @s: Pointer to the start of the region.
>  * @v: The word to fill the region with.
>  * @n: The size of the region.
>  * 
>  * Differs from memset() in that it fills with an unsigned long
> instead of 
>  * a byte.  The pointer and the size must be aligned to unsigned
> long.
>  */
> void memfill(unsigned long *s, unsigned long v, size_t n)

If we're going to do this, are you sure we wouldn't be wanting a string
fill instead of a memfill (because filling either by byte or long looks
a bit restrictive) assuming static strings that we can tell the compile
time size of, it would be easy for generic code to optimise.

James

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: memfill
  2017-02-07 19:07   ` memfill James Bottomley
@ 2017-02-08 18:04     ` Matthew Wilcox
  2017-02-08 21:01       ` memfill James Bottomley
  0 siblings, 1 reply; 15+ messages in thread
From: Matthew Wilcox @ 2017-02-08 18:04 UTC (permalink / raw)
  To: James Bottomley
  Cc: Minchan Kim, Andrew Morton, linux-kernel, linux-arch,
	sergey.senozhatsky, iamjoonsoo.kim, ngupta, zhouxianrong,
	zhouxiyu, weidu.du, zhangshiming5, Mi.Sophia.Wang, won.ho.park,
	liw

On Tue, Feb 07, 2017 at 11:07:34AM -0800, James Bottomley wrote:
> > /** 
> >  * memfill - Fill a region of memory with the given value
> >  * @s: Pointer to the start of the region.
> >  * @v: The word to fill the region with.
> >  * @n: The size of the region.
> >  * 
> >  * Differs from memset() in that it fills with an unsigned long
> > instead of 
> >  * a byte.  The pointer and the size must be aligned to unsigned
> > long.
> >  */
> > void memfill(unsigned long *s, unsigned long v, size_t n)
> 
> If we're going to do this, are you sure we wouldn't be wanting a string
> fill instead of a memfill (because filling either by byte or long looks
> a bit restrictive) assuming static strings that we can tell the compile
> time size of, it would be easy for generic code to optimise.

When you say 'string fill', do you mean this?

void memfill(void *dst, size_t dsz, void *src, size_t ssz)
{
	if (ssz == 1) {
		memset(dst, *(unsigned char *)src, dsz);
	} else if (ssz == sizeof(short)) {
		memset_s(dst, *(unsigned short *)src, dsz);
	} else if (ssz == sizeof(int)) {
		memset_i(dst, *(unsigned int *)src, dsz);
	} else if (ssz == sizeof(long)) {
		memset_l(dst, *(unsigned long *)src, dsz);
	} else {
		while (dsz >= ssz) {
			memcpy(dst, src, ssz);
			dsz -= ssz;
			dst += ssz;
		}
		if (dsz)
			memcpy(dst. src. dsz);
	}
}

(with memset_[sil] being obvious, I hope).  Possibly some variation on
this to optimise compile-time constant dsz.

I have no objection to that.  Indeed, it would match Lars Wirzenius'
memfill() definition (if not implementation) which makes me quite happy.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: memfill
  2017-02-08 18:04     ` memfill Matthew Wilcox
@ 2017-02-08 21:01       ` James Bottomley
  2017-02-08 21:54         ` memfill Matthew Wilcox
  0 siblings, 1 reply; 15+ messages in thread
From: James Bottomley @ 2017-02-08 21:01 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Minchan Kim, Andrew Morton, linux-kernel, linux-arch,
	sergey.senozhatsky, iamjoonsoo.kim, ngupta, zhouxianrong,
	zhouxiyu, weidu.du, zhangshiming5, Mi.Sophia.Wang, won.ho.park,
	liw

On Wed, 2017-02-08 at 10:04 -0800, Matthew Wilcox wrote:
> On Tue, Feb 07, 2017 at 11:07:34AM -0800, James Bottomley wrote:
> > > /** 
> > >  * memfill - Fill a region of memory with the given value
> > >  * @s: Pointer to the start of the region.
> > >  * @v: The word to fill the region with.
> > >  * @n: The size of the region.
> > >  * 
> > >  * Differs from memset() in that it fills with an unsigned long
> > > instead of 
> > >  * a byte.  The pointer and the size must be aligned to unsigned
> > > long.
> > >  */
> > > void memfill(unsigned long *s, unsigned long v, size_t n)
> > 
> > If we're going to do this, are you sure we wouldn't be wanting a
> > string
> > fill instead of a memfill (because filling either by byte or long
> > looks
> > a bit restrictive) assuming static strings that we can tell the
> > compile
> > time size of, it would be easy for generic code to optimise.
> 
> When you say 'string fill', do you mean this?
> 
> void memfill(void *dst, size_t dsz, void *src, size_t ssz)
> {
> 	if (ssz == 1) {
> 		memset(dst, *(unsigned char *)src, dsz);
> 	} else if (ssz == sizeof(short)) {
> 		memset_s(dst, *(unsigned short *)src, dsz);
> 	} else if (ssz == sizeof(int)) {
> 		memset_i(dst, *(unsigned int *)src, dsz);
> 	} else if (ssz == sizeof(long)) {
> 		memset_l(dst, *(unsigned long *)src, dsz);
> 	} else {
> 		while (dsz >= ssz) {
> 			memcpy(dst, src, ssz);
> 			dsz -= ssz;
> 			dst += ssz;
> 		}
> 		if (dsz)
> 			memcpy(dst, src, dsz);
> 	}
> }
> 
> (with memset_[sil] being obvious, I hope).  Possibly some variation 
> on this to optimise compile-time constant dsz.
> 
> I have no objection to that.  Indeed, it would match Lars Wirzenius'
> memfill() definition (if not implementation) which makes me quite 
> happy.

Yes, that's about it.  My only qualm looking at the proposal was if
memfill is genuinely useful to something why would it only want to fill
in units of sizeof(long).  On the other hand, we've been operating for
decades without it, so perhaps memset_l is the only use case?

James

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: memfill
  2017-02-08 21:01       ` memfill James Bottomley
@ 2017-02-08 21:54         ` Matthew Wilcox
  0 siblings, 0 replies; 15+ messages in thread
From: Matthew Wilcox @ 2017-02-08 21:54 UTC (permalink / raw)
  To: James Bottomley
  Cc: Minchan Kim, Andrew Morton, linux-kernel, linux-arch,
	sergey.senozhatsky, iamjoonsoo.kim, ngupta, zhouxianrong,
	zhouxiyu, weidu.du, zhangshiming5, Mi.Sophia.Wang, won.ho.park,
	liw

On Wed, Feb 08, 2017 at 01:01:08PM -0800, James Bottomley wrote:
> Yes, that's about it.  My only qualm looking at the proposal was if
> memfill is genuinely useful to something why would it only want to fill
> in units of sizeof(long).  On the other hand, we've been operating for
> decades without it, so perhaps memset_l is the only use case?

I suspect we've grown hundreds of unoptimised implementations of this all
over the kernel.  I mean, look at the attitude of the zram developers when
I suggested memfill: "this is beyond zram scope."  I think finding all of
these is beyond the abilities of grep.  Maybe coccinelle could find some?

Instead I chose a driver at random that both you and I are familiar with,
sym53c8xx_2.  Oh, look, here's one:

        np->badlun_sa = cpu_to_scr(SCRIPTB_BA(np, resel_bad_lun));
        for (i = 0 ; i < 64 ; i++)      /* 64 luns/target, no less */
                np->badluntbl[i] = cpu_to_scr(vtobus(&np->badlun_sa));

and another one:

                for (i = 0 ; i < 64 ; i++)
                        tp->luntbl[i] = cpu_to_scr(vtobus(&np->badlun_sa));

and another:

        for (i = 0 ; i < SYM_CONF_MAX_TASK ; i++)
                lp->itlq_tbl[i] = cpu_to_scr(np->notask_ba);

I don't think any of these are performance path, but they're there.

Maybe SCSI drivers are unusual.  Let's try a random network driver, e1000e:

        /* Clear shadow ram */
        for (i = 0; i < nvm->word_size; i++) {
                dev_spec->shadow_ram[i].modified = false;
                dev_spec->shadow_ram[i].value = 0xFFFF;
        }

(three of those loops)

I mean, it's not going to bring the house down, but that I chose two
drivers more or less at random and found places where such an API could
be used indicates there may be more places this should be used.  And it
gives architectures a good place to plug in a performance optimisation
for zram rather than hiding it away in that funny old driver almost
nobody looks at.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* memfill v2 now with ARM and x86 implementations
  2017-02-05 15:16 [PATCH v4] zram: extend zero pages to same element pages Minchan Kim
                   ` (2 preceding siblings ...)
  2017-02-07  9:40 ` memfill David Howells
@ 2017-03-11 14:56 ` Matthew Wilcox
  2017-03-13  5:17   ` Minchan Kim
  3 siblings, 1 reply; 15+ messages in thread
From: Matthew Wilcox @ 2017-03-11 14:56 UTC (permalink / raw)
  To: Minchan Kim
  Cc: Andrew Morton, linux-kernel, sergey.senozhatsky, iamjoonsoo.kim,
	ngupta, zhouxianrong, zhouxiyu, weidu.du, zhangshiming5,
	Mi.Sophia.Wang, won.ho.park

On Mon, Feb 06, 2017 at 12:16:44AM +0900, Minchan Kim wrote:
> +static inline void zram_fill_page(char *ptr, unsigned long len,
> +					unsigned long value)
> +{
> +	int i;
> +	unsigned long *page = (unsigned long *)ptr;
> +
> +	WARN_ON_ONCE(!IS_ALIGNED(len, sizeof(unsigned long)));
> +
> +	if (likely(value == 0)) {
> +		memset(ptr, 0, len);
> +	} else {
> +		for (i = 0; i < len / sizeof(*page); i++)
> +			page[i] = value;
> +	}
> +}

I've hacked up memset32/memset64 for both ARM and x86 here:

http://git.infradead.org/users/willy/linux-dax.git/shortlog/refs/heads/memfill

Can you do some performance testing and see if it makes a difference?

At this point, I'd probably ask for the first 5 patches in that git
branch to be included, and leave out memfill and the shoddy testsuite.

I haven't actually tested either asm implementation ... only the
C fallback.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: memfill v2 now with ARM and x86 implementations
  2017-03-11 14:56 ` memfill v2 now with ARM and x86 implementations Matthew Wilcox
@ 2017-03-13  5:17   ` Minchan Kim
  0 siblings, 0 replies; 15+ messages in thread
From: Minchan Kim @ 2017-03-13  5:17 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Andrew Morton, linux-kernel, sergey.senozhatsky, iamjoonsoo.kim,
	ngupta, zhouxianrong, zhouxiyu, weidu.du, zhangshiming5,
	Mi.Sophia.Wang, won.ho.park

Hi Matthew,

On Sat, Mar 11, 2017 at 06:56:40AM -0800, Matthew Wilcox wrote:
> On Mon, Feb 06, 2017 at 12:16:44AM +0900, Minchan Kim wrote:
> > +static inline void zram_fill_page(char *ptr, unsigned long len,
> > +					unsigned long value)
> > +{
> > +	int i;
> > +	unsigned long *page = (unsigned long *)ptr;
> > +
> > +	WARN_ON_ONCE(!IS_ALIGNED(len, sizeof(unsigned long)));
> > +
> > +	if (likely(value == 0)) {
> > +		memset(ptr, 0, len);
> > +	} else {
> > +		for (i = 0; i < len / sizeof(*page); i++)
> > +			page[i] = value;
> > +	}
> > +}
> 
> I've hacked up memset32/memset64 for both ARM and x86 here:
> 
> http://git.infradead.org/users/willy/linux-dax.git/shortlog/refs/heads/memfill

Thanks for the patch.

> 
> Can you do some performance testing and see if it makes a difference?

I tested that zram is *full* with non-zero 100M dedupable data (i.e.,
it's an ideal case) on x86. With this, I see 7% enhancement.
        
        perf stat -r 10 dd if=/dev/zram0 of=/dev/null

vanilla:        0.232050465 seconds time elapsed ( +-  0.51% )
memset_l:       0.217219387 seconds time elapsed ( +-  0.07% )

I doubt it brings such a benefit in a read workload which has a small
percentage of non-zero dedup data (e.g., under 3%), but it makes the
code simpler and is a performance win.

Thanks.

> 
> At this point, I'd probably ask for the first 5 patches in that git
> branch to be included, and leave out memfill and the shoddy testsuite.
> 
> I haven't actually tested either asm implementation ... only the
> C fallback.

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, back to index

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-02-05 15:16 [PATCH v4] zram: extend zero pages to same element pages Minchan Kim
2017-02-06 14:49 ` memfill Matthew Wilcox
2017-02-07  2:47   ` memfill zhouxianrong
2017-02-07  4:59   ` memfill Minchan Kim
2017-02-07 19:07   ` memfill James Bottomley
2017-02-08 18:04     ` memfill Matthew Wilcox
2017-02-08 21:01       ` memfill James Bottomley
2017-02-08 21:54         ` memfill Matthew Wilcox
2017-02-07  6:57 ` [PATCH v4] zram: extend zero pages to same element pages Minchan Kim
2017-02-07  9:40 ` memfill David Howells
2017-02-07 17:22   ` memfill Matthew Wilcox
2017-02-07 17:29   ` memfill David Howells
2017-02-07 19:03     ` memfill Matthew Wilcox
2017-03-11 14:56 ` memfill v2 now with ARM and x86 implementations Matthew Wilcox
2017-03-13  5:17   ` Minchan Kim

LKML Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/lkml/0 lkml/git/0.git
	git clone --mirror https://lore.kernel.org/lkml/1 lkml/git/1.git
	git clone --mirror https://lore.kernel.org/lkml/2 lkml/git/2.git
	git clone --mirror https://lore.kernel.org/lkml/3 lkml/git/3.git
	git clone --mirror https://lore.kernel.org/lkml/4 lkml/git/4.git
	git clone --mirror https://lore.kernel.org/lkml/5 lkml/git/5.git
	git clone --mirror https://lore.kernel.org/lkml/6 lkml/git/6.git
	git clone --mirror https://lore.kernel.org/lkml/7 lkml/git/7.git
	git clone --mirror https://lore.kernel.org/lkml/8 lkml/git/8.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 lkml lkml/ https://lore.kernel.org/lkml \
		linux-kernel@vger.kernel.org
	public-inbox-index lkml

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-kernel


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git