All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] mm: add counters for different page fault types
@ 2017-05-24 19:41 Luigi Semenzato
  2017-05-25  0:19 ` Minchan Kim
  0 siblings, 1 reply; 8+ messages in thread
From: Luigi Semenzato @ 2017-05-24 19:41 UTC (permalink / raw)
  To: linux-mm, dianders, dtor, sonnyrao; +Cc: Luigi Semenzato, Luigi Semenzato

VM event counters are added to keep track of anonymous
vs. file vs. shmem page faults.  They are: pgmajfault_a,
pgmajfault_f and pgmajfault_s.  These are useful to
analyze system performance, particularly when the cost
of a fault for a file page is very different from that
of an anonymous page, as would happen, for instance, in
the presence of zram.

The PGMAJFAULT counter is no longer directly maintained.
Instead the three new counters are added whenever the
total count is needed.

Signed-off-by: Luigi Semenzato <semenzato@google.com>
---
 arch/s390/appldata/appldata_mem.c | 9 ++++++++-
 drivers/virtio/virtio_balloon.c   | 5 ++++-
 fs/dax.c                          | 5 +++--
 fs/ncpfs/mmap.c                   | 4 ++--
 include/linux/vm_event_item.h     | 1 +
 mm/filemap.c                      | 4 ++--
 mm/memcontrol.c                   | 7 ++++++-
 mm/memory.c                       | 4 ++--
 mm/shmem.c                        | 4 ++--
 mm/vmstat.c                       | 5 +++++
 10 files changed, 35 insertions(+), 13 deletions(-)

diff --git a/arch/s390/appldata/appldata_mem.c b/arch/s390/appldata/appldata_mem.c
index 598df5708501..adb8b6412ffa 100644
--- a/arch/s390/appldata/appldata_mem.c
+++ b/arch/s390/appldata/appldata_mem.c
@@ -62,6 +62,9 @@ struct appldata_mem_data {
 	u64 pgalloc;		/* page allocations */
 	u64 pgfault;		/* page faults (major+minor) */
 	u64 pgmajfault;		/* page faults (major only) */
+	u64 pgmajfault_s;	/* shmem page faults (major only) */
+	u64 pgmajfault_a;	/* anonymous page faults (major only) */
+	u64 pgmajfault_f;	/* file page faults (major only) */
 // <-- New in 2.6
 
 } __packed;
@@ -93,7 +96,11 @@ static void appldata_get_mem_data(void *data)
 	mem_data->pgalloc    = ev[PGALLOC_NORMAL];
 	mem_data->pgalloc    += ev[PGALLOC_DMA];
 	mem_data->pgfault    = ev[PGFAULT];
-	mem_data->pgmajfault = ev[PGMAJFAULT];
+	mem_data->pgmajfault =
+		ev[PGMAJFAULT_S] + ev[PGMAJFAULT_A] + ev[PGMAJFAULT_F];
+	mem_data->pgmajfault_s = ev[PGMAJFAULT_S];
+	mem_data->pgmajfault_a = ev[PGMAJFAULT_A];
+	mem_data->pgmajfault_f = ev[PGMAJFAULT_F];
 
 	si_meminfo(&val);
 	mem_data->sharedram = val.sharedram;
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 408c174ef0d5..ed7100645d25 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -259,7 +259,10 @@ static unsigned int update_balloon_stats(struct virtio_balloon *vb)
 				pages_to_bytes(events[PSWPIN]));
 	update_stat(vb, idx++, VIRTIO_BALLOON_S_SWAP_OUT,
 				pages_to_bytes(events[PSWPOUT]));
-	update_stat(vb, idx++, VIRTIO_BALLOON_S_MAJFLT, events[PGMAJFAULT]);
+	update_stat(vb, idx++, VIRTIO_BALLOON_S_MAJFLT,
+		    events[PGMAJFAULT_S] +
+		    events[PGMAJFAULT_A] +
+		    events[PGMAJFAULT_F]);
 	update_stat(vb, idx++, VIRTIO_BALLOON_S_MINFLT, events[PGFAULT]);
 #endif
 	update_stat(vb, idx++, VIRTIO_BALLOON_S_MEMFREE,
diff --git a/fs/dax.c b/fs/dax.c
index c22eaf162f95..3c92f2af0514 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -1200,8 +1200,9 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf,
 	switch (iomap.type) {
 	case IOMAP_MAPPED:
 		if (iomap.flags & IOMAP_F_NEW) {
-			count_vm_event(PGMAJFAULT);
-			mem_cgroup_count_vm_event(vmf->vma->vm_mm, PGMAJFAULT);
+			count_vm_event(PGMAJFAULT_F);
+			mem_cgroup_count_vm_event(vmf->vma->vm_mm,
+						  PGMAJFAULT_F);
 			major = VM_FAULT_MAJOR;
 		}
 		error = dax_insert_mapping(mapping, iomap.bdev, iomap.dax_dev,
diff --git a/fs/ncpfs/mmap.c b/fs/ncpfs/mmap.c
index 0c3905e0542e..ae04b9d86288 100644
--- a/fs/ncpfs/mmap.c
+++ b/fs/ncpfs/mmap.c
@@ -88,8 +88,8 @@ static int ncp_file_mmap_fault(struct vm_fault *vmf)
 	 * fetches from the network, here the analogue of disk.
 	 * -- nyc
 	 */
-	count_vm_event(PGMAJFAULT);
-	mem_cgroup_count_vm_event(vmf->vma->vm_mm, PGMAJFAULT);
+	count_vm_event(PGMAJFAULT_F);
+	mem_cgroup_count_vm_event(vmf->vma->vm_mm, PGMAJFAULT_F);
 	return VM_FAULT_MAJOR;
 }
 
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index d84ae90ccd5c..2d2df45d4520 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -27,6 +27,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
 		FOR_ALL_ZONES(PGSCAN_SKIP),
 		PGFREE, PGACTIVATE, PGDEACTIVATE, PGLAZYFREE,
 		PGFAULT, PGMAJFAULT,
+		PGMAJFAULT_S, PGMAJFAULT_A, PGMAJFAULT_F,
 		PGLAZYFREED,
 		PGREFILL,
 		PGSTEAL_KSWAPD,
diff --git a/mm/filemap.c b/mm/filemap.c
index 6f1be573a5e6..d2b187b648b3 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2225,8 +2225,8 @@ int filemap_fault(struct vm_fault *vmf)
 	} else if (!page) {
 		/* No page in the page cache at all */
 		do_sync_mmap_readahead(vmf->vma, ra, file, offset);
-		count_vm_event(PGMAJFAULT);
-		mem_cgroup_count_vm_event(vmf->vma->vm_mm, PGMAJFAULT);
+		count_vm_event(PGMAJFAULT_F);
+		mem_cgroup_count_vm_event(vmf->vma->vm_mm, PGMAJFAULT_F);
 		ret = VM_FAULT_MAJOR;
 retry_find:
 		page = find_get_page(mapping, offset);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 94172089f52f..045361f2b8fa 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3122,6 +3122,8 @@ unsigned int memcg1_events[] = {
 	PGPGOUT,
 	PGFAULT,
 	PGMAJFAULT,
+	PGMAJFAULT_A,
+	PGMAJFAULT_F,
 };
 
 static const char *const memcg1_event_names[] = {
@@ -3129,6 +3131,8 @@ static const char *const memcg1_event_names[] = {
 	"pgpgout",
 	"pgfault",
 	"pgmajfault",
+	"pgmajfault_a",
+	"pgmajfault_f",
 };
 
 static int memcg_stat_show(struct seq_file *m, void *v)
@@ -5229,7 +5233,8 @@ static int memory_stat_show(struct seq_file *m, void *v)
 	/* Accumulated memory events */
 
 	seq_printf(m, "pgfault %lu\n", events[PGFAULT]);
-	seq_printf(m, "pgmajfault %lu\n", events[PGMAJFAULT]);
+	seq_printf(m, "pgmajfault %lu\n", events[PGMAJFAULT_S] +
+			events[PGMAJFAULT_A] + events[PGMAJFAULT_F]);
 
 	seq_printf(m, "workingset_refault %lu\n",
 		   stat[WORKINGSET_REFAULT]);
diff --git a/mm/memory.c b/mm/memory.c
index 6ff5d729ded0..2c2b7b3ffe7f 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2718,8 +2718,8 @@ int do_swap_page(struct vm_fault *vmf)
 
 		/* Had to read the page from swap area: Major fault */
 		ret = VM_FAULT_MAJOR;
-		count_vm_event(PGMAJFAULT);
-		mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
+		count_vm_event(PGMAJFAULT_A);
+		mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT_A);
 	} else if (PageHWPoison(page)) {
 		/*
 		 * hwpoisoned dirty swapcache pages are kept for killing
diff --git a/mm/shmem.c b/mm/shmem.c
index e67d6ba4e98e..5eea045575c4 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1644,9 +1644,9 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
 			/* Or update major stats only when swapin succeeds?? */
 			if (fault_type) {
 				*fault_type |= VM_FAULT_MAJOR;
-				count_vm_event(PGMAJFAULT);
+				count_vm_event(PGMAJFAULT_S);
 				mem_cgroup_count_vm_event(charge_mm,
-							  PGMAJFAULT);
+							  PGMAJFAULT_S);
 			}
 			/* Here we actually start the io */
 			page = shmem_swapin(swap, gfp, info, index);
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 76f73670200a..741bb14761cd 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -995,6 +995,9 @@ const char * const vmstat_text[] = {
 
 	"pgfault",
 	"pgmajfault",
+	"pgmajfault_s",
+	"pgmajfault_a",
+	"pgmajfault_f",
 	"pglazyfreed",
 
 	"pgrefill",
@@ -1511,6 +1514,8 @@ static void *vmstat_start(struct seq_file *m, loff_t *pos)
 	all_vm_events(v);
 	v[PGPGIN] /= 2;		/* sectors -> kbytes */
 	v[PGPGOUT] /= 2;
+	/* Add up page faults */
+	v[PGMAJFAULT] = v[PGMAJFAULT_S] + v[PGMAJFAULT_A] + v[PGMAJFAULT_F];
 #endif
 	return (unsigned long *)m->private + *pos;
 }
-- 
2.13.0.219.gdb65acc882-goog

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH] mm: add counters for different page fault types
  2017-05-24 19:41 [PATCH] mm: add counters for different page fault types Luigi Semenzato
@ 2017-05-25  0:19 ` Minchan Kim
  2017-05-25 15:54   ` Luigi Semenzato
  0 siblings, 1 reply; 8+ messages in thread
From: Minchan Kim @ 2017-05-25  0:19 UTC (permalink / raw)
  To: Luigi Semenzato; +Cc: linux-mm, dianders, dtor, sonnyrao, Luigi Semenzato

Hi Luigi,

On Wed, May 24, 2017 at 12:41:26PM -0700, Luigi Semenzato wrote:
> VM event counters are added to keep track of anonymous
> vs. file vs. shmem page faults.  They are: pgmajfault_a,
> pgmajfault_f and pgmajfault_s.  These are useful to
> analyze system performance, particularly when the cost
> of a fault for a file page is very different from that
> of an anonymous page, as would happen, for instance, in
> the presence of zram.

Yeb, it's useful with zram and the way I have used is 

        PGMAJFAULT - PSWPIN

With that, I can get how many portion in majfault stems from
file-backed pages while others are from swap.

Can't it meet for your requirement?

Thanks.

> 
> The PGMAJFAULT counter is no longer directly maintained.
> Instead the three new counters are added whenever the
> total count is needed.
> 
> Signed-off-by: Luigi Semenzato <semenzato@google.com>
> ---
>  arch/s390/appldata/appldata_mem.c | 9 ++++++++-
>  drivers/virtio/virtio_balloon.c   | 5 ++++-
>  fs/dax.c                          | 5 +++--
>  fs/ncpfs/mmap.c                   | 4 ++--
>  include/linux/vm_event_item.h     | 1 +
>  mm/filemap.c                      | 4 ++--
>  mm/memcontrol.c                   | 7 ++++++-
>  mm/memory.c                       | 4 ++--
>  mm/shmem.c                        | 4 ++--
>  mm/vmstat.c                       | 5 +++++
>  10 files changed, 35 insertions(+), 13 deletions(-)
> 
> diff --git a/arch/s390/appldata/appldata_mem.c b/arch/s390/appldata/appldata_mem.c
> index 598df5708501..adb8b6412ffa 100644
> --- a/arch/s390/appldata/appldata_mem.c
> +++ b/arch/s390/appldata/appldata_mem.c
> @@ -62,6 +62,9 @@ struct appldata_mem_data {
>  	u64 pgalloc;		/* page allocations */
>  	u64 pgfault;		/* page faults (major+minor) */
>  	u64 pgmajfault;		/* page faults (major only) */
> +	u64 pgmajfault_s;	/* shmem page faults (major only) */
> +	u64 pgmajfault_a;	/* anonymous page faults (major only) */
> +	u64 pgmajfault_f;	/* file page faults (major only) */
>  // <-- New in 2.6
>  
>  } __packed;
> @@ -93,7 +96,11 @@ static void appldata_get_mem_data(void *data)
>  	mem_data->pgalloc    = ev[PGALLOC_NORMAL];
>  	mem_data->pgalloc    += ev[PGALLOC_DMA];
>  	mem_data->pgfault    = ev[PGFAULT];
> -	mem_data->pgmajfault = ev[PGMAJFAULT];
> +	mem_data->pgmajfault =
> +		ev[PGMAJFAULT_S] + ev[PGMAJFAULT_A] + ev[PGMAJFAULT_F];
> +	mem_data->pgmajfault_s = ev[PGMAJFAULT_S];
> +	mem_data->pgmajfault_a = ev[PGMAJFAULT_A];
> +	mem_data->pgmajfault_f = ev[PGMAJFAULT_F];
>  
>  	si_meminfo(&val);
>  	mem_data->sharedram = val.sharedram;
> diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
> index 408c174ef0d5..ed7100645d25 100644
> --- a/drivers/virtio/virtio_balloon.c
> +++ b/drivers/virtio/virtio_balloon.c
> @@ -259,7 +259,10 @@ static unsigned int update_balloon_stats(struct virtio_balloon *vb)
>  				pages_to_bytes(events[PSWPIN]));
>  	update_stat(vb, idx++, VIRTIO_BALLOON_S_SWAP_OUT,
>  				pages_to_bytes(events[PSWPOUT]));
> -	update_stat(vb, idx++, VIRTIO_BALLOON_S_MAJFLT, events[PGMAJFAULT]);
> +	update_stat(vb, idx++, VIRTIO_BALLOON_S_MAJFLT,
> +		    events[PGMAJFAULT_S] +
> +		    events[PGMAJFAULT_A] +
> +		    events[PGMAJFAULT_F]);
>  	update_stat(vb, idx++, VIRTIO_BALLOON_S_MINFLT, events[PGFAULT]);
>  #endif
>  	update_stat(vb, idx++, VIRTIO_BALLOON_S_MEMFREE,
> diff --git a/fs/dax.c b/fs/dax.c
> index c22eaf162f95..3c92f2af0514 100644
> --- a/fs/dax.c
> +++ b/fs/dax.c
> @@ -1200,8 +1200,9 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf,
>  	switch (iomap.type) {
>  	case IOMAP_MAPPED:
>  		if (iomap.flags & IOMAP_F_NEW) {
> -			count_vm_event(PGMAJFAULT);
> -			mem_cgroup_count_vm_event(vmf->vma->vm_mm, PGMAJFAULT);
> +			count_vm_event(PGMAJFAULT_F);
> +			mem_cgroup_count_vm_event(vmf->vma->vm_mm,
> +						  PGMAJFAULT_F);
>  			major = VM_FAULT_MAJOR;
>  		}
>  		error = dax_insert_mapping(mapping, iomap.bdev, iomap.dax_dev,
> diff --git a/fs/ncpfs/mmap.c b/fs/ncpfs/mmap.c
> index 0c3905e0542e..ae04b9d86288 100644
> --- a/fs/ncpfs/mmap.c
> +++ b/fs/ncpfs/mmap.c
> @@ -88,8 +88,8 @@ static int ncp_file_mmap_fault(struct vm_fault *vmf)
>  	 * fetches from the network, here the analogue of disk.
>  	 * -- nyc
>  	 */
> -	count_vm_event(PGMAJFAULT);
> -	mem_cgroup_count_vm_event(vmf->vma->vm_mm, PGMAJFAULT);
> +	count_vm_event(PGMAJFAULT_F);
> +	mem_cgroup_count_vm_event(vmf->vma->vm_mm, PGMAJFAULT_F);
>  	return VM_FAULT_MAJOR;
>  }
>  
> diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
> index d84ae90ccd5c..2d2df45d4520 100644
> --- a/include/linux/vm_event_item.h
> +++ b/include/linux/vm_event_item.h
> @@ -27,6 +27,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
>  		FOR_ALL_ZONES(PGSCAN_SKIP),
>  		PGFREE, PGACTIVATE, PGDEACTIVATE, PGLAZYFREE,
>  		PGFAULT, PGMAJFAULT,
> +		PGMAJFAULT_S, PGMAJFAULT_A, PGMAJFAULT_F,
>  		PGLAZYFREED,
>  		PGREFILL,
>  		PGSTEAL_KSWAPD,
> diff --git a/mm/filemap.c b/mm/filemap.c
> index 6f1be573a5e6..d2b187b648b3 100644
> --- a/mm/filemap.c
> +++ b/mm/filemap.c
> @@ -2225,8 +2225,8 @@ int filemap_fault(struct vm_fault *vmf)
>  	} else if (!page) {
>  		/* No page in the page cache at all */
>  		do_sync_mmap_readahead(vmf->vma, ra, file, offset);
> -		count_vm_event(PGMAJFAULT);
> -		mem_cgroup_count_vm_event(vmf->vma->vm_mm, PGMAJFAULT);
> +		count_vm_event(PGMAJFAULT_F);
> +		mem_cgroup_count_vm_event(vmf->vma->vm_mm, PGMAJFAULT_F);
>  		ret = VM_FAULT_MAJOR;
>  retry_find:
>  		page = find_get_page(mapping, offset);
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 94172089f52f..045361f2b8fa 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -3122,6 +3122,8 @@ unsigned int memcg1_events[] = {
>  	PGPGOUT,
>  	PGFAULT,
>  	PGMAJFAULT,
> +	PGMAJFAULT_A,
> +	PGMAJFAULT_F,
>  };
>  
>  static const char *const memcg1_event_names[] = {
> @@ -3129,6 +3131,8 @@ static const char *const memcg1_event_names[] = {
>  	"pgpgout",
>  	"pgfault",
>  	"pgmajfault",
> +	"pgmajfault_a",
> +	"pgmajfault_f",
>  };
>  
>  static int memcg_stat_show(struct seq_file *m, void *v)
> @@ -5229,7 +5233,8 @@ static int memory_stat_show(struct seq_file *m, void *v)
>  	/* Accumulated memory events */
>  
>  	seq_printf(m, "pgfault %lu\n", events[PGFAULT]);
> -	seq_printf(m, "pgmajfault %lu\n", events[PGMAJFAULT]);
> +	seq_printf(m, "pgmajfault %lu\n", events[PGMAJFAULT_S] +
> +			events[PGMAJFAULT_A] + events[PGMAJFAULT_F]);
>  
>  	seq_printf(m, "workingset_refault %lu\n",
>  		   stat[WORKINGSET_REFAULT]);
> diff --git a/mm/memory.c b/mm/memory.c
> index 6ff5d729ded0..2c2b7b3ffe7f 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -2718,8 +2718,8 @@ int do_swap_page(struct vm_fault *vmf)
>  
>  		/* Had to read the page from swap area: Major fault */
>  		ret = VM_FAULT_MAJOR;
> -		count_vm_event(PGMAJFAULT);
> -		mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
> +		count_vm_event(PGMAJFAULT_A);
> +		mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT_A);
>  	} else if (PageHWPoison(page)) {
>  		/*
>  		 * hwpoisoned dirty swapcache pages are kept for killing
> diff --git a/mm/shmem.c b/mm/shmem.c
> index e67d6ba4e98e..5eea045575c4 100644
> --- a/mm/shmem.c
> +++ b/mm/shmem.c
> @@ -1644,9 +1644,9 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
>  			/* Or update major stats only when swapin succeeds?? */
>  			if (fault_type) {
>  				*fault_type |= VM_FAULT_MAJOR;
> -				count_vm_event(PGMAJFAULT);
> +				count_vm_event(PGMAJFAULT_S);
>  				mem_cgroup_count_vm_event(charge_mm,
> -							  PGMAJFAULT);
> +							  PGMAJFAULT_S);
>  			}
>  			/* Here we actually start the io */
>  			page = shmem_swapin(swap, gfp, info, index);
> diff --git a/mm/vmstat.c b/mm/vmstat.c
> index 76f73670200a..741bb14761cd 100644
> --- a/mm/vmstat.c
> +++ b/mm/vmstat.c
> @@ -995,6 +995,9 @@ const char * const vmstat_text[] = {
>  
>  	"pgfault",
>  	"pgmajfault",
> +	"pgmajfault_s",
> +	"pgmajfault_a",
> +	"pgmajfault_f",
>  	"pglazyfreed",
>  
>  	"pgrefill",
> @@ -1511,6 +1514,8 @@ static void *vmstat_start(struct seq_file *m, loff_t *pos)
>  	all_vm_events(v);
>  	v[PGPGIN] /= 2;		/* sectors -> kbytes */
>  	v[PGPGOUT] /= 2;
> +	/* Add up page faults */
> +	v[PGMAJFAULT] = v[PGMAJFAULT_S] + v[PGMAJFAULT_A] + v[PGMAJFAULT_F];
>  #endif
>  	return (unsigned long *)m->private + *pos;
>  }
> -- 
> 2.13.0.219.gdb65acc882-goog
> 
> --
> To unsubscribe, send a message with 'unsubscribe linux-mm' in
> the body to majordomo@kvack.org.  For more info on Linux MM,
> see: http://www.linux-mm.org/ .
> Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] mm: add counters for different page fault types
  2017-05-25  0:19 ` Minchan Kim
@ 2017-05-25 15:54   ` Luigi Semenzato
  2017-05-26  4:06     ` Minchan Kim
  0 siblings, 1 reply; 8+ messages in thread
From: Luigi Semenzato @ 2017-05-25 15:54 UTC (permalink / raw)
  To: Minchan Kim
  Cc: Linux Memory Management List, Douglas Anderson, Dmitry Torokhov,
	Sonny Rao

Thank you Minchan, that's certainly simpler and I am annoyed that I
didn't consider that :/

By a quick look, there are a few differences but maybe they don't matter?

1. can a major (anon) fault result in a hit in the swap cache?  So
pswpin will not get incremented and the fault will be counted as a
file fault.

2. pswpin also counts swapins from readahead --- which however I think
we have turned off (at least I hope so, since readahead isn't useful
with zram, in fact maybe zram should log a warning when readahead is
greater than 0 because I think that's the default).

Incidentally, I understand anon and file faults, but what's a shmem fault?

Thanks!





On Wed, May 24, 2017 at 5:19 PM, Minchan Kim <minchan@kernel.org> wrote:
> Hi Luigi,
>
> On Wed, May 24, 2017 at 12:41:26PM -0700, Luigi Semenzato wrote:
>> VM event counters are added to keep track of anonymous
>> vs. file vs. shmem page faults.  They are: pgmajfault_a,
>> pgmajfault_f and pgmajfault_s.  These are useful to
>> analyze system performance, particularly when the cost
>> of a fault for a file page is very different from that
>> of an anonymous page, as would happen, for instance, in
>> the presence of zram.
>
> Yeb, it's useful with zram and the way I have used is
>
>         PGMAJFAULT - PSWPIN
>
> With that, I can get how many portion in majfault stems from
> file-backed pages while others are from swap.
>
> Can't it meet for your requirement?
>
> Thanks.
>
>>
>> The PGMAJFAULT counter is no longer directly maintained.
>> Instead the three new counters are added whenever the
>> total count is needed.
>>
>> Signed-off-by: Luigi Semenzato <semenzato@google.com>
>> ---
>>  arch/s390/appldata/appldata_mem.c | 9 ++++++++-
>>  drivers/virtio/virtio_balloon.c   | 5 ++++-
>>  fs/dax.c                          | 5 +++--
>>  fs/ncpfs/mmap.c                   | 4 ++--
>>  include/linux/vm_event_item.h     | 1 +
>>  mm/filemap.c                      | 4 ++--
>>  mm/memcontrol.c                   | 7 ++++++-
>>  mm/memory.c                       | 4 ++--
>>  mm/shmem.c                        | 4 ++--
>>  mm/vmstat.c                       | 5 +++++
>>  10 files changed, 35 insertions(+), 13 deletions(-)
>>
>> diff --git a/arch/s390/appldata/appldata_mem.c b/arch/s390/appldata/appldata_mem.c
>> index 598df5708501..adb8b6412ffa 100644
>> --- a/arch/s390/appldata/appldata_mem.c
>> +++ b/arch/s390/appldata/appldata_mem.c
>> @@ -62,6 +62,9 @@ struct appldata_mem_data {
>>       u64 pgalloc;            /* page allocations */
>>       u64 pgfault;            /* page faults (major+minor) */
>>       u64 pgmajfault;         /* page faults (major only) */
>> +     u64 pgmajfault_s;       /* shmem page faults (major only) */
>> +     u64 pgmajfault_a;       /* anonymous page faults (major only) */
>> +     u64 pgmajfault_f;       /* file page faults (major only) */
>>  // <-- New in 2.6
>>
>>  } __packed;
>> @@ -93,7 +96,11 @@ static void appldata_get_mem_data(void *data)
>>       mem_data->pgalloc    = ev[PGALLOC_NORMAL];
>>       mem_data->pgalloc    += ev[PGALLOC_DMA];
>>       mem_data->pgfault    = ev[PGFAULT];
>> -     mem_data->pgmajfault = ev[PGMAJFAULT];
>> +     mem_data->pgmajfault =
>> +             ev[PGMAJFAULT_S] + ev[PGMAJFAULT_A] + ev[PGMAJFAULT_F];
>> +     mem_data->pgmajfault_s = ev[PGMAJFAULT_S];
>> +     mem_data->pgmajfault_a = ev[PGMAJFAULT_A];
>> +     mem_data->pgmajfault_f = ev[PGMAJFAULT_F];
>>
>>       si_meminfo(&val);
>>       mem_data->sharedram = val.sharedram;
>> diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
>> index 408c174ef0d5..ed7100645d25 100644
>> --- a/drivers/virtio/virtio_balloon.c
>> +++ b/drivers/virtio/virtio_balloon.c
>> @@ -259,7 +259,10 @@ static unsigned int update_balloon_stats(struct virtio_balloon *vb)
>>                               pages_to_bytes(events[PSWPIN]));
>>       update_stat(vb, idx++, VIRTIO_BALLOON_S_SWAP_OUT,
>>                               pages_to_bytes(events[PSWPOUT]));
>> -     update_stat(vb, idx++, VIRTIO_BALLOON_S_MAJFLT, events[PGMAJFAULT]);
>> +     update_stat(vb, idx++, VIRTIO_BALLOON_S_MAJFLT,
>> +                 events[PGMAJFAULT_S] +
>> +                 events[PGMAJFAULT_A] +
>> +                 events[PGMAJFAULT_F]);
>>       update_stat(vb, idx++, VIRTIO_BALLOON_S_MINFLT, events[PGFAULT]);
>>  #endif
>>       update_stat(vb, idx++, VIRTIO_BALLOON_S_MEMFREE,
>> diff --git a/fs/dax.c b/fs/dax.c
>> index c22eaf162f95..3c92f2af0514 100644
>> --- a/fs/dax.c
>> +++ b/fs/dax.c
>> @@ -1200,8 +1200,9 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf,
>>       switch (iomap.type) {
>>       case IOMAP_MAPPED:
>>               if (iomap.flags & IOMAP_F_NEW) {
>> -                     count_vm_event(PGMAJFAULT);
>> -                     mem_cgroup_count_vm_event(vmf->vma->vm_mm, PGMAJFAULT);
>> +                     count_vm_event(PGMAJFAULT_F);
>> +                     mem_cgroup_count_vm_event(vmf->vma->vm_mm,
>> +                                               PGMAJFAULT_F);
>>                       major = VM_FAULT_MAJOR;
>>               }
>>               error = dax_insert_mapping(mapping, iomap.bdev, iomap.dax_dev,
>> diff --git a/fs/ncpfs/mmap.c b/fs/ncpfs/mmap.c
>> index 0c3905e0542e..ae04b9d86288 100644
>> --- a/fs/ncpfs/mmap.c
>> +++ b/fs/ncpfs/mmap.c
>> @@ -88,8 +88,8 @@ static int ncp_file_mmap_fault(struct vm_fault *vmf)
>>        * fetches from the network, here the analogue of disk.
>>        * -- nyc
>>        */
>> -     count_vm_event(PGMAJFAULT);
>> -     mem_cgroup_count_vm_event(vmf->vma->vm_mm, PGMAJFAULT);
>> +     count_vm_event(PGMAJFAULT_F);
>> +     mem_cgroup_count_vm_event(vmf->vma->vm_mm, PGMAJFAULT_F);
>>       return VM_FAULT_MAJOR;
>>  }
>>
>> diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
>> index d84ae90ccd5c..2d2df45d4520 100644
>> --- a/include/linux/vm_event_item.h
>> +++ b/include/linux/vm_event_item.h
>> @@ -27,6 +27,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
>>               FOR_ALL_ZONES(PGSCAN_SKIP),
>>               PGFREE, PGACTIVATE, PGDEACTIVATE, PGLAZYFREE,
>>               PGFAULT, PGMAJFAULT,
>> +             PGMAJFAULT_S, PGMAJFAULT_A, PGMAJFAULT_F,
>>               PGLAZYFREED,
>>               PGREFILL,
>>               PGSTEAL_KSWAPD,
>> diff --git a/mm/filemap.c b/mm/filemap.c
>> index 6f1be573a5e6..d2b187b648b3 100644
>> --- a/mm/filemap.c
>> +++ b/mm/filemap.c
>> @@ -2225,8 +2225,8 @@ int filemap_fault(struct vm_fault *vmf)
>>       } else if (!page) {
>>               /* No page in the page cache at all */
>>               do_sync_mmap_readahead(vmf->vma, ra, file, offset);
>> -             count_vm_event(PGMAJFAULT);
>> -             mem_cgroup_count_vm_event(vmf->vma->vm_mm, PGMAJFAULT);
>> +             count_vm_event(PGMAJFAULT_F);
>> +             mem_cgroup_count_vm_event(vmf->vma->vm_mm, PGMAJFAULT_F);
>>               ret = VM_FAULT_MAJOR;
>>  retry_find:
>>               page = find_get_page(mapping, offset);
>> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
>> index 94172089f52f..045361f2b8fa 100644
>> --- a/mm/memcontrol.c
>> +++ b/mm/memcontrol.c
>> @@ -3122,6 +3122,8 @@ unsigned int memcg1_events[] = {
>>       PGPGOUT,
>>       PGFAULT,
>>       PGMAJFAULT,
>> +     PGMAJFAULT_A,
>> +     PGMAJFAULT_F,
>>  };
>>
>>  static const char *const memcg1_event_names[] = {
>> @@ -3129,6 +3131,8 @@ static const char *const memcg1_event_names[] = {
>>       "pgpgout",
>>       "pgfault",
>>       "pgmajfault",
>> +     "pgmajfault_a",
>> +     "pgmajfault_f",
>>  };
>>
>>  static int memcg_stat_show(struct seq_file *m, void *v)
>> @@ -5229,7 +5233,8 @@ static int memory_stat_show(struct seq_file *m, void *v)
>>       /* Accumulated memory events */
>>
>>       seq_printf(m, "pgfault %lu\n", events[PGFAULT]);
>> -     seq_printf(m, "pgmajfault %lu\n", events[PGMAJFAULT]);
>> +     seq_printf(m, "pgmajfault %lu\n", events[PGMAJFAULT_S] +
>> +                     events[PGMAJFAULT_A] + events[PGMAJFAULT_F]);
>>
>>       seq_printf(m, "workingset_refault %lu\n",
>>                  stat[WORKINGSET_REFAULT]);
>> diff --git a/mm/memory.c b/mm/memory.c
>> index 6ff5d729ded0..2c2b7b3ffe7f 100644
>> --- a/mm/memory.c
>> +++ b/mm/memory.c
>> @@ -2718,8 +2718,8 @@ int do_swap_page(struct vm_fault *vmf)
>>
>>               /* Had to read the page from swap area: Major fault */
>>               ret = VM_FAULT_MAJOR;
>> -             count_vm_event(PGMAJFAULT);
>> -             mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
>> +             count_vm_event(PGMAJFAULT_A);
>> +             mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT_A);
>>       } else if (PageHWPoison(page)) {
>>               /*
>>                * hwpoisoned dirty swapcache pages are kept for killing
>> diff --git a/mm/shmem.c b/mm/shmem.c
>> index e67d6ba4e98e..5eea045575c4 100644
>> --- a/mm/shmem.c
>> +++ b/mm/shmem.c
>> @@ -1644,9 +1644,9 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
>>                       /* Or update major stats only when swapin succeeds?? */
>>                       if (fault_type) {
>>                               *fault_type |= VM_FAULT_MAJOR;
>> -                             count_vm_event(PGMAJFAULT);
>> +                             count_vm_event(PGMAJFAULT_S);
>>                               mem_cgroup_count_vm_event(charge_mm,
>> -                                                       PGMAJFAULT);
>> +                                                       PGMAJFAULT_S);
>>                       }
>>                       /* Here we actually start the io */
>>                       page = shmem_swapin(swap, gfp, info, index);
>> diff --git a/mm/vmstat.c b/mm/vmstat.c
>> index 76f73670200a..741bb14761cd 100644
>> --- a/mm/vmstat.c
>> +++ b/mm/vmstat.c
>> @@ -995,6 +995,9 @@ const char * const vmstat_text[] = {
>>
>>       "pgfault",
>>       "pgmajfault",
>> +     "pgmajfault_s",
>> +     "pgmajfault_a",
>> +     "pgmajfault_f",
>>       "pglazyfreed",
>>
>>       "pgrefill",
>> @@ -1511,6 +1514,8 @@ static void *vmstat_start(struct seq_file *m, loff_t *pos)
>>       all_vm_events(v);
>>       v[PGPGIN] /= 2;         /* sectors -> kbytes */
>>       v[PGPGOUT] /= 2;
>> +     /* Add up page faults */
>> +     v[PGMAJFAULT] = v[PGMAJFAULT_S] + v[PGMAJFAULT_A] + v[PGMAJFAULT_F];
>>  #endif
>>       return (unsigned long *)m->private + *pos;
>>  }
>> --
>> 2.13.0.219.gdb65acc882-goog
>>
>> --
>> To unsubscribe, send a message with 'unsubscribe linux-mm' in
>> the body to majordomo@kvack.org.  For more info on Linux MM,
>> see: http://www.linux-mm.org/ .
>> Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] mm: add counters for different page fault types
  2017-05-25 15:54   ` Luigi Semenzato
@ 2017-05-26  4:06     ` Minchan Kim
  2017-05-26 18:43       ` Luigi Semenzato
  0 siblings, 1 reply; 8+ messages in thread
From: Minchan Kim @ 2017-05-26  4:06 UTC (permalink / raw)
  To: Luigi Semenzato
  Cc: Linux Memory Management List, Douglas Anderson, Dmitry Torokhov,
	Sonny Rao

On Thu, May 25, 2017 at 08:54:09AM -0700, Luigi Semenzato wrote:
> Thank you Minchan, that's certainly simpler and I am annoyed that I
> didn't consider that :/
> 
> By a quick look, there are a few differences but maybe they don't matter?
> 
> 1. can a major (anon) fault result in a hit in the swap cache?  So
> pswpin will not get incremented and the fault will be counted as a
> file fault.

If it is swap cache hit, it's not a major fault which causes IO
so VM count it as minor fault, not major.

> 
> 2. pswpin also counts swapins from readahead --- which however I think
> we have turned off (at least I hope so, since readahead isn't useful
> with zram, in fact maybe zram should log a warning when readahead is
> greater than 0 because I think that's the default).

Yub, I expected you guys used zram with readahead off so it shouldn't
be a big problem.
About auto resetting readahead with zram, I agree with you.
But there are some reasons I postpone the work. No want to discuss
it in this thread/moment. ;)

> 
> Incidentally, I understand anon and file faults, but what's a shmem fault?

For me, it was out of my interest but if you want to count shmem fault,
maybe, we need to introdue new stat(e.g., PSWPIN_SHM) in shmem_swapin
but there are concrete reasons to justify in changelog. :)

Thanks!

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] mm: add counters for different page fault types
  2017-05-26  4:06     ` Minchan Kim
@ 2017-05-26 18:43       ` Luigi Semenzato
  2017-05-29  8:15         ` Minchan Kim
  0 siblings, 1 reply; 8+ messages in thread
From: Luigi Semenzato @ 2017-05-26 18:43 UTC (permalink / raw)
  To: Minchan Kim
  Cc: Linux Memory Management List, Douglas Anderson, Dmitry Torokhov,
	Sonny Rao

Many thanks Minchan.

On Thu, May 25, 2017 at 9:06 PM, Minchan Kim <minchan@kernel.org> wrote:

> If it is swap cache hit, it's not a major fault which causes IO
> so VM count it as minor fault, not major.

Cool---but see below.

> Yub, I expected you guys used zram with readahead off so it shouldn't
> be a big problem.

By the way, I was referring to page clustering.  We do this in sysctl.conf:

# Disable swap read-ahead
vm.page-cluster = 0

I figured that the readahead from the disk device
(/sys/block/zram0/queue/read_ahead_kb) is not meaningful---am I
correct?

These numbers are from a Chromebook with a few dozen Chrome tabs and a
couple of Android apps, and pretty heavy use of zram.

pgpgin 4688863
pgpgout 442052
pswpin 353675
pswpout 1072021
...
pgfault 5564247
pgmajfault 355758
pgmajfault_s 6297
pgmajfault_a 317645
pgmajfault_f 31816
pgmajfault_ax 8494
pgmajfault_fx 13201

where _s, _a, and _f are for shmem, anon, and file pages.
(ax and fx are for the subset of executable pages---I was curious about that)

So the numbers don't completely match:
anon faults = 318,000
swap ins = 354,000

Any idea of what might explain the difference?

> About auto resetting readahead with zram, I agree with you.
> But there are some reasons I postpone the work. No want to discuss
> it in this thread/moment. ;)

Yes, I wasn't even thinking of auto-resetting, just log a warning.

>> Incidentally, I understand anon and file faults, but what's a shmem fault?
>
> For me, it was out of my interest but if you want to count shmem fault,
> maybe, we need to introdue new stat(e.g., PSWPIN_SHM) in shmem_swapin
> but there are concrete reasons to justify in changelog. :)

Actually mine was a simpler question---I have no idea what a major
shmem fault is.   And for this experiment it's a relatively small
number, but a similar order of magnitude to the (expensive) file
faults, so I don't want to completely ignore it.

Thanks again.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] mm: add counters for different page fault types
  2017-05-26 18:43       ` Luigi Semenzato
@ 2017-05-29  8:15         ` Minchan Kim
  2017-05-30 18:41           ` Luigi Semenzato
  0 siblings, 1 reply; 8+ messages in thread
From: Minchan Kim @ 2017-05-29  8:15 UTC (permalink / raw)
  To: Luigi Semenzato
  Cc: Linux Memory Management List, Douglas Anderson, Dmitry Torokhov,
	Sonny Rao

Hi Luigi,

On Fri, May 26, 2017 at 11:43:48AM -0700, Luigi Semenzato wrote:
> Many thanks Minchan.
> 
> On Thu, May 25, 2017 at 9:06 PM, Minchan Kim <minchan@kernel.org> wrote:
> 
> > If it is swap cache hit, it's not a major fault which causes IO
> > so VM count it as minor fault, not major.
> 
> Cool---but see below.
> 
> > Yub, I expected you guys used zram with readahead off so it shouldn't
> > be a big problem.
> 
> By the way, I was referring to page clustering.  We do this in sysctl.conf:

It's readahead of swap.
I meant it exactly. :)

> 
> # Disable swap read-ahead
> vm.page-cluster = 0
> 
> I figured that the readahead from the disk device
> (/sys/block/zram0/queue/read_ahead_kb) is not meaningful---am I
> correct?

Yub.

> 
> These numbers are from a Chromebook with a few dozen Chrome tabs and a
> couple of Android apps, and pretty heavy use of zram.
> 
> pgpgin 4688863
> pgpgout 442052
> pswpin 353675
> pswpout 1072021
> ...
> pgfault 5564247
> pgmajfault 355758
> pgmajfault_s 6297
> pgmajfault_a 317645
> pgmajfault_f 31816
> pgmajfault_ax 8494
> pgmajfault_fx 13201
> 
> where _s, _a, and _f are for shmem, anon, and file pages.
> (ax and fx are for the subset of executable pages---I was curious about that)
> 
> So the numbers don't completely match:
> anon faults = 318,000
> swap ins = 354,000
> 
> Any idea of what might explain the difference?

Some of application call madvise(MADV_WILLNEED) for shmem or anon?

> 
> > About auto resetting readahead with zram, I agree with you.
> > But there are some reasons I postpone the work. No want to discuss
> > it in this thread/moment. ;)
> 
> Yes, I wasn't even thinking of auto-resetting, just log a warning.
> 
> >> Incidentally, I understand anon and file faults, but what's a shmem fault?
> >
> > For me, it was out of my interest but if you want to count shmem fault,
> > maybe, we need to introdue new stat(e.g., PSWPIN_SHM) in shmem_swapin
> > but there are concrete reasons to justify in changelog. :)
> 
> Actually mine was a simpler question---I have no idea what a major
> shmem fault is.   And for this experiment it's a relatively small
> number, but a similar order of magnitude to the (expensive) file
> faults, so I don't want to completely ignore it.

Yes, it's doable but a thing we need to merge new stat is concrete
justification rather than "Having, Better. Why not?" approach.
In my testing, I just wanted to know just file vs anon LRU balancing
so it was out of my interest but you might have a reason to know it.
Then, you can send a patch with detailed changelog. :)

Thanks.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] mm: add counters for different page fault types
  2017-05-29  8:15         ` Minchan Kim
@ 2017-05-30 18:41           ` Luigi Semenzato
  2017-05-31  1:31             ` Minchan Kim
  0 siblings, 1 reply; 8+ messages in thread
From: Luigi Semenzato @ 2017-05-30 18:41 UTC (permalink / raw)
  To: Minchan Kim
  Cc: Linux Memory Management List, Douglas Anderson, Dmitry Torokhov,
	Sonny Rao

On Mon, May 29, 2017 at 1:15 AM, Minchan Kim <minchan@kernel.org> wrote:

>> These numbers are from a Chromebook with a few dozen Chrome tabs and a
>> couple of Android apps, and pretty heavy use of zram.
>>
>> pgpgin 4688863
>> pgpgout 442052
>> pswpin 353675
>> pswpout 1072021
>> ...
>> pgfault 5564247
>> pgmajfault 355758
>> pgmajfault_s 6297
>> pgmajfault_a 317645
>> pgmajfault_f 31816
>> pgmajfault_ax 8494
>> pgmajfault_fx 13201
>>
>> where _s, _a, and _f are for shmem, anon, and file pages.
>> (ax and fx are for the subset of executable pages---I was curious about that)
>>
>> So the numbers don't completely match:
>> anon faults = 318,000
>> swap ins = 354,000
>>
>> Any idea of what might explain the difference?
>
> Some of application call madvise(MADV_WILLNEED) for shmem or anon?

Thank you for the suggestion.  Nevertheless, the problem is that
pgmajfault - pswpin is 2,000, which is far from the 32,000 major file
faults, and figuring out where the difference comes from is not
simple.  (Or is it, and I am just too lazy?  Often it's hard to tell
:)

> Yes, it's doable but a thing we need to merge new stat is concrete
> justification rather than "Having, Better. Why not?" approach.
> In my testing, I just wanted to know just file vs anon LRU balancing
> so it was out of my interest but you might have a reason to know it.
> Then, you can send a patch with detailed changelog. :)

Yes I agree, I don't like adding random stats either "just in case
they are useful".

For this stat, we too are interested in the balance between FILE and
ANON faults, because a zram page fault costs us about 10us, but the
latency from disk read to service the FILE fault is about 300us.  So
we want to ensure we're tuning swappiness correctly (and by the way,
we also apply another patch which allows swappiness values up to 200
instead of the obsolete 100 limit).  We run experiments, but we're
also collecting stats from the field (for the users that permit it),
so we have applied this patch to all our kernels.

This is as full an explanation as I can give concisely, would this be enough?

So there is benefit for us in getting this level of detail from
vmstat.  Of course it's not clear that the benefit extends to the
greater community.  If it is deemed to not be sufficiently important
to add those vmstat fields (3 more fields added to about 100, although
we could just add 2 since pgmajfault = sum(pjmajfault_{a,f,s}) then we
can maintain them separately for Chrome OS, and that will be fine.

Thanks!



>
> Thanks.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] mm: add counters for different page fault types
  2017-05-30 18:41           ` Luigi Semenzato
@ 2017-05-31  1:31             ` Minchan Kim
  0 siblings, 0 replies; 8+ messages in thread
From: Minchan Kim @ 2017-05-31  1:31 UTC (permalink / raw)
  To: Luigi Semenzato
  Cc: Linux Memory Management List, Douglas Anderson, Dmitry Torokhov,
	Sonny Rao

Hello Luigi,

On Tue, May 30, 2017 at 11:41:37AM -0700, Luigi Semenzato wrote:
> On Mon, May 29, 2017 at 1:15 AM, Minchan Kim <minchan@kernel.org> wrote:
> 
> >> These numbers are from a Chromebook with a few dozen Chrome tabs and a
> >> couple of Android apps, and pretty heavy use of zram.
> >>
> >> pgpgin 4688863
> >> pgpgout 442052
> >> pswpin 353675
> >> pswpout 1072021
> >> ...
> >> pgfault 5564247
> >> pgmajfault 355758
> >> pgmajfault_s 6297
> >> pgmajfault_a 317645
> >> pgmajfault_f 31816
> >> pgmajfault_ax 8494
> >> pgmajfault_fx 13201
> >>
> >> where _s, _a, and _f are for shmem, anon, and file pages.
> >> (ax and fx are for the subset of executable pages---I was curious about that)
> >>
> >> So the numbers don't completely match:
> >> anon faults = 318,000
> >> swap ins = 354,000
> >>
> >> Any idea of what might explain the difference?
> >
> > Some of application call madvise(MADV_WILLNEED) for shmem or anon?
> 
> Thank you for the suggestion.  Nevertheless, the problem is that
> pgmajfault - pswpin is 2,000, which is far from the 32,000 major file
> faults, and figuring out where the difference comes from is not
> simple.  (Or is it, and I am just too lazy?  Often it's hard to tell
> :)

What's the your kernel version? IOW, What kernel version against
this patch? It helps to look at code more detail with your patch.

A possibility is shared memory(e.g., between parent and child by forking)
with concurrent page faulting.

do_swap_page
        page = lookup_swap_cache <- first swapcache miss
        if (!page) {
                page = swapin_readahead
                        read_swap_cache_async
                          return find_get_page(swapper_space,
                                          swp_offset(entry)); <- second hit
        }

In above case, it should be minor fault if it found the page in swapcache
but VM counts it as major fault. If you verify it's the problem,
we can fix it but 2000 is under 1% in your anon faults so I'm not sure
how it's significant if we consider lots of kernel stat is inaccurate
within noise level.

> 
> > Yes, it's doable but a thing we need to merge new stat is concrete
> > justification rather than "Having, Better. Why not?" approach.
> > In my testing, I just wanted to know just file vs anon LRU balancing
> > so it was out of my interest but you might have a reason to know it.
> > Then, you can send a patch with detailed changelog. :)
> 
> Yes I agree, I don't like adding random stats either "just in case
> they are useful".
> 
> For this stat, we too are interested in the balance between FILE and
> ANON faults, because a zram page fault costs us about 10us, but the
> latency from disk read to service the FILE fault is about 300us.  So
> we want to ensure we're tuning swappiness correctly (and by the way,
> we also apply another patch which allows swappiness values up to 200
> instead of the obsolete 100 limit).  We run experiments, but we're
> also collecting stats from the field (for the users that permit it),
> so we have applied this patch to all our kernels.
> 
> This is as full an explanation as I can give concisely, would this be enough?

That's one I had interested and for me, it was enough just PGMAJFAULT
- PSWPIN. With that, we can know file-backed page's major fault vs.
zram-swap backed page fault ratio. That was all I wanted to know and
no need to know how many portion was shmem : anonymous from zram-swap.

So, I think we need following justification to merge this stat.

1. Why PGMAJFAULT - PSWPIN is not enough for your interest?
2. Why do you need to identify shmem's fault in anon major fault?

> 
> So there is benefit for us in getting this level of detail from
> vmstat.  Of course it's not clear that the benefit extends to the
> greater community.  If it is deemed to not be sufficiently important
> to add those vmstat fields (3 more fields added to about 100, although
> we could just add 2 since pgmajfault = sum(pjmajfault_{a,f,s}) then we
> can maintain them separately for Chrome OS, and that will be fine.
> 
> Thanks!
> 
> 
> 
> >
> > Thanks.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2017-05-31  1:31 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-05-24 19:41 [PATCH] mm: add counters for different page fault types Luigi Semenzato
2017-05-25  0:19 ` Minchan Kim
2017-05-25 15:54   ` Luigi Semenzato
2017-05-26  4:06     ` Minchan Kim
2017-05-26 18:43       ` Luigi Semenzato
2017-05-29  8:15         ` Minchan Kim
2017-05-30 18:41           ` Luigi Semenzato
2017-05-31  1:31             ` Minchan Kim

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.