All of lore.kernel.org
 help / color / mirror / Atom feed
From: Greg Thelen <gthelen@google.com>
To: Minchan Kim <minchan.kim@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>,
	linux-kernel@vger.kernel.org, linux-mm@kvack.org,
	containers@lists.osdl.org, Andrea Righi <arighi@develer.com>,
	Balbir Singh <balbir@linux.vnet.ibm.com>,
	KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>,
	Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>,
	Ciju Rajan K <ciju@linux.vnet.ibm.com>,
	David Rientjes <rientjes@google.com>,
	Wu Fengguang <fengguang.wu@intel.com>,
	Chad Talbott <ctalbott@google.com>,
	Justin TerAvest <teravest@google.com>,
	Vivek Goyal <vgoyal@redhat.com>
Subject: Re: [PATCH v5 4/9] writeback: create dirty_info structure
Date: Tue, 1 Mar 2011 13:13:58 -0800	[thread overview]
Message-ID: <AANLkTi=bo1H1+DxrgPjTRH7+kk9T6=BMO3mAC78=-1uG@mail.gmail.com> (raw)
In-Reply-To: <20110227163815.GC3226@barrios-desktop>

On Sun, Feb 27, 2011 at 8:38 AM, Minchan Kim <minchan.kim@gmail.com> wrote:
> On Fri, Feb 25, 2011 at 01:35:55PM -0800, Greg Thelen wrote:
>> Bundle dirty limits and dirty memory usage metrics into a dirty_info
>> structure to simplify interfaces of routines that need all.
>>
>> Signed-off-by: Greg Thelen <gthelen@google.com>
>> Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
> Reviewed-by: Minchan Kim <minchan.kim@gmail.com>
>
>> ---
>> Changelog since v4:
>> - Within new dirty_info structure, replaced nr_reclaimable with nr_file_dirty
>>   and nr_unstable_nfs to give callers finer grain dirty usage information.
>> - Added new dirty_info_reclaimable() function.
>> - Made more use of dirty_info structure in throttle_vm_writeout().
>>
>> Changelog since v3:
>> - This is a new patch in v4.
>>
>>  fs/fs-writeback.c         |    7 ++---
>>  include/linux/writeback.h |   15 ++++++++++++-
>>  mm/backing-dev.c          |   18 +++++++++------
>>  mm/page-writeback.c       |   50 ++++++++++++++++++++++----------------------
>>  mm/vmstat.c               |    6 +++-
>>  5 files changed, 57 insertions(+), 39 deletions(-)
>>
>> diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
>> index 59c6e49..d75e4da 100644
>> --- a/fs/fs-writeback.c
>> +++ b/fs/fs-writeback.c
>> @@ -595,12 +595,11 @@ static void __writeback_inodes_sb(struct super_block *sb,
>>
>>  static inline bool over_bground_thresh(void)
>>  {
>> -     unsigned long background_thresh, dirty_thresh;
>> +     struct dirty_info info;
>>
>> -     global_dirty_limits(&background_thresh, &dirty_thresh);
>> +     global_dirty_info(&info);
>>
>> -     return (global_page_state(NR_FILE_DIRTY) +
>> -             global_page_state(NR_UNSTABLE_NFS) > background_thresh);
>> +     return dirty_info_reclaimable(&info) > info.background_thresh;
>>  }
>
> Get unnecessary nr_writeback.
>
>>
>>  /*
>> diff --git a/include/linux/writeback.h b/include/linux/writeback.h
>> index 0ead399..a06fb38 100644
>> --- a/include/linux/writeback.h
>> +++ b/include/linux/writeback.h
>> @@ -84,6 +84,19 @@ static inline void inode_sync_wait(struct inode *inode)
>>  /*
>>   * mm/page-writeback.c
>>   */
>> +struct dirty_info {
>> +     unsigned long dirty_thresh;
>> +     unsigned long background_thresh;
>> +     unsigned long nr_file_dirty;
>> +     unsigned long nr_writeback;
>> +     unsigned long nr_unstable_nfs;
>> +};
>> +
>> +static inline unsigned long dirty_info_reclaimable(struct dirty_info *info)
>> +{
>> +     return info->nr_file_dirty + info->nr_unstable_nfs;
>> +}
>> +
>>  #ifdef CONFIG_BLOCK
>>  void laptop_io_completion(struct backing_dev_info *info);
>>  void laptop_sync_completion(void);
>> @@ -124,7 +137,7 @@ struct ctl_table;
>>  int dirty_writeback_centisecs_handler(struct ctl_table *, int,
>>                                     void __user *, size_t *, loff_t *);
>>
>> -void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty);
>> +void global_dirty_info(struct dirty_info *info);
>>  unsigned long bdi_dirty_limit(struct backing_dev_info *bdi,
>>                              unsigned long dirty);
>>
>> diff --git a/mm/backing-dev.c b/mm/backing-dev.c
>> index 027100d..17a06ab 100644
>> --- a/mm/backing-dev.c
>> +++ b/mm/backing-dev.c
>> @@ -66,8 +66,7 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
>>  {
>>       struct backing_dev_info *bdi = m->private;
>>       struct bdi_writeback *wb = &bdi->wb;
>> -     unsigned long background_thresh;
>> -     unsigned long dirty_thresh;
>> +     struct dirty_info dirty_info;
>>       unsigned long bdi_thresh;
>>       unsigned long nr_dirty, nr_io, nr_more_io, nr_wb;
>>       struct inode *inode;
>> @@ -82,8 +81,8 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
>>               nr_more_io++;
>>       spin_unlock(&inode_lock);
>>
>> -     global_dirty_limits(&background_thresh, &dirty_thresh);
>> -     bdi_thresh = bdi_dirty_limit(bdi, dirty_thresh);
>> +     global_dirty_info(&dirty_info);
>> +     bdi_thresh = bdi_dirty_limit(bdi, dirty_info.dirty_thresh);
>>
>>  #define K(x) ((x) << (PAGE_SHIFT - 10))
>>       seq_printf(m,
>> @@ -99,9 +98,14 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
>>                  "state:            %8lx\n",
>>                  (unsigned long) K(bdi_stat(bdi, BDI_WRITEBACK)),
>>                  (unsigned long) K(bdi_stat(bdi, BDI_RECLAIMABLE)),
>> -                K(bdi_thresh), K(dirty_thresh),
>> -                K(background_thresh), nr_dirty, nr_io, nr_more_io,
>> -                !list_empty(&bdi->bdi_list), bdi->state);
>> +                K(bdi_thresh),
>> +                K(dirty_info.dirty_thresh),
>> +                K(dirty_info.background_thresh),
>
> Get unnecessary nr_file_dirty, nr_writeback, nr_unstable_nfs.
>
>> +                nr_dirty,
>> +                nr_io,
>> +                nr_more_io,
>> +                !list_empty(&bdi->bdi_list),
>> +                bdi->state);
>>  #undef K
>>
>>       return 0;
>> diff --git a/mm/page-writeback.c b/mm/page-writeback.c
>> index 4408e54..00424b9 100644
>> --- a/mm/page-writeback.c
>> +++ b/mm/page-writeback.c
>> @@ -398,7 +398,7 @@ unsigned long determine_dirtyable_memory(void)
>>  }
>>
>>  /*
>> - * global_dirty_limits - background-writeback and dirty-throttling thresholds
>> + * global_dirty_info - return dirty thresholds and usage metrics
>>   *
>>   * Calculate the dirty thresholds based on sysctl parameters
>>   * - vm.dirty_background_ratio  or  vm.dirty_background_bytes
>> @@ -406,7 +406,7 @@ unsigned long determine_dirtyable_memory(void)
>>   * The dirty limits will be lifted by 1/4 for PF_LESS_THROTTLE (ie. nfsd) and
>>   * real-time tasks.
>>   */
>> -void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty)
>> +void global_dirty_info(struct dirty_info *info)
>>  {
>>       unsigned long background;
>>       unsigned long dirty;
>> @@ -426,6 +426,10 @@ void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty)
>>       else
>>               background = (dirty_background_ratio * available_memory) / 100;
>>
>> +     info->nr_file_dirty = global_page_state(NR_FILE_DIRTY);
>> +     info->nr_writeback = global_page_state(NR_WRITEBACK);
>> +     info->nr_unstable_nfs = global_page_state(NR_UNSTABLE_NFS);
>> +
>>       if (background >= dirty)
>>               background = dirty / 2;
>>       tsk = current;
>> @@ -433,8 +437,8 @@ void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty)
>>               background += background / 4;
>>               dirty += dirty / 4;
>>       }
>> -     *pbackground = background;
>> -     *pdirty = dirty;
>> +     info->background_thresh = background;
>> +     info->dirty_thresh = dirty;
>>  }
>>
>>  /*
>> @@ -478,12 +482,9 @@ unsigned long bdi_dirty_limit(struct backing_dev_info *bdi, unsigned long dirty)
>>  static void balance_dirty_pages(struct address_space *mapping,
>>                               unsigned long write_chunk)
>>  {
>> -     unsigned long nr_reclaimable;
>> +     struct dirty_info sys_info;
>>       long bdi_nr_reclaimable;
>> -     unsigned long nr_writeback;
>>       long bdi_nr_writeback;
>> -     unsigned long background_thresh;
>> -     unsigned long dirty_thresh;
>>       unsigned long bdi_thresh;
>>       unsigned long pages_written = 0;
>>       unsigned long pause = 1;
>> @@ -498,22 +499,19 @@ static void balance_dirty_pages(struct address_space *mapping,
>>                       .range_cyclic   = 1,
>>               };
>>
>> -             nr_reclaimable = global_page_state(NR_FILE_DIRTY) +
>> -                                     global_page_state(NR_UNSTABLE_NFS);
>> -             nr_writeback = global_page_state(NR_WRITEBACK);
>> -
>> -             global_dirty_limits(&background_thresh, &dirty_thresh);
>> +             global_dirty_info(&sys_info);
>>
>>               /*
>>                * Throttle it only when the background writeback cannot
>>                * catch-up. This avoids (excessively) small writeouts
>>                * when the bdi limits are ramping up.
>>                */
>> -             if (nr_reclaimable + nr_writeback <=
>> -                             (background_thresh + dirty_thresh) / 2)
>> +             if (dirty_info_reclaimable(&sys_info) + sys_info.nr_writeback <=
>> +                             (sys_info.background_thresh +
>> +                              sys_info.dirty_thresh) / 2)
>>                       break;
>>
>> -             bdi_thresh = bdi_dirty_limit(bdi, dirty_thresh);
>> +             bdi_thresh = bdi_dirty_limit(bdi, sys_info.dirty_thresh);
>>               bdi_thresh = task_dirty_limit(current, bdi_thresh);
>>
>>               /*
>> @@ -542,7 +540,8 @@ static void balance_dirty_pages(struct address_space *mapping,
>>                */
>>               dirty_exceeded =
>>                       (bdi_nr_reclaimable + bdi_nr_writeback > bdi_thresh)
>> -                     || (nr_reclaimable + nr_writeback > dirty_thresh);
>> +                     || (dirty_info_reclaimable(&sys_info) +
>> +                          sys_info.nr_writeback > sys_info.dirty_thresh);
>>
>>               if (!dirty_exceeded)
>>                       break;
>> @@ -595,7 +594,8 @@ static void balance_dirty_pages(struct address_space *mapping,
>>        * background_thresh, to keep the amount of dirty memory low.
>>        */
>>       if ((laptop_mode && pages_written) ||
>> -         (!laptop_mode && (nr_reclaimable > background_thresh)))
>> +         (!laptop_mode && (dirty_info_reclaimable(&sys_info) >
>> +                           sys_info.background_thresh)))
>>               bdi_start_background_writeback(bdi);
>>  }
>>
>> @@ -655,21 +655,21 @@ EXPORT_SYMBOL(balance_dirty_pages_ratelimited_nr);
>>
>>  void throttle_vm_writeout(gfp_t gfp_mask)
>>  {
>> -     unsigned long background_thresh;
>> -     unsigned long dirty_thresh;
>> +     struct dirty_info sys_info;
>>
>>          for ( ; ; ) {
>> -             global_dirty_limits(&background_thresh, &dirty_thresh);
>> +             global_dirty_info(&sys_info);
>
> Get unnecessary nr_file_dirty.
>
>>
>>                  /*
>>                   * Boost the allowable dirty threshold a bit for page
>>                   * allocators so they don't get DoS'ed by heavy writers
>>                   */
>> -                dirty_thresh += dirty_thresh / 10;      /* wheeee... */
>> +             sys_info.dirty_thresh +=
>> +                     sys_info.dirty_thresh / 10;      /* wheeee... */
>>
>> -                if (global_page_state(NR_UNSTABLE_NFS) +
>> -                     global_page_state(NR_WRITEBACK) <= dirty_thresh)
>> -                             break;
>> +             if (sys_info.nr_unstable_nfs +
>> +                 sys_info.nr_writeback <= sys_info.dirty_thresh)
>> +                     break;
>>                  congestion_wait(BLK_RW_ASYNC, HZ/10);
>>
>>               /*
>> diff --git a/mm/vmstat.c b/mm/vmstat.c
>> index 0c3b504..ec95924 100644
>> --- a/mm/vmstat.c
>> +++ b/mm/vmstat.c
>> @@ -1044,6 +1044,7 @@ static void *vmstat_start(struct seq_file *m, loff_t *pos)
>>  {
>>       unsigned long *v;
>>       int i, stat_items_size;
>> +     struct dirty_info dirty_info;
>>
>>       if (*pos >= ARRAY_SIZE(vmstat_text))
>>               return NULL;
>> @@ -1062,8 +1063,9 @@ static void *vmstat_start(struct seq_file *m, loff_t *pos)
>>               v[i] = global_page_state(i);
>>       v += NR_VM_ZONE_STAT_ITEMS;
>>
>> -     global_dirty_limits(v + NR_DIRTY_BG_THRESHOLD,
>> -                         v + NR_DIRTY_THRESHOLD);
>> +     global_dirty_info(&dirty_info);
>> +     v[NR_DIRTY_BG_THRESHOLD] = dirty_info.background_thresh;
>> +     v[NR_DIRTY_THRESHOLD] = dirty_info.dirty_thresh;
>>       v += NR_VM_WRITEBACK_STAT_ITEMS;
>
> Get unnecessary nr_file_dirty, nr_writeback, nr_unstable_nfs.
>
> The code itself doesn't have a problem. but although it makes code simple,
> sometime it get unnecessary information in that context. The global_page_state never
> cheap operation and we have been tried to reduce overhead in page-writeback.
> (16c4042f, e50e3720).
>
> Fortunately this patch doesn't increase balance_dirty_pages's overhead and
> things affected by this patch are not fast-path.
> So I think it doesn't have a problem.

I am doing a small restructure to address this feedback.  The plan is
to only use the new dirty_info structure for memcg limits and usage.
The global limits and usage variables and logic will not be changed.

> --
> Kind regards,
> Minchan Kim
>
> --
> To unsubscribe, send a message with 'unsubscribe linux-mm' in
> the body to majordomo@kvack.org.  For more info on Linux MM,
> see: http://www.linux-mm.org/ .
> Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
> Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
>

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

WARNING: multiple messages have this Message-ID (diff)
From: Greg Thelen <gthelen@google.com>
To: Minchan Kim <minchan.kim@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>,
	linux-kernel@vger.kernel.org, linux-mm@kvack.org,
	containers@lists.osdl.org, Andrea Righi <arighi@develer.com>,
	Balbir Singh <balbir@linux.vnet.ibm.com>,
	KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>,
	Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>,
	Ciju Rajan K <ciju@linux.vnet.ibm.com>,
	David Rientjes <rientjes@google.com>,
	Wu Fengguang <fengguang.wu@intel.com>,
	Chad Talbott <ctalbott@google.com>,
	Justin TerAvest <teravest@google.com>,
	Vivek Goyal <vgoyal@redhat.com>
Subject: Re: [PATCH v5 4/9] writeback: create dirty_info structure
Date: Tue, 1 Mar 2011 13:13:58 -0800	[thread overview]
Message-ID: <AANLkTi=bo1H1+DxrgPjTRH7+kk9T6=BMO3mAC78=-1uG@mail.gmail.com> (raw)
In-Reply-To: <20110227163815.GC3226@barrios-desktop>

On Sun, Feb 27, 2011 at 8:38 AM, Minchan Kim <minchan.kim@gmail.com> wrote:
> On Fri, Feb 25, 2011 at 01:35:55PM -0800, Greg Thelen wrote:
>> Bundle dirty limits and dirty memory usage metrics into a dirty_info
>> structure to simplify interfaces of routines that need all.
>>
>> Signed-off-by: Greg Thelen <gthelen@google.com>
>> Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
> Reviewed-by: Minchan Kim <minchan.kim@gmail.com>
>
>> ---
>> Changelog since v4:
>> - Within new dirty_info structure, replaced nr_reclaimable with nr_file_dirty
>>   and nr_unstable_nfs to give callers finer grain dirty usage information.
>> - Added new dirty_info_reclaimable() function.
>> - Made more use of dirty_info structure in throttle_vm_writeout().
>>
>> Changelog since v3:
>> - This is a new patch in v4.
>>
>>  fs/fs-writeback.c         |    7 ++---
>>  include/linux/writeback.h |   15 ++++++++++++-
>>  mm/backing-dev.c          |   18 +++++++++------
>>  mm/page-writeback.c       |   50 ++++++++++++++++++++++----------------------
>>  mm/vmstat.c               |    6 +++-
>>  5 files changed, 57 insertions(+), 39 deletions(-)
>>
>> diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
>> index 59c6e49..d75e4da 100644
>> --- a/fs/fs-writeback.c
>> +++ b/fs/fs-writeback.c
>> @@ -595,12 +595,11 @@ static void __writeback_inodes_sb(struct super_block *sb,
>>
>>  static inline bool over_bground_thresh(void)
>>  {
>> -     unsigned long background_thresh, dirty_thresh;
>> +     struct dirty_info info;
>>
>> -     global_dirty_limits(&background_thresh, &dirty_thresh);
>> +     global_dirty_info(&info);
>>
>> -     return (global_page_state(NR_FILE_DIRTY) +
>> -             global_page_state(NR_UNSTABLE_NFS) > background_thresh);
>> +     return dirty_info_reclaimable(&info) > info.background_thresh;
>>  }
>
> Get unnecessary nr_writeback.
>
>>
>>  /*
>> diff --git a/include/linux/writeback.h b/include/linux/writeback.h
>> index 0ead399..a06fb38 100644
>> --- a/include/linux/writeback.h
>> +++ b/include/linux/writeback.h
>> @@ -84,6 +84,19 @@ static inline void inode_sync_wait(struct inode *inode)
>>  /*
>>   * mm/page-writeback.c
>>   */
>> +struct dirty_info {
>> +     unsigned long dirty_thresh;
>> +     unsigned long background_thresh;
>> +     unsigned long nr_file_dirty;
>> +     unsigned long nr_writeback;
>> +     unsigned long nr_unstable_nfs;
>> +};
>> +
>> +static inline unsigned long dirty_info_reclaimable(struct dirty_info *info)
>> +{
>> +     return info->nr_file_dirty + info->nr_unstable_nfs;
>> +}
>> +
>>  #ifdef CONFIG_BLOCK
>>  void laptop_io_completion(struct backing_dev_info *info);
>>  void laptop_sync_completion(void);
>> @@ -124,7 +137,7 @@ struct ctl_table;
>>  int dirty_writeback_centisecs_handler(struct ctl_table *, int,
>>                                     void __user *, size_t *, loff_t *);
>>
>> -void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty);
>> +void global_dirty_info(struct dirty_info *info);
>>  unsigned long bdi_dirty_limit(struct backing_dev_info *bdi,
>>                              unsigned long dirty);
>>
>> diff --git a/mm/backing-dev.c b/mm/backing-dev.c
>> index 027100d..17a06ab 100644
>> --- a/mm/backing-dev.c
>> +++ b/mm/backing-dev.c
>> @@ -66,8 +66,7 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
>>  {
>>       struct backing_dev_info *bdi = m->private;
>>       struct bdi_writeback *wb = &bdi->wb;
>> -     unsigned long background_thresh;
>> -     unsigned long dirty_thresh;
>> +     struct dirty_info dirty_info;
>>       unsigned long bdi_thresh;
>>       unsigned long nr_dirty, nr_io, nr_more_io, nr_wb;
>>       struct inode *inode;
>> @@ -82,8 +81,8 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
>>               nr_more_io++;
>>       spin_unlock(&inode_lock);
>>
>> -     global_dirty_limits(&background_thresh, &dirty_thresh);
>> -     bdi_thresh = bdi_dirty_limit(bdi, dirty_thresh);
>> +     global_dirty_info(&dirty_info);
>> +     bdi_thresh = bdi_dirty_limit(bdi, dirty_info.dirty_thresh);
>>
>>  #define K(x) ((x) << (PAGE_SHIFT - 10))
>>       seq_printf(m,
>> @@ -99,9 +98,14 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
>>                  "state:            %8lx\n",
>>                  (unsigned long) K(bdi_stat(bdi, BDI_WRITEBACK)),
>>                  (unsigned long) K(bdi_stat(bdi, BDI_RECLAIMABLE)),
>> -                K(bdi_thresh), K(dirty_thresh),
>> -                K(background_thresh), nr_dirty, nr_io, nr_more_io,
>> -                !list_empty(&bdi->bdi_list), bdi->state);
>> +                K(bdi_thresh),
>> +                K(dirty_info.dirty_thresh),
>> +                K(dirty_info.background_thresh),
>
> Get unnecessary nr_file_dirty, nr_writeback, nr_unstable_nfs.
>
>> +                nr_dirty,
>> +                nr_io,
>> +                nr_more_io,
>> +                !list_empty(&bdi->bdi_list),
>> +                bdi->state);
>>  #undef K
>>
>>       return 0;
>> diff --git a/mm/page-writeback.c b/mm/page-writeback.c
>> index 4408e54..00424b9 100644
>> --- a/mm/page-writeback.c
>> +++ b/mm/page-writeback.c
>> @@ -398,7 +398,7 @@ unsigned long determine_dirtyable_memory(void)
>>  }
>>
>>  /*
>> - * global_dirty_limits - background-writeback and dirty-throttling thresholds
>> + * global_dirty_info - return dirty thresholds and usage metrics
>>   *
>>   * Calculate the dirty thresholds based on sysctl parameters
>>   * - vm.dirty_background_ratio  or  vm.dirty_background_bytes
>> @@ -406,7 +406,7 @@ unsigned long determine_dirtyable_memory(void)
>>   * The dirty limits will be lifted by 1/4 for PF_LESS_THROTTLE (ie. nfsd) and
>>   * real-time tasks.
>>   */
>> -void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty)
>> +void global_dirty_info(struct dirty_info *info)
>>  {
>>       unsigned long background;
>>       unsigned long dirty;
>> @@ -426,6 +426,10 @@ void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty)
>>       else
>>               background = (dirty_background_ratio * available_memory) / 100;
>>
>> +     info->nr_file_dirty = global_page_state(NR_FILE_DIRTY);
>> +     info->nr_writeback = global_page_state(NR_WRITEBACK);
>> +     info->nr_unstable_nfs = global_page_state(NR_UNSTABLE_NFS);
>> +
>>       if (background >= dirty)
>>               background = dirty / 2;
>>       tsk = current;
>> @@ -433,8 +437,8 @@ void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty)
>>               background += background / 4;
>>               dirty += dirty / 4;
>>       }
>> -     *pbackground = background;
>> -     *pdirty = dirty;
>> +     info->background_thresh = background;
>> +     info->dirty_thresh = dirty;
>>  }
>>
>>  /*
>> @@ -478,12 +482,9 @@ unsigned long bdi_dirty_limit(struct backing_dev_info *bdi, unsigned long dirty)
>>  static void balance_dirty_pages(struct address_space *mapping,
>>                               unsigned long write_chunk)
>>  {
>> -     unsigned long nr_reclaimable;
>> +     struct dirty_info sys_info;
>>       long bdi_nr_reclaimable;
>> -     unsigned long nr_writeback;
>>       long bdi_nr_writeback;
>> -     unsigned long background_thresh;
>> -     unsigned long dirty_thresh;
>>       unsigned long bdi_thresh;
>>       unsigned long pages_written = 0;
>>       unsigned long pause = 1;
>> @@ -498,22 +499,19 @@ static void balance_dirty_pages(struct address_space *mapping,
>>                       .range_cyclic   = 1,
>>               };
>>
>> -             nr_reclaimable = global_page_state(NR_FILE_DIRTY) +
>> -                                     global_page_state(NR_UNSTABLE_NFS);
>> -             nr_writeback = global_page_state(NR_WRITEBACK);
>> -
>> -             global_dirty_limits(&background_thresh, &dirty_thresh);
>> +             global_dirty_info(&sys_info);
>>
>>               /*
>>                * Throttle it only when the background writeback cannot
>>                * catch-up. This avoids (excessively) small writeouts
>>                * when the bdi limits are ramping up.
>>                */
>> -             if (nr_reclaimable + nr_writeback <=
>> -                             (background_thresh + dirty_thresh) / 2)
>> +             if (dirty_info_reclaimable(&sys_info) + sys_info.nr_writeback <=
>> +                             (sys_info.background_thresh +
>> +                              sys_info.dirty_thresh) / 2)
>>                       break;
>>
>> -             bdi_thresh = bdi_dirty_limit(bdi, dirty_thresh);
>> +             bdi_thresh = bdi_dirty_limit(bdi, sys_info.dirty_thresh);
>>               bdi_thresh = task_dirty_limit(current, bdi_thresh);
>>
>>               /*
>> @@ -542,7 +540,8 @@ static void balance_dirty_pages(struct address_space *mapping,
>>                */
>>               dirty_exceeded =
>>                       (bdi_nr_reclaimable + bdi_nr_writeback > bdi_thresh)
>> -                     || (nr_reclaimable + nr_writeback > dirty_thresh);
>> +                     || (dirty_info_reclaimable(&sys_info) +
>> +                          sys_info.nr_writeback > sys_info.dirty_thresh);
>>
>>               if (!dirty_exceeded)
>>                       break;
>> @@ -595,7 +594,8 @@ static void balance_dirty_pages(struct address_space *mapping,
>>        * background_thresh, to keep the amount of dirty memory low.
>>        */
>>       if ((laptop_mode && pages_written) ||
>> -         (!laptop_mode && (nr_reclaimable > background_thresh)))
>> +         (!laptop_mode && (dirty_info_reclaimable(&sys_info) >
>> +                           sys_info.background_thresh)))
>>               bdi_start_background_writeback(bdi);
>>  }
>>
>> @@ -655,21 +655,21 @@ EXPORT_SYMBOL(balance_dirty_pages_ratelimited_nr);
>>
>>  void throttle_vm_writeout(gfp_t gfp_mask)
>>  {
>> -     unsigned long background_thresh;
>> -     unsigned long dirty_thresh;
>> +     struct dirty_info sys_info;
>>
>>          for ( ; ; ) {
>> -             global_dirty_limits(&background_thresh, &dirty_thresh);
>> +             global_dirty_info(&sys_info);
>
> Get unnecessary nr_file_dirty.
>
>>
>>                  /*
>>                   * Boost the allowable dirty threshold a bit for page
>>                   * allocators so they don't get DoS'ed by heavy writers
>>                   */
>> -                dirty_thresh += dirty_thresh / 10;      /* wheeee... */
>> +             sys_info.dirty_thresh +=
>> +                     sys_info.dirty_thresh / 10;      /* wheeee... */
>>
>> -                if (global_page_state(NR_UNSTABLE_NFS) +
>> -                     global_page_state(NR_WRITEBACK) <= dirty_thresh)
>> -                             break;
>> +             if (sys_info.nr_unstable_nfs +
>> +                 sys_info.nr_writeback <= sys_info.dirty_thresh)
>> +                     break;
>>                  congestion_wait(BLK_RW_ASYNC, HZ/10);
>>
>>               /*
>> diff --git a/mm/vmstat.c b/mm/vmstat.c
>> index 0c3b504..ec95924 100644
>> --- a/mm/vmstat.c
>> +++ b/mm/vmstat.c
>> @@ -1044,6 +1044,7 @@ static void *vmstat_start(struct seq_file *m, loff_t *pos)
>>  {
>>       unsigned long *v;
>>       int i, stat_items_size;
>> +     struct dirty_info dirty_info;
>>
>>       if (*pos >= ARRAY_SIZE(vmstat_text))
>>               return NULL;
>> @@ -1062,8 +1063,9 @@ static void *vmstat_start(struct seq_file *m, loff_t *pos)
>>               v[i] = global_page_state(i);
>>       v += NR_VM_ZONE_STAT_ITEMS;
>>
>> -     global_dirty_limits(v + NR_DIRTY_BG_THRESHOLD,
>> -                         v + NR_DIRTY_THRESHOLD);
>> +     global_dirty_info(&dirty_info);
>> +     v[NR_DIRTY_BG_THRESHOLD] = dirty_info.background_thresh;
>> +     v[NR_DIRTY_THRESHOLD] = dirty_info.dirty_thresh;
>>       v += NR_VM_WRITEBACK_STAT_ITEMS;
>
> Get unnecessary nr_file_dirty, nr_writeback, nr_unstable_nfs.
>
> The code itself doesn't have a problem. but although it makes code simple,
> sometime it get unnecessary information in that context. The global_page_state never
> cheap operation and we have been tried to reduce overhead in page-writeback.
> (16c4042f, e50e3720).
>
> Fortunately this patch doesn't increase balance_dirty_pages's overhead and
> things affected by this patch are not fast-path.
> So I think it doesn't have a problem.

I am doing a small restructure to address this feedback.  The plan is
to only use the new dirty_info structure for memcg limits and usage.
The global limits and usage variables and logic will not be changed.

> --
> Kind regards,
> Minchan Kim
>
> --
> To unsubscribe, send a message with 'unsubscribe linux-mm' in
> the body to majordomo@kvack.org.  For more info on Linux MM,
> see: http://www.linux-mm.org/ .
> Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
> Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
>

  reply	other threads:[~2011-03-01 21:13 UTC|newest]

Thread overview: 59+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-02-25 21:35 [PATCH v5 0/9] memcg: per cgroup dirty page accounting Greg Thelen
2011-02-25 21:35 ` Greg Thelen
2011-02-25 21:35 ` [PATCH v5 1/9] memcg: document cgroup dirty memory interfaces Greg Thelen
2011-02-25 21:35   ` Greg Thelen
2011-02-28  2:27   ` KAMEZAWA Hiroyuki
2011-02-28  2:27     ` KAMEZAWA Hiroyuki
2011-03-11 10:19   ` Balbir Singh
2011-03-11 10:19     ` Balbir Singh
2011-02-25 21:35 ` [PATCH v5 2/9] memcg: add page_cgroup flags for dirty page tracking Greg Thelen
2011-02-25 21:35   ` Greg Thelen
2011-02-27 15:56   ` Minchan Kim
2011-02-27 15:56     ` Minchan Kim
2011-02-25 21:35 ` [PATCH v5 3/9] writeback: convert variables to unsigned Greg Thelen
2011-02-25 21:35   ` Greg Thelen
2011-02-27 16:07   ` Minchan Kim
2011-02-27 16:07     ` Minchan Kim
2011-02-28 23:52     ` Greg Thelen
2011-02-28 23:52       ` Greg Thelen
2011-03-01  4:50       ` Minchan Kim
2011-03-01  4:50         ` Minchan Kim
2011-03-01  4:50         ` Minchan Kim
2011-02-28  2:28   ` KAMEZAWA Hiroyuki
2011-02-28  2:28     ` KAMEZAWA Hiroyuki
2011-02-25 21:35 ` [PATCH v5 4/9] writeback: create dirty_info structure Greg Thelen
2011-02-25 21:35   ` Greg Thelen
2011-02-27 16:38   ` Minchan Kim
2011-02-27 16:38     ` Minchan Kim
2011-03-01 21:13     ` Greg Thelen [this message]
2011-03-01 21:13       ` Greg Thelen
2011-02-25 21:35 ` [PATCH v5 5/9] memcg: add dirty page accounting infrastructure Greg Thelen
2011-02-25 21:35   ` Greg Thelen
2011-02-27 16:47   ` Minchan Kim
2011-02-27 16:47     ` Minchan Kim
2011-02-28  2:34     ` KAMEZAWA Hiroyuki
2011-02-28  2:34       ` KAMEZAWA Hiroyuki
2011-02-25 21:35 ` [PATCH v5 6/9] memcg: add kernel calls for memcg dirty page stats Greg Thelen
2011-02-25 21:35   ` Greg Thelen
2011-02-27 17:01   ` Minchan Kim
2011-02-27 17:01     ` Minchan Kim
2011-02-28  2:40     ` KAMEZAWA Hiroyuki
2011-02-28  2:40       ` KAMEZAWA Hiroyuki
2011-03-10 16:18       ` Greg Thelen
2011-03-10 16:18         ` Greg Thelen
2011-02-25 21:35 ` [PATCH v5 7/9] memcg: add dirty limits to mem_cgroup Greg Thelen
2011-02-25 21:35   ` Greg Thelen
2011-02-28  2:46   ` KAMEZAWA Hiroyuki
2011-02-28  2:46     ` KAMEZAWA Hiroyuki
2011-02-25 21:35 ` [PATCH v5 8/9] memcg: add cgroupfs interface to memcg dirty limits Greg Thelen
2011-02-25 21:35   ` Greg Thelen
2011-02-25 21:36 ` [PATCH v5 9/9] memcg: check memcg dirty limits in page writeback Greg Thelen
2011-02-25 21:36   ` Greg Thelen
2011-03-01  4:44   ` Minchan Kim
2011-03-01  4:44     ` Minchan Kim
2011-03-02 23:17   ` Vivek Goyal
2011-03-02 23:17     ` Vivek Goyal
2011-03-04  0:30     ` Greg Thelen
2011-03-04  0:30       ` Greg Thelen
2011-02-26  4:16 ` [PATCH v5 0/9] memcg: per cgroup dirty page accounting Greg Thelen
2011-02-26  4:16   ` Greg Thelen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to='AANLkTi=bo1H1+DxrgPjTRH7+kk9T6=BMO3mAC78=-1uG@mail.gmail.com' \
    --to=gthelen@google.com \
    --cc=akpm@linux-foundation.org \
    --cc=arighi@develer.com \
    --cc=balbir@linux.vnet.ibm.com \
    --cc=ciju@linux.vnet.ibm.com \
    --cc=containers@lists.osdl.org \
    --cc=ctalbott@google.com \
    --cc=fengguang.wu@intel.com \
    --cc=kamezawa.hiroyu@jp.fujitsu.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=minchan.kim@gmail.com \
    --cc=nishimura@mxp.nes.nec.co.jp \
    --cc=rientjes@google.com \
    --cc=teravest@google.com \
    --cc=vgoyal@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.