* [RFC v1] writeback: add elastic bdi in cgwb bdp
@ 2019-10-20 13:00 Hillf Danton
2019-10-21 23:33 ` kbuild test robot
2019-10-22 5:56 ` kbuild test robot
0 siblings, 2 replies; 3+ messages in thread
From: Hillf Danton @ 2019-10-20 13:00 UTC (permalink / raw)
To: linux-mm
Cc: fsdev, Andrew Morton, linux-kernel, Tejun Heo, Jan Kara,
Fengguang Wu, Johannes Weiner, Shakeel Butt, Minchan Kim,
Mel Gorman, Hillf Danton
The elastic bdi is the mirror bdi of spinning disks, SSD, USB and
other storage devices/instruments on market. The performance of
ebdi goes up and down as the pattern of IO dispatched changes. It
can be approximately estimated as below.
P = j(..., IO pattern);
In ebdi's view, the bandwidth currently measured in balancing dirty
pages has close relation to its performance because the former is a
part of the latter as represented below.
B = y(P);
The functions above suggest that there may be a layer violation if
filesystems don't care what is measured at the mm layer, while it
could be better measured somewhere below fs.
It is measured however to the extent that makes every judge happy,
and is playing a role in dispatching IO with the IO pattern entirely
ignored that is volatile in nature.
And it helps to throttle the dirty speed, with the figure ignored
that DRAM in general is x10 faster than ebdi. If B is half of P for
instance, then it is near 5% of dirty speed, only 2 points from the
figure in the snippet below.
/*
* If ratelimit_pages is too high then we can get into dirty-data overload
* if a large number of processes all perform writes at the same time.
* If it is too low then SMP machines will call the (expensive)
* get_writeback_state too often.
*
* Here we set ratelimit_pages to a level which ensures that when all CPUs are
* dirtying in parallel, we cannot go more than 3% (1/32) over the dirty memory
* thresholds.
*/
To prevent dirty speed from running away from laundry speed in bdp,
ebdi suggests the walk-dog method to consider in assumption that a
leash churns less in IO pattern.
V1 is based on 5.4-rc3.
Changes since v0
- add CGWB_BDP_WITH_EBDI in mm/Kconfig
- drop wakeup in wbc_detach_inode()
- add wakeup in wb_workfn()
Cc: Tejun Heo <tj@kernel.org>
Cc: Jan Kara <jack@suse.com>
Cc: Fengguang Wu <fengguang.wu@intel.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Mel Gorman <mgorman@suse.de>
Signed-off-by: Hillf Danton <hdanton@sina.com>
---
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -204,6 +204,14 @@ config ARCH_ENABLE_SPLIT_PMD_PTLOCK
config MEMORY_BALLOON
bool
+config CGWB_BDP_WITH_EBDI
+ bool
+ help
+ This puts the walk-dog method in balancing dirty pages
+ instead of measuring bandwidth.
+
+ Say N if unsure.
+
#
# support for memory balloon compaction
config BALLOON_COMPACTION
--- a/include/linux/backing-dev-defs.h
+++ b/include/linux/backing-dev-defs.h
@@ -170,6 +170,10 @@ struct bdi_writeback {
struct list_head bdi_node; /* anchored at bdi->wb_list */
+#ifdef CONFIG_CGWB_BDP_WITH_EBDI
+ struct wait_queue_head bdp_waitq;
+#endif
+
#ifdef CONFIG_CGROUP_WRITEBACK
struct percpu_ref refcnt; /* used only for !root wb's */
struct fprop_local_percpu memcg_completions;
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -324,6 +324,9 @@ static int wb_init(struct bdi_writeback
goto out_destroy_stat;
}
+ if (IS_ENABLED(CONFIG_CGWB_BDP_WITH_EBDI))
+ init_waitqueue_head(&wb->bdp_waitq);
+
return 0;
out_destroy_stat:
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -1551,6 +1551,41 @@ static inline void wb_dirty_limits(struc
}
}
+#ifdef CONFIG_CGWB_BDP_WITH_EBDI
+static bool cgwb_bdp_should_throttle(struct bdi_writeback *wb)
+{
+ struct dirty_throttle_control gdtc = { GDTC_INIT_NO_WB };
+
+ if (fatal_signal_pending(current))
+ return false;
+
+ gdtc.avail = global_dirtyable_memory();
+
+ domain_dirty_limits(&gdtc);
+
+ gdtc.dirty = global_node_page_state(NR_FILE_DIRTY) +
+ global_node_page_state(NR_UNSTABLE_NFS) +
+ global_node_page_state(NR_WRITEBACK);
+
+ if (gdtc.dirty < gdtc.bg_thresh)
+ return false;
+
+ if (!writeback_in_progress(wb))
+ wb_start_background_writeback(wb);
+
+ return gdtc.dirty > gdtc.thresh &&
+ wb_stat(wb, WB_DIRTIED) >
+ wb_stat(wb, WB_WRITTEN) +
+ wb_stat_error();
+}
+
+static inline void cgwb_bdp(struct bdi_writeback *wb)
+{
+ wait_event_interruptible_timeout(wb->bdp_waitq,
+ !cgwb_bdp_should_throttle(wb), HZ);
+}
+#endif
+
/*
* balance_dirty_pages() must be called by processes which are generating dirty
* data. It looks at the number of dirty pages in the machine and will force
@@ -1910,7 +1945,10 @@ void balance_dirty_pages_ratelimited(str
preempt_enable();
if (unlikely(current->nr_dirtied >= ratelimit))
- balance_dirty_pages(wb, current->nr_dirtied);
+ if (IS_ENABLED(CONFIG_CGWB_BDP_WITH_EBDI))
+ cgwb_bdp(wb);
+ else
+ balance_dirty_pages(wb, current->nr_dirtied);
wb_put(wb);
}
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -811,6 +811,9 @@ static long wb_split_bdi_pages(struct bd
if (nr_pages == LONG_MAX)
return LONG_MAX;
+ if (IS_ENABLED(CONFIG_CGWB_BDP_WITH_EBDI))
+ return nr_pages;
+
/*
* This may be called on clean wb's and proportional distribution
* may not make sense, just use the original @nr_pages in those
@@ -1598,6 +1601,8 @@ static long writeback_chunk_size(struct
*/
if (work->sync_mode == WB_SYNC_ALL || work->tagged_writepages)
pages = LONG_MAX;
+ else if (IS_ENABLED(CONFIG_CGWB_BDP_WITH_EBDI))
+ pages = work->nr_pages;
else {
pages = min(wb->avg_write_bandwidth / 2,
global_wb_domain.dirty_limit / DIRTY_SCOPE);
@@ -2092,6 +2097,10 @@ void wb_workfn(struct work_struct *work)
wb_wakeup_delayed(wb);
current->flags &= ~PF_SWAPWRITE;
+
+ if (IS_ENABLED(CONFIG_CGWB_BDP_WITH_EBDI))
+ if (waitqueue_active(&wb->bdp_waitq))
+ wake_up_all(&wb->bdp_waitq);
}
/*
--
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [RFC v1] writeback: add elastic bdi in cgwb bdp
2019-10-20 13:00 [RFC v1] writeback: add elastic bdi in cgwb bdp Hillf Danton
@ 2019-10-21 23:33 ` kbuild test robot
2019-10-22 5:56 ` kbuild test robot
1 sibling, 0 replies; 3+ messages in thread
From: kbuild test robot @ 2019-10-21 23:33 UTC (permalink / raw)
To: kbuild-all
[-- Attachment #1: Type: text/plain, Size: 5738 bytes --]
Hi Hillf,
[FYI, it's a private test report for your RFC patch.]
[auto build test ERROR on linus/master]
[cannot apply to v5.4-rc4 next-20191021]
[if your patch is applied to the wrong git tree, please drop us a note to help
improve the system. BTW, we also suggest to use '--base' option to specify the
base tree in git format-patch, please see https://stackoverflow.com/a/37406982]
url: https://github.com/0day-ci/linux/commits/Hillf-Danton/writeback-add-elastic-bdi-in-cgwb-bdp/20191022-070010
base: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 7d194c2100ad2a6dded545887d02754948ca5241
config: i386-tinyconfig (attached as .config)
compiler: gcc-7 (Debian 7.4.0-14) 7.4.0
reproduce:
# save the attached .config to linux build tree
make ARCH=i386
If you fix the issue, kindly add following tag
Reported-by: kbuild test robot <lkp@intel.com>
All errors (new ones prefixed by >>):
mm/page-writeback.c: In function 'balance_dirty_pages_ratelimited':
>> mm/page-writeback.c:1949:4: error: implicit declaration of function 'cgwb_bdp' [-Werror=implicit-function-declaration]
cgwb_bdp(wb);
^~~~~~~~
mm/page-writeback.c:1947:5: warning: suggest explicit braces to avoid ambiguous 'else' [-Wdangling-else]
if (unlikely(current->nr_dirtied >= ratelimit))
^
cc1: some warnings being treated as errors
--
In file included from mm/backing-dev.c:3:0:
mm/backing-dev.c: In function 'wb_init':
>> mm/backing-dev.c:328:26: error: 'struct bdi_writeback' has no member named 'bdp_waitq'
init_waitqueue_head(&wb->bdp_waitq);
^
include/linux/wait.h:67:26: note: in definition of macro 'init_waitqueue_head'
__init_waitqueue_head((wq_head), #wq_head, &__key); \
^~~~~~~
--
fs/fs-writeback.c: In function 'wb_workfn':
>> fs/fs-writeback.c:2102:27: error: 'struct bdi_writeback' has no member named 'bdp_waitq'
if (waitqueue_active(&wb->bdp_waitq))
^~
In file included from include/linux/mmzone.h:10:0,
from include/linux/gfp.h:6,
from include/linux/slab.h:15,
from fs/fs-writeback.c:20:
fs/fs-writeback.c:2103:19: error: 'struct bdi_writeback' has no member named 'bdp_waitq'
wake_up_all(&wb->bdp_waitq);
^
include/linux/wait.h:210:36: note: in definition of macro 'wake_up_all'
#define wake_up_all(x) __wake_up(x, TASK_NORMAL, 0, NULL)
^
vim +/cgwb_bdp +1949 mm/page-writeback.c
1885
1886 /**
1887 * balance_dirty_pages_ratelimited - balance dirty memory state
1888 * @mapping: address_space which was dirtied
1889 *
1890 * Processes which are dirtying memory should call in here once for each page
1891 * which was newly dirtied. The function will periodically check the system's
1892 * dirty state and will initiate writeback if needed.
1893 *
1894 * On really big machines, get_writeback_state is expensive, so try to avoid
1895 * calling it too often (ratelimiting). But once we're over the dirty memory
1896 * limit we decrease the ratelimiting by a lot, to prevent individual processes
1897 * from overshooting the limit by (ratelimit_pages) each.
1898 */
1899 void balance_dirty_pages_ratelimited(struct address_space *mapping)
1900 {
1901 struct inode *inode = mapping->host;
1902 struct backing_dev_info *bdi = inode_to_bdi(inode);
1903 struct bdi_writeback *wb = NULL;
1904 int ratelimit;
1905 int *p;
1906
1907 if (!bdi_cap_account_dirty(bdi))
1908 return;
1909
1910 if (inode_cgwb_enabled(inode))
1911 wb = wb_get_create_current(bdi, GFP_KERNEL);
1912 if (!wb)
1913 wb = &bdi->wb;
1914
1915 ratelimit = current->nr_dirtied_pause;
1916 if (wb->dirty_exceeded)
1917 ratelimit = min(ratelimit, 32 >> (PAGE_SHIFT - 10));
1918
1919 preempt_disable();
1920 /*
1921 * This prevents one CPU to accumulate too many dirtied pages without
1922 * calling into balance_dirty_pages(), which can happen when there are
1923 * 1000+ tasks, all of them start dirtying pages at exactly the same
1924 * time, hence all honoured too large initial task->nr_dirtied_pause.
1925 */
1926 p = this_cpu_ptr(&bdp_ratelimits);
1927 if (unlikely(current->nr_dirtied >= ratelimit))
1928 *p = 0;
1929 else if (unlikely(*p >= ratelimit_pages)) {
1930 *p = 0;
1931 ratelimit = 0;
1932 }
1933 /*
1934 * Pick up the dirtied pages by the exited tasks. This avoids lots of
1935 * short-lived tasks (eg. gcc invocations in a kernel build) escaping
1936 * the dirty throttling and livelock other long-run dirtiers.
1937 */
1938 p = this_cpu_ptr(&dirty_throttle_leaks);
1939 if (*p > 0 && current->nr_dirtied < ratelimit) {
1940 unsigned long nr_pages_dirtied;
1941 nr_pages_dirtied = min(*p, ratelimit - current->nr_dirtied);
1942 *p -= nr_pages_dirtied;
1943 current->nr_dirtied += nr_pages_dirtied;
1944 }
1945 preempt_enable();
1946
1947 if (unlikely(current->nr_dirtied >= ratelimit))
1948 if (IS_ENABLED(CONFIG_CGWB_BDP_WITH_EBDI))
> 1949 cgwb_bdp(wb);
1950 else
1951 balance_dirty_pages(wb, current->nr_dirtied);
1952
1953 wb_put(wb);
1954 }
1955 EXPORT_SYMBOL(balance_dirty_pages_ratelimited);
1956
---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation
[-- Attachment #2: config.gz --]
[-- Type: application/gzip, Size: 7207 bytes --]
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [RFC v1] writeback: add elastic bdi in cgwb bdp
2019-10-20 13:00 [RFC v1] writeback: add elastic bdi in cgwb bdp Hillf Danton
2019-10-21 23:33 ` kbuild test robot
@ 2019-10-22 5:56 ` kbuild test robot
1 sibling, 0 replies; 3+ messages in thread
From: kbuild test robot @ 2019-10-22 5:56 UTC (permalink / raw)
To: kbuild-all
[-- Attachment #1: Type: text/plain, Size: 5500 bytes --]
Hi Hillf,
[FYI, it's a private test report for your RFC patch.]
[auto build test WARNING on linus/master]
[cannot apply to v5.4-rc4 next-20191021]
[if your patch is applied to the wrong git tree, please drop us a note to help
improve the system. BTW, we also suggest to use '--base' option to specify the
base tree in git format-patch, please see https://stackoverflow.com/a/37406982]
url: https://github.com/0day-ci/linux/commits/Hillf-Danton/writeback-add-elastic-bdi-in-cgwb-bdp/20191022-070010
base: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 7d194c2100ad2a6dded545887d02754948ca5241
config: x86_64-randconfig-f003-201942 (attached as .config)
compiler: gcc-7 (Debian 7.4.0-14) 7.4.0
reproduce:
# save the attached .config to linux build tree
make ARCH=x86_64
If you fix the issue, kindly add following tag
Reported-by: kbuild test robot <lkp@intel.com>
All warnings (new ones prefixed by >>):
In file included from include/linux/export.h:44:0,
from include/linux/linkage.h:7,
from include/linux/kernel.h:8,
from fs/fs-writeback.c:17:
fs/fs-writeback.c: In function 'wb_workfn':
fs/fs-writeback.c:2102:27: error: 'struct bdi_writeback' has no member named 'bdp_waitq'
if (waitqueue_active(&wb->bdp_waitq))
^
include/linux/compiler.h:58:52: note: in definition of macro '__trace_if_var'
#define __trace_if_var(cond) (__builtin_constant_p(cond) ? (cond) : __trace_if_value(cond))
^~~~
>> fs/fs-writeback.c:2102:3: note: in expansion of macro 'if'
if (waitqueue_active(&wb->bdp_waitq))
^~
fs/fs-writeback.c:2102:27: error: 'struct bdi_writeback' has no member named 'bdp_waitq'
if (waitqueue_active(&wb->bdp_waitq))
^
include/linux/compiler.h:58:61: note: in definition of macro '__trace_if_var'
#define __trace_if_var(cond) (__builtin_constant_p(cond) ? (cond) : __trace_if_value(cond))
^~~~
>> fs/fs-writeback.c:2102:3: note: in expansion of macro 'if'
if (waitqueue_active(&wb->bdp_waitq))
^~
fs/fs-writeback.c:2102:27: error: 'struct bdi_writeback' has no member named 'bdp_waitq'
if (waitqueue_active(&wb->bdp_waitq))
^
include/linux/compiler.h:69:3: note: in definition of macro '__trace_if_value'
(cond) ? \
^~~~
include/linux/compiler.h:56:28: note: in expansion of macro '__trace_if_var'
#define if(cond, ...) if ( __trace_if_var( !!(cond , ## __VA_ARGS__) ) )
^~~~~~~~~~~~~~
>> fs/fs-writeback.c:2102:3: note: in expansion of macro 'if'
if (waitqueue_active(&wb->bdp_waitq))
^~
In file included from include/linux/mmzone.h:10:0,
from include/linux/gfp.h:6,
from include/linux/slab.h:15,
from fs/fs-writeback.c:20:
fs/fs-writeback.c:2103:19: error: 'struct bdi_writeback' has no member named 'bdp_waitq'
wake_up_all(&wb->bdp_waitq);
^
include/linux/wait.h:210:36: note: in definition of macro 'wake_up_all'
#define wake_up_all(x) __wake_up(x, TASK_NORMAL, 0, NULL)
^
vim +/if +2102 fs/fs-writeback.c
2057
2058 /*
2059 * Handle writeback of dirty data for the device backed by this bdi. Also
2060 * reschedules periodically and does kupdated style flushing.
2061 */
2062 void wb_workfn(struct work_struct *work)
2063 {
2064 struct bdi_writeback *wb = container_of(to_delayed_work(work),
2065 struct bdi_writeback, dwork);
2066 long pages_written;
2067
2068 set_worker_desc("flush-%s", dev_name(wb->bdi->dev));
2069 current->flags |= PF_SWAPWRITE;
2070
2071 if (likely(!current_is_workqueue_rescuer() ||
2072 !test_bit(WB_registered, &wb->state))) {
2073 /*
2074 * The normal path. Keep writing back @wb until its
2075 * work_list is empty. Note that this path is also taken
2076 * if @wb is shutting down even when we're running off the
2077 * rescuer as work_list needs to be drained.
2078 */
2079 do {
2080 pages_written = wb_do_writeback(wb);
2081 trace_writeback_pages_written(pages_written);
2082 } while (!list_empty(&wb->work_list));
2083 } else {
2084 /*
2085 * bdi_wq can't get enough workers and we're running off
2086 * the emergency worker. Don't hog it. Hopefully, 1024 is
2087 * enough for efficient IO.
2088 */
2089 pages_written = writeback_inodes_wb(wb, 1024,
2090 WB_REASON_FORKER_THREAD);
2091 trace_writeback_pages_written(pages_written);
2092 }
2093
2094 if (!list_empty(&wb->work_list))
2095 wb_wakeup(wb);
2096 else if (wb_has_dirty_io(wb) && dirty_writeback_interval)
2097 wb_wakeup_delayed(wb);
2098
2099 current->flags &= ~PF_SWAPWRITE;
2100
2101 if (IS_ENABLED(CONFIG_CGWB_BDP_WITH_EBDI))
> 2102 if (waitqueue_active(&wb->bdp_waitq))
2103 wake_up_all(&wb->bdp_waitq);
2104 }
2105
---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation
[-- Attachment #2: config.gz --]
[-- Type: application/gzip, Size: 32473 bytes --]
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2019-10-22 5:56 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-10-20 13:00 [RFC v1] writeback: add elastic bdi in cgwb bdp Hillf Danton
2019-10-21 23:33 ` kbuild test robot
2019-10-22 5:56 ` kbuild test robot
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.