* [PATCH] mm/damon: simplify stop mechanism
@ 2021-10-26 15:30 Changbin Du
2021-10-26 18:42 ` SeongJae Park
0 siblings, 1 reply; 4+ messages in thread
From: Changbin Du @ 2021-10-26 15:30 UTC (permalink / raw)
To: Andrew Morton; +Cc: SeongJae Park, linux-mm, linux-kernel, Changbin Du
A kernel thread can exit gracefully with kthread_stop(). So we don't need
a new flag 'kdamond_stop'. And to make sure the task struct is not freed
when accessing it, get task struct on start and put it on stop.
And since the return value of 'before_terminate' callback is never used,
we make it have no return value.
Signed-off-by: Changbin Du <changbin.du@gmail.com>
---
include/linux/damon.h | 3 +--
mm/damon/core.c | 59 +++++++++++++------------------------------
mm/damon/dbgfs.c | 5 ++--
3 files changed, 20 insertions(+), 47 deletions(-)
diff --git a/include/linux/damon.h b/include/linux/damon.h
index a14b3cc54cab..041966786270 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -322,7 +322,7 @@ struct damon_callback {
int (*before_start)(struct damon_ctx *context);
int (*after_sampling)(struct damon_ctx *context);
int (*after_aggregation)(struct damon_ctx *context);
- int (*before_terminate)(struct damon_ctx *context);
+ void (*before_terminate)(struct damon_ctx *context);
};
/**
@@ -381,7 +381,6 @@ struct damon_ctx {
/* public: */
struct task_struct *kdamond;
- bool kdamond_stop;
struct mutex kdamond_lock;
struct damon_primitive primitive;
diff --git a/mm/damon/core.c b/mm/damon/core.c
index 46a6afea3030..46401c56bfb2 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -390,17 +390,6 @@ static unsigned long damon_region_sz_limit(struct damon_ctx *ctx)
return sz;
}
-static bool damon_kdamond_running(struct damon_ctx *ctx)
-{
- bool running;
-
- mutex_lock(&ctx->kdamond_lock);
- running = ctx->kdamond != NULL;
- mutex_unlock(&ctx->kdamond_lock);
-
- return running;
-}
-
static int kdamond_fn(void *data);
/*
@@ -418,13 +407,13 @@ static int __damon_start(struct damon_ctx *ctx)
mutex_lock(&ctx->kdamond_lock);
if (!ctx->kdamond) {
err = 0;
- ctx->kdamond_stop = false;
ctx->kdamond = kthread_run(kdamond_fn, ctx, "kdamond.%d",
nr_running_ctxs);
if (IS_ERR(ctx->kdamond)) {
err = PTR_ERR(ctx->kdamond);
ctx->kdamond = NULL;
- }
+ } else
+ get_task_struct(ctx->kdamond);
}
mutex_unlock(&ctx->kdamond_lock);
@@ -474,18 +463,18 @@ int damon_start(struct damon_ctx **ctxs, int nr_ctxs)
*/
static int __damon_stop(struct damon_ctx *ctx)
{
+ int ret = -EPERM;
+
mutex_lock(&ctx->kdamond_lock);
if (ctx->kdamond) {
- ctx->kdamond_stop = true;
- mutex_unlock(&ctx->kdamond_lock);
- while (damon_kdamond_running(ctx))
- usleep_range(ctx->sample_interval,
- ctx->sample_interval * 2);
- return 0;
+ kthread_stop(ctx->kdamond);
+ put_task_struct(ctx->kdamond);
+ ctx->kdamond = NULL;
+ ret = 0;
}
mutex_unlock(&ctx->kdamond_lock);
- return -EPERM;
+ return ret;
}
/**
@@ -925,12 +914,8 @@ static bool kdamond_need_update_primitive(struct damon_ctx *ctx)
static bool kdamond_need_stop(struct damon_ctx *ctx)
{
struct damon_target *t;
- bool stop;
- mutex_lock(&ctx->kdamond_lock);
- stop = ctx->kdamond_stop;
- mutex_unlock(&ctx->kdamond_lock);
- if (stop)
+ if (kthread_should_stop())
return true;
if (!ctx->primitive.target_valid)
@@ -1021,13 +1006,6 @@ static int kdamond_wait_activation(struct damon_ctx *ctx)
return -EBUSY;
}
-static void set_kdamond_stop(struct damon_ctx *ctx)
-{
- mutex_lock(&ctx->kdamond_lock);
- ctx->kdamond_stop = true;
- mutex_unlock(&ctx->kdamond_lock);
-}
-
/*
* The monitoring daemon that runs as a kernel thread
*/
@@ -1038,17 +1016,18 @@ static int kdamond_fn(void *data)
struct damon_region *r, *next;
unsigned int max_nr_accesses = 0;
unsigned long sz_limit = 0;
+ bool done = false;
pr_debug("kdamond (%d) starts\n", current->pid);
if (ctx->primitive.init)
ctx->primitive.init(ctx);
if (ctx->callback.before_start && ctx->callback.before_start(ctx))
- set_kdamond_stop(ctx);
+ done = true;
sz_limit = damon_region_sz_limit(ctx);
- while (!kdamond_need_stop(ctx)) {
+ while (!kdamond_need_stop(ctx) && !done) {
if (kdamond_wait_activation(ctx))
continue;
@@ -1056,7 +1035,7 @@ static int kdamond_fn(void *data)
ctx->primitive.prepare_access_checks(ctx);
if (ctx->callback.after_sampling &&
ctx->callback.after_sampling(ctx))
- set_kdamond_stop(ctx);
+ done = true;
usleep_range(ctx->sample_interval, ctx->sample_interval + 1);
@@ -1069,7 +1048,7 @@ static int kdamond_fn(void *data)
sz_limit);
if (ctx->callback.after_aggregation &&
ctx->callback.after_aggregation(ctx))
- set_kdamond_stop(ctx);
+ done = true;
kdamond_apply_schemes(ctx);
kdamond_reset_aggregated(ctx);
kdamond_split_regions(ctx);
@@ -1088,16 +1067,12 @@ static int kdamond_fn(void *data)
damon_destroy_region(r, t);
}
- if (ctx->callback.before_terminate &&
- ctx->callback.before_terminate(ctx))
- set_kdamond_stop(ctx);
+ if (ctx->callback.before_terminate)
+ ctx->callback.before_terminate(ctx);
if (ctx->primitive.cleanup)
ctx->primitive.cleanup(ctx);
pr_debug("kdamond (%d) finishes\n", current->pid);
- mutex_lock(&ctx->kdamond_lock);
- ctx->kdamond = NULL;
- mutex_unlock(&ctx->kdamond_lock);
mutex_lock(&damon_lock);
nr_running_ctxs--;
diff --git a/mm/damon/dbgfs.c b/mm/damon/dbgfs.c
index befb27a29aab..eccc14b34901 100644
--- a/mm/damon/dbgfs.c
+++ b/mm/damon/dbgfs.c
@@ -645,18 +645,17 @@ static void dbgfs_fill_ctx_dir(struct dentry *dir, struct damon_ctx *ctx)
debugfs_create_file(file_names[i], 0600, dir, ctx, fops[i]);
}
-static int dbgfs_before_terminate(struct damon_ctx *ctx)
+static void dbgfs_before_terminate(struct damon_ctx *ctx)
{
struct damon_target *t, *next;
if (!targetid_is_pid(ctx))
- return 0;
+ return;
damon_for_each_target_safe(t, next, ctx) {
put_pid((struct pid *)t->id);
damon_destroy_target(t);
}
- return 0;
}
static struct damon_ctx *dbgfs_new_ctx(void)
--
2.32.0
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH] mm/damon: simplify stop mechanism
2021-10-26 15:30 [PATCH] mm/damon: simplify stop mechanism Changbin Du
@ 2021-10-26 18:42 ` SeongJae Park
2021-10-27 6:13 ` Changbin Du
0 siblings, 1 reply; 4+ messages in thread
From: SeongJae Park @ 2021-10-26 18:42 UTC (permalink / raw)
To: Changbin Du; +Cc: Andrew Morton, SeongJae Park, linux-mm, linux-kernel
Hello Changbin,
On Tue, 26 Oct 2021 23:30:33 +0800 Changbin Du <changbin.du@gmail.com> wrote:
> An kernel thread can exit gracefully with kthread_stop(). So we don't need a
> new flag 'kdamond_stop'. And to make sure the task struct is not freed when
> accessing it, get task struct on start and put it on stop.
We previously considered using kthread_stop() here. However, we ended up with
the current code because kdamond can terminate itself when all target processes
are invalid[1].
Seems this patch is also not fully prepared for the self-termination case. I
left some comments below.
[1] https://lore.kernel.org/linux-mm/20210624102623.24563-1-sjpark@amazon.de/
>
> And since the return value of 'before_terminate' callback is never used,
> we make it have no return value.
This looks nice to me. Could you please send this again as a separate patch?
>
> Signed-off-by: Changbin Du <changbin.du@gmail.com>
> ---
> include/linux/damon.h | 3 +--
> mm/damon/core.c | 59 +++++++++++++------------------------------
> mm/damon/dbgfs.c | 5 ++--
> 3 files changed, 20 insertions(+), 47 deletions(-)
>
> diff --git a/include/linux/damon.h b/include/linux/damon.h
> index a14b3cc54cab..041966786270 100644
> --- a/include/linux/damon.h
> +++ b/include/linux/damon.h
[...]
> --- a/mm/damon/core.c
> +++ b/mm/damon/core.c
[...]
> @@ -1069,7 +1048,7 @@ static int kdamond_fn(void *data)
> sz_limit);
> if (ctx->callback.after_aggregation &&
> ctx->callback.after_aggregation(ctx))
> - set_kdamond_stop(ctx);
> + done = true;
> kdamond_apply_schemes(ctx);
> kdamond_reset_aggregated(ctx);
> kdamond_split_regions(ctx);
> @@ -1088,16 +1067,12 @@ static int kdamond_fn(void *data)
> damon_destroy_region(r, t);
> }
>
> - if (ctx->callback.before_terminate &&
> - ctx->callback.before_terminate(ctx))
> - set_kdamond_stop(ctx);
> + if (ctx->callback.before_terminate)
> + ctx->callback.before_terminate(ctx);
> if (ctx->primitive.cleanup)
> ctx->primitive.cleanup(ctx);
>
> pr_debug("kdamond (%d) finishes\n", current->pid);
> - mutex_lock(&ctx->kdamond_lock);
> - ctx->kdamond = NULL;
> - mutex_unlock(&ctx->kdamond_lock);
When kdamond is self-terminating, ctx->kdamond will not be nullified. As a
result, this patch can introduce errors like the one below:
# cd /sys/kernel/debug/damon
# sleep 60 &
[1] 1926
# echo $(pidof sleep) > target_ids
# echo on > monitor_on
# cat monitor_on
on
# # after 60 seconds, sleep finishes and kdamond is self-terminated
# cat monitor_on
off
# echo 42 > target_ids
bash: echo: write error: Device or resource busy
If we simply restore the nullification here with the mutex locking, we would
end up with a deadlock because __damon_stop() calls kthread_stop() while holding
ctx->kdamond_lock.
Also, the reference count of ctx->kdamond, which is increased by __damon_start(),
would not be decreased in that case.
If I'm missing something, please let me know.
Thanks,
SJ
[...]
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] mm/damon: simplify stop mechanism
2021-10-26 18:42 ` SeongJae Park
@ 2021-10-27 6:13 ` Changbin Du
2021-10-27 10:51 ` SeongJae Park
0 siblings, 1 reply; 4+ messages in thread
From: Changbin Du @ 2021-10-27 6:13 UTC (permalink / raw)
To: SeongJae Park; +Cc: Changbin Du, Andrew Morton, linux-mm, linux-kernel
On Tue, Oct 26, 2021 at 06:42:03PM +0000, SeongJae Park wrote:
> Hello Changbin,
>
> On Tue, 26 Oct 2021 23:30:33 +0800 Changbin Du <changbin.du@gmail.com> wrote:
>
> > An kernel thread can exit gracefully with kthread_stop(). So we don't need a
> > new flag 'kdamond_stop'. And to make sure the task struct is not freed when
> > accessing it, get task struct on start and put it on stop.
>
> We previously considered using kthread_stop() here. However, we resulted in
> current code because kdamond can be self-terminated when all target processes
> are invalid[1].
>
> Seems this patch is also not fully prepared for the self-termination case. I
> left some comments below.
>
> [1] https://lore.kernel.org/linux-mm/20210624102623.24563-1-sjpark@amazon.de/
>
> >
> > And since the return value of 'before_terminate' callback is never used,
> > we make it have no return value.
>
> This looks nice to me. Could you please send this again as a separate patch?
>
Sure, I'll do it later.
> >
> > Signed-off-by: Changbin Du <changbin.du@gmail.com>
> > ---
> > include/linux/damon.h | 3 +--
> > mm/damon/core.c | 59 +++++++++++++------------------------------
> > mm/damon/dbgfs.c | 5 ++--
> > 3 files changed, 20 insertions(+), 47 deletions(-)
> >
> > diff --git a/include/linux/damon.h b/include/linux/damon.h
> > index a14b3cc54cab..041966786270 100644
> > --- a/include/linux/damon.h
> > +++ b/include/linux/damon.h
> [...]
> > --- a/mm/damon/core.c
> > +++ b/mm/damon/core.c
> [...]
> > @@ -1069,7 +1048,7 @@ static int kdamond_fn(void *data)
> > sz_limit);
> > if (ctx->callback.after_aggregation &&
> > ctx->callback.after_aggregation(ctx))
> > - set_kdamond_stop(ctx);
> > + done = true;
> > kdamond_apply_schemes(ctx);
> > kdamond_reset_aggregated(ctx);
> > kdamond_split_regions(ctx);
> > @@ -1088,16 +1067,12 @@ static int kdamond_fn(void *data)
> > damon_destroy_region(r, t);
> > }
> >
> > - if (ctx->callback.before_terminate &&
> > - ctx->callback.before_terminate(ctx))
> > - set_kdamond_stop(ctx);
> > + if (ctx->callback.before_terminate)
> > + ctx->callback.before_terminate(ctx);
> > if (ctx->primitive.cleanup)
> > ctx->primitive.cleanup(ctx);
> >
> > pr_debug("kdamond (%d) finishes\n", current->pid);
> > - mutex_lock(&ctx->kdamond_lock);
> > - ctx->kdamond = NULL;
> > - mutex_unlock(&ctx->kdamond_lock);
>
> When kdamond is self-terminating, ctx->kdamond will not be nullfified. As a
> result, this patch can introduce some errors like below:
>
> # cd /sys/kernel/debug/damon
> # sleep 60 &
> [1] 1926
> # echo $(pidof sleep) > target_ids
> # echo on > monitor_on
> # cat monitor_on
> on
> # # after 60 seconds, sleep finishes and kdamond is self-terminated
> # cat monitor_on
> off
> # echo 42 > target_ids
> bash: echo: write error: Device or resource busy
>
> If we simply restore the nullification here with the mutex locking, we would
> result in a deadlock because __damon_stop() calls kthread_stop() while holding
> ctx->kdamond_lock.
>
> Also, the reference count of ctx->kdamond, which increased by __damon_start(),
> would not be decreased in the case.
>
If so, I suppose the change below should work correctly (still setting
ctx->kdamond to NULL at the end of kdamond).
static int __damon_stop(struct damon_ctx *ctx)
{
+ struct task_struct *tsk;
+
mutex_lock(&ctx->kdamond_lock);
- if (ctx->kdamond) {
- ctx->kdamond_stop = true;
+ tsk = ctx->kdamond;
+ if (tsk) {
+ get_task_struct(tsk);
mutex_unlock(&ctx->kdamond_lock);
- while (damon_kdamond_running(ctx))
- usleep_range(ctx->sample_interval,
- ctx->sample_interval * 2);
+ kthread_stop(tsk);
+ put_task_struct(tsk);
return 0;
}
mutex_unlock(&ctx->kdamond_lock);
> If I'm missing something, please let me know.
>
>
> Thanks,
> SJ
>
> [...]
--
Cheers,
Changbin Du
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] mm/damon: simplify stop mechanism
2021-10-27 6:13 ` Changbin Du
@ 2021-10-27 10:51 ` SeongJae Park
0 siblings, 0 replies; 4+ messages in thread
From: SeongJae Park @ 2021-10-27 10:51 UTC (permalink / raw)
To: Changbin Du; +Cc: SeongJae Park, Andrew Morton, linux-mm, linux-kernel
On Wed, 27 Oct 2021 14:13:38 +0800 Changbin Du <changbin.du@gmail.com> wrote:
> On Tue, Oct 26, 2021 at 06:42:03PM +0000, SeongJae Park wrote:
> > Hello Changbin,
> >
> > On Tue, 26 Oct 2021 23:30:33 +0800 Changbin Du <changbin.du@gmail.com> wrote:
> >
> > > An kernel thread can exit gracefully with kthread_stop(). So we don't need a
> > > new flag 'kdamond_stop'. And to make sure the task struct is not freed when
> > > accessing it, get task struct on start and put it on stop.
> >
> > We previously considered using kthread_stop() here. However, we resulted in
> > current code because kdamond can be self-terminated when all target processes
> > are invalid[1].
> >
> > Seems this patch is also not fully prepared for the self-termination case. I
> > left some comments below.
> >
> > [1] https://lore.kernel.org/linux-mm/20210624102623.24563-1-sjpark@amazon.de/
> >
> > >
> > > And since the return value of 'before_terminate' callback is never used,
> > > we make it have no return value.
> >
> > This looks nice to me. Could you please send this again as a separate patch?
> >
> Sure, I'll do it later.
>
> > >
> > > Signed-off-by: Changbin Du <changbin.du@gmail.com>
> > > ---
> > > include/linux/damon.h | 3 +--
> > > mm/damon/core.c | 59 +++++++++++++------------------------------
> > > mm/damon/dbgfs.c | 5 ++--
> > > 3 files changed, 20 insertions(+), 47 deletions(-)
> > >
> > > diff --git a/include/linux/damon.h b/include/linux/damon.h
> > > index a14b3cc54cab..041966786270 100644
> > > --- a/include/linux/damon.h
> > > +++ b/include/linux/damon.h
> > [...]
> > > --- a/mm/damon/core.c
> > > +++ b/mm/damon/core.c
> > [...]
> > > @@ -1069,7 +1048,7 @@ static int kdamond_fn(void *data)
> > > sz_limit);
> > > if (ctx->callback.after_aggregation &&
> > > ctx->callback.after_aggregation(ctx))
> > > - set_kdamond_stop(ctx);
> > > + done = true;
> > > kdamond_apply_schemes(ctx);
> > > kdamond_reset_aggregated(ctx);
> > > kdamond_split_regions(ctx);
> > > @@ -1088,16 +1067,12 @@ static int kdamond_fn(void *data)
> > > damon_destroy_region(r, t);
> > > }
> > >
> > > - if (ctx->callback.before_terminate &&
> > > - ctx->callback.before_terminate(ctx))
> > > - set_kdamond_stop(ctx);
> > > + if (ctx->callback.before_terminate)
> > > + ctx->callback.before_terminate(ctx);
> > > if (ctx->primitive.cleanup)
> > > ctx->primitive.cleanup(ctx);
> > >
> > > pr_debug("kdamond (%d) finishes\n", current->pid);
> > > - mutex_lock(&ctx->kdamond_lock);
> > > - ctx->kdamond = NULL;
> > > - mutex_unlock(&ctx->kdamond_lock);
> >
> > When kdamond is self-terminating, ctx->kdamond will not be nullfified. As a
> > result, this patch can introduce some errors like below:
> >
> > # cd /sys/kernel/debug/damon
> > # sleep 60 &
> > [1] 1926
> > # echo $(pidof sleep) > target_ids
> > # echo on > monitor_on
> > # cat monitor_on
> > on
> > # # after 60 seconds, sleep finishes and kdamond is self-terminated
> > # cat monitor_on
> > off
> > # echo 42 > target_ids
> > bash: echo: write error: Device or resource busy
> >
> > If we simply restore the nullification here with the mutex locking, we would
> > result in a deadlock because __damon_stop() calls kthread_stop() while holding
> > ctx->kdamond_lock.
> >
> > Also, the reference count of ctx->kdamond, which increased by __damon_start(),
> > would not be decreased in the case.
> >
>
> If so, I suppose below change should work correctly (still set ctx->kdamond to
> NULL at the end of kdamond).
>
> static int __damon_stop(struct damon_ctx *ctx)
> {
> + struct task_struct *tsk;
> +
> mutex_lock(&ctx->kdamond_lock);
> - if (ctx->kdamond) {
> - ctx->kdamond_stop = true;
> + tsk = ctx->kdamond;
> + if (tsk) {
> + get_task_struct(tsk);
> mutex_unlock(&ctx->kdamond_lock);
> - while (damon_kdamond_running(ctx))
> - usleep_range(ctx->sample_interval,
> - ctx->sample_interval * 2);
> + kthread_stop(tsk);
> + put_task_struct(tsk);
> return 0;
> }
> mutex_unlock(&ctx->kdamond_lock);
Agreed, this seems to make sense. Could you please send a complete patch?
Thanks,
SJ
>
>
> > If I'm missing something, please let me know.
> >
> >
> > Thanks,
> > SJ
> >
> > [...]
>
> --
> Cheers,
> Changbin Du
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2021-10-27 10:52 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-10-26 15:30 [PATCH] mm/damon: simplify stop mechanism Changbin Du
2021-10-26 18:42 ` SeongJae Park
2021-10-27 6:13 ` Changbin Du
2021-10-27 10:51 ` SeongJae Park
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).