* [PATCH] ASoC: dpcm: fix race condition to dpcm links in dpcm_be_dai_trigger
[not found] <CGME20210929054921epcas2p2fbe35a6262405e064aac3bd92b22b1aa@epcas2p2.samsung.com>
@ 2021-09-29 5:49 ` Gyeongtaek Lee
2021-09-29 14:11 ` Pierre-Louis Bossart
0 siblings, 1 reply; 5+ messages in thread
From: Gyeongtaek Lee @ 2021-09-29 5:49 UTC (permalink / raw)
To: 'Takashi Iwai'
Cc: alsa-devel, kimty, lgirdwood, senius.park, donggyun.ko, hmseo,
seungbin.lee, s47.kang, pilsun.jang
If a routing change and an underrun stop run at the same time,
a data abort can occur through the following sequence.
CPU0: Processing underrun CPU1: Processing routing change
dpcm_be_dai_trigger(): dpcm_be_disconnect():
for_each_dpcm_be(fe, stream, dpcm) {
spin_lock_irqsave(&fe->card->dpcm_lock, flags);
list_del(&dpcm->list_be);
list_del(&dpcm->list_fe);
spin_unlock_irqrestore(&fe->card->dpcm_lock, flags);
kfree(dpcm);
struct snd_soc_pcm_runtime *be = dpcm->be; <-- Accessing freed memory
To prevent this situation, dpcm_lock must be held while accessing
the lists of dpcm links.
Signed-off-by: Gyeongtaek Lee <gt82.lee@samsung.com>
Cc: stable@vger.kernel.org
---
sound/soc/soc-pcm.c | 53 ++++++++++++++++++++++++++++++++++++++++++---
1 file changed, 50 insertions(+), 3 deletions(-)
diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c
index 48f71bb81a2f..df2cd4c0dabe 100644
--- a/sound/soc/soc-pcm.c
+++ b/sound/soc/soc-pcm.c
@@ -1993,17 +1993,63 @@ static int dpcm_fe_dai_hw_params(struct snd_pcm_substream *substream,
return ret;
}
+struct dpcm_be_list {
+ unsigned int num;
+ struct snd_soc_pcm_runtime *be[];
+};
+
+static int dpcm_create_be_list(struct snd_soc_pcm_runtime *fe, int stream,
+ struct dpcm_be_list **be_list)
+{
+ struct snd_soc_dpcm *dpcm;
+ struct dpcm_be_list *be;
+ int size = 0;
+ int ret = 0;
+ unsigned long flags;
+
+ spin_lock_irqsave(&fe->card->dpcm_lock, flags);
+
+ for_each_dpcm_be(fe, stream, dpcm)
+ size++;
+
+ be = kzalloc(struct_size(be, be, size), GFP_ATOMIC);
+ if (!be) {
+ ret = -ENOMEM;
+ } else {
+ unsigned int i = 0;
+
+ for_each_dpcm_be(fe, stream, dpcm)
+ be->be[i++] = dpcm->be;
+
+ *be_list = be;
+ }
+
+ spin_unlock_irqrestore(&fe->card->dpcm_lock, flags);
+
+ return ret;
+}
+
+static void dpcm_free_be_list(struct dpcm_be_list *be_list)
+{
+ kfree(be_list);
+}
+
int dpcm_be_dai_trigger(struct snd_soc_pcm_runtime *fe, int stream,
int cmd)
{
struct snd_soc_pcm_runtime *be;
- struct snd_soc_dpcm *dpcm;
+ struct dpcm_be_list *be_list;
int ret = 0;
+ int i;
- for_each_dpcm_be(fe, stream, dpcm) {
+ ret = dpcm_create_be_list(fe, stream, &be_list);
+ if (ret < 0)
+ return ret;
+
+ for(i = 0; i < be_list->num; i++) {
struct snd_pcm_substream *be_substream;
- be = dpcm->be;
+ be = be_list->be[i];
be_substream = snd_soc_dpcm_get_substream(be, stream);
/* is this op for this BE ? */
@@ -2092,6 +2138,7 @@ int dpcm_be_dai_trigger(struct snd_soc_pcm_runtime *fe, int stream,
if (ret < 0)
dev_err(fe->dev, "ASoC: %s() failed at %s (%d)\n",
__func__, be->dai_link->name, ret);
+ dpcm_free_be_list(be_list);
return ret;
}
EXPORT_SYMBOL_GPL(dpcm_be_dai_trigger);
base-commit: 4ac6d90867a4de2e12117e755dbd76e08d88697f
--
2.21.0
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH] ASoC: dpcm: fix race condition to dpcm links in dpcm_be_dai_trigger
2021-09-29 5:49 ` [PATCH] ASoC: dpcm: fix race condition to dpcm links in dpcm_be_dai_trigger Gyeongtaek Lee
@ 2021-09-29 14:11 ` Pierre-Louis Bossart
2021-09-29 21:01 ` Pierre-Louis Bossart
0 siblings, 1 reply; 5+ messages in thread
From: Pierre-Louis Bossart @ 2021-09-29 14:11 UTC (permalink / raw)
To: Gyeongtaek Lee, 'Takashi Iwai'
Cc: alsa-devel, kimty, lgirdwood, senius.park, donggyun.ko, hmseo,
seungbin.lee, s47.kang, pilsun.jang
On 9/29/21 12:49 AM, Gyeongtaek Lee wrote:
> If routing change and underrun stop is run at the same time,
> data abort can be occurred by the following sequence.
>
> CPU0: Processing underrun CPU1: Processing routing change
> dpcm_be_dai_trigger(): dpcm_be_disconnect():
>
> for_each_dpcm_be(fe, stream, dpcm) {
>
> spin_lock_irqsave(&fe->card->dpcm_lock, flags);
> list_del(&dpcm->list_be);
> list_del(&dpcm->list_fe);
> spin_unlock_irqrestore(&fe->card->dpcm_lock, flags);
> kfree(dpcm);
>
> struct snd_soc_pcm_runtime *be = dpcm->be; <-- Accessing freed memory
>
> To prevent this situation, dpcm_lock is needed during accessing
> the lists for dpcm links.
Isn't there still a possible inconsistency here introduced by the
duplication of the BE list?
You protect the list creation, but before you use it in
dpcm_be_dai_trigger(), there's a time window where the function could be
pre-empted and a disconnect event might have happened. As a result you
could trigger a BE that's no longer connected.
What you identified as a race is likely valid, but how to fix it isn't
clear to me - the DPCM code isn't self-explanatory at all with its use
in various places of the dpcm_lock spinlock, the pcm mutex, the card mutex.
Ideally we would need to find a way to prevent changes in connections
while we are doing the triggers, but triggers can take a bit of time if
they involve any sort of communication over a bus. I really wonder if
this dpcm_lock should be a mutex and if the model for DPCM really
involves interrupt contexts as the irqsave/irqrestore mentions hint at.
> Signed-off-by: Gyeongtaek Lee <gt82.lee@samsung.com>
> Cc: stable@vger.kernel.org
> ---
> sound/soc/soc-pcm.c | 53 ++++++++++++++++++++++++++++++++++++++++++---
> 1 file changed, 50 insertions(+), 3 deletions(-)
>
> diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c
> index 48f71bb81a2f..df2cd4c0dabe 100644
> --- a/sound/soc/soc-pcm.c
> +++ b/sound/soc/soc-pcm.c
> @@ -1993,17 +1993,63 @@ static int dpcm_fe_dai_hw_params(struct snd_pcm_substream *substream,
> return ret;
> }
>
> +struct dpcm_be_list {
> + unsigned int num;
> + struct snd_soc_pcm_runtime *be[];
> +};
> +
> +static int dpcm_create_be_list(struct snd_soc_pcm_runtime *fe, int stream,
> + struct dpcm_be_list **be_list)
> +{
> + struct snd_soc_dpcm *dpcm;
> + struct dpcm_be_list *be;
> + int size = 0;
> + int ret = 0;
> + unsigned long flags;
> +
> + spin_lock_irqsave(&fe->card->dpcm_lock, flags);
> +
> + for_each_dpcm_be(fe, stream, dpcm)
> + size++;
> +
> + be = kzalloc(struct_size(be, be, size), GFP_ATOMIC);
> + if (!be) {
> + ret = -ENOMEM;
> + } else {
> + unsigned int i = 0;
> +
> + for_each_dpcm_be(fe, stream, dpcm)
> + be->be[i++] = dpcm->be;
> +
> + *be_list = be;
> + }
> +
> + spin_unlock_irqrestore(&fe->card->dpcm_lock, flags);
> +
> + return ret;
> +}
> +
> +static void dpcm_free_be_list(struct dpcm_be_list *be_list)
> +{
> + kfree(be_list);
> +}
> +
> int dpcm_be_dai_trigger(struct snd_soc_pcm_runtime *fe, int stream,
> int cmd)
> {
> struct snd_soc_pcm_runtime *be;
> - struct snd_soc_dpcm *dpcm;
> + struct dpcm_be_list *be_list;
> int ret = 0;
> + int i;
>
> - for_each_dpcm_be(fe, stream, dpcm) {
> + ret = dpcm_create_be_list(fe, stream, &be_list);
> + if (ret < 0)
> + return ret;
> +
> + for(i = 0; i < be_list->num; i++) {
> struct snd_pcm_substream *be_substream;
>
> - be = dpcm->be;
> + be = be_list->be[i];
> be_substream = snd_soc_dpcm_get_substream(be, stream);
>
> /* is this op for this BE ? */
> @@ -2092,6 +2138,7 @@ int dpcm_be_dai_trigger(struct snd_soc_pcm_runtime *fe, int stream,
> if (ret < 0)
> dev_err(fe->dev, "ASoC: %s() failed at %s (%d)\n",
> __func__, be->dai_link->name, ret);
> + dpcm_free_be_list(be_list);
> return ret;
> }
> EXPORT_SYMBOL_GPL(dpcm_be_dai_trigger);
>
> base-commit: 4ac6d90867a4de2e12117e755dbd76e08d88697f
>
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] ASoC: dpcm: fix race condition to dpcm links in dpcm_be_dai_trigger
2021-09-29 14:11 ` Pierre-Louis Bossart
@ 2021-09-29 21:01 ` Pierre-Louis Bossart
2021-09-30 3:48 ` Gyeongtaek Lee
0 siblings, 1 reply; 5+ messages in thread
From: Pierre-Louis Bossart @ 2021-09-29 21:01 UTC (permalink / raw)
To: Gyeongtaek Lee, 'Takashi Iwai'
Cc: alsa-devel, kimty, lgirdwood, senius.park, donggyun.ko, hmseo,
seungbin.lee, s47.kang, pilsun.jang
>> If routing change and underrun stop is run at the same time,
>> data abort can be occurred by the following sequence.
>>
>> CPU0: Processing underrun CPU1: Processing routing change
>> dpcm_be_dai_trigger(): dpcm_be_disconnect():
>>
>> for_each_dpcm_be(fe, stream, dpcm) {
>>
>> spin_lock_irqsave(&fe->card->dpcm_lock, flags);
>> list_del(&dpcm->list_be);
>> list_del(&dpcm->list_fe);
>> spin_unlock_irqrestore(&fe->card->dpcm_lock, flags);
>> kfree(dpcm);
>>
>> struct snd_soc_pcm_runtime *be = dpcm->be; <-- Accessing freed memory
>>
>> To prevent this situation, dpcm_lock is needed during accessing
>> the lists for dpcm links.
>
> Isn't there still a possible inconsistency here introduced by the
> duplication of the BE list?
>
> You protect the list creation, but before you use it in
> dpcm_be_dai_trigger(), there's a time window where the function could be
> pre-empted and a disconnect event might have happened. As a result you
> could trigger a BE that's no longer connected.
>
> What you identified as a race is likely valid, but how to fix it isn't
> clear to me - the DPCM code isn't self-explanatory at all with its use
> in various places of the dpcm_lock spinlock, the pcm mutex, the card mutex.
>
> Ideally we would need to find a way to prevent changes in connections
> while we are doing the triggers, but triggers can take a bit of time if
> they involve any sort of communication over a bus. I really wonder if
> this dpcm_lock should be a mutex and if the model for DPCM really
> involves interrupt contexts as the irqsave/irqrestore mentions hint at.
To follow-up on this, I started experimenting with a replacement of the
'dpcm_lock' spinlock with a 'dpcm_mutex', see
https://github.com/thesofproject/linux/pull/3186
If we combine both of our results, the 'right' solution might be to take
this mutex before every use of for_each_dpcm_be(), and unlock it at the
end of the loop, with additional changes to avoid re-taking the same
mutex in helper functions.
There's still a part in DPCM that I can't figure out: there is an
elaborate trick with an explicit comment
/* if FE's runtime_update is already set, we're in race;
* process this trigger later at exit
*/
Which looks like a missing mutex somewhere, or an overkill solution that
might never be needed.
^ permalink raw reply [flat|nested] 5+ messages in thread
* RE: [PATCH] ASoC: dpcm: fix race condition to dpcm links in dpcm_be_dai_trigger
2021-09-29 21:01 ` Pierre-Louis Bossart
@ 2021-09-30 3:48 ` Gyeongtaek Lee
0 siblings, 0 replies; 5+ messages in thread
From: Gyeongtaek Lee @ 2021-09-30 3:48 UTC (permalink / raw)
To: 'Pierre-Louis Bossart', 'Takashi Iwai'
Cc: alsa-devel, kimty, lgirdwood, senius.park, donggyun.ko, hmseo,
seungbin.lee, s47.kang, pilsun.jang
>>> If routing change and underrun stop is run at the same time,
>>> data abort can be occurred by the following sequence.
>>>
>>> CPU0: Processing underrun CPU1: Processing routing change
>>> dpcm_be_dai_trigger(): dpcm_be_disconnect():
>>>
>>> for_each_dpcm_be(fe, stream, dpcm) {
>>>
>>> spin_lock_irqsave(&fe->card->dpcm_lock, flags);
>>> list_del(&dpcm->list_be);
>>> list_del(&dpcm->list_fe);
>>> spin_unlock_irqrestore(&fe->card->dpcm_lock, flags);
>>> kfree(dpcm);
>>>
>>> struct snd_soc_pcm_runtime *be = dpcm->be; <-- Accessing freed memory
>>>
>>> To prevent this situation, dpcm_lock is needed during accessing
>>> the lists for dpcm links.
>>
>> Isn't there still a possible inconsistency here introduced by the
>> duplication of the BE list?
>>
>> You protect the list creation, but before you use it in
>> dpcm_be_dai_trigger(), there's a time window where the function could be
>> pre-empted and a disconnect event might have happened. As a result you
>> could trigger a BE that's no longer connected.
>>
>> What you identified as a race is likely valid, but how to fix it isn't
>> clear to me - the DPCM code isn't self-explanatory at all with its use
>> in various places of the dpcm_lock spinlock, the pcm mutex, the card mutex.
>>
>> Ideally we would need to find a way to prevent changes in connections
>> while we are doing the triggers, but triggers can take a bit of time if
>> they involve any sort of communication over a bus. I really wonder if
>> this dpcm_lock should be a mutex and if the model for DPCM really
>> involves interrupt contexts as the irqsave/irqrestore mentions hint at.
>
>To follow-up on this, I started experimenting with a replacement of the
>'dpcm_lock' spinlock with a 'dpcm_mutex', see
>https://protect2.fireeye.com/v1/url?k=bdfd74d3-e2664dcc-bdfcff9c-000babdfecba-6f3671279e770f0b&q=1&e=7fdf074e-2aa1-44f0-bd52-58f2d26c9bfb&u=https%3A%2F%2Fgithub.com%2Fthesofproject%2Flinux%2Fpull%2F3186
>
>If we combine both of our results, the 'right' solution might be to take
>this mutex before every use of for_each_dpcm_be(), and unlock it at the
>end of the loop, which additional changes to avoid re-taking the same
>mutex in helper functions.
>
>there's still a part in DPCM that I can't figure out, there is an
>elaborate trick with an explicit comment
>
> /* if FE's runtime_update is already set, we're in race;
> * process this trigger later at exit
> */
>
>Which looks like a missing mutex somewhere, or an overkill solution that
>might never be needed.
>
You are right.
This patch can't resolve the inconsistency problem completely.
I thought that this patch could resolve at least part of the problem, and
that it could help other developers as well as me.
I also thought that an invalid trigger on a disconnected BE DAI would be prevented
by the state check in the trigger function, as shown below.
int dpcm_be_dai_trigger(struct snd_soc_pcm_runtime *fe, int stream,
int cmd)
{
struct snd_soc_dpcm *dpcm;
int ret = 0;
for_each_dpcm_be(fe, stream, dpcm) {
.......
switch (cmd) {
case SNDRV_PCM_TRIGGER_START:
/* Following if statement protect invalid control. */
if ((be->dpcm[stream].state != SND_SOC_DPCM_STATE_PREPARE) &&
(be->dpcm[stream].state != SND_SOC_DPCM_STATE_STOP) &&
(be->dpcm[stream].state != SND_SOC_DPCM_STATE_PAUSED))
continue;
ret = dpcm_do_trigger(dpcm, be_substream, cmd);
I really appreciate that there is already a project addressing this problem.
But if that project needs more time to be merged into mainline,
I think this patch could be used until it is merged.
If you don't mind, would you reconsider this patch once more?
Thank you,
Gyeongtaek Lee.
^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH] ASoC: dpcm: fix race condition to dpcm links in dpcm_be_dai_trigger
[not found] <CGME20210907012440epcas2p3cab33295dff61e84ae422457fbc795f6@epcas2p3.samsung.com>
@ 2021-09-07 1:24 ` Gyeongtaek Lee
0 siblings, 0 replies; 5+ messages in thread
From: Gyeongtaek Lee @ 2021-09-07 1:24 UTC (permalink / raw)
To: 'Takashi Iwai'
Cc: alsa-devel, kimty, lgirdwood, senius.park, donggyun.ko, hmseo,
seungbin.lee, s47.kang, pilsun.jang
If a routing change and an underrun stop run at the same time,
a data abort can occur through the following sequence.
CPU0: Processing underrun CPU1: Processing routing change
dpcm_be_dai_trigger(): dpcm_be_disconnect():
for_each_dpcm_be(fe, stream, dpcm) {
spin_lock_irqsave(&fe->card->dpcm_lock, flags);
list_del(&dpcm->list_be);
list_del(&dpcm->list_fe);
spin_unlock_irqrestore(&fe->card->dpcm_lock, flags);
kfree(dpcm);
struct snd_soc_pcm_runtime *be = dpcm->be; <-- Accessing freed memory
To prevent this situation, dpcm_lock must be held while accessing
the lists of dpcm links.
Signed-off-by: Gyeongtaek Lee <gt82.lee@samsung.com>
Cc: stable@vger.kernel.org
---
sound/soc/soc-pcm.c | 53 ++++++++++++++++++++++++++++++++++++++++++---
1 file changed, 50 insertions(+), 3 deletions(-)
diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c
index 48f71bb81a2f..df2cd4c0dabe 100644
--- a/sound/soc/soc-pcm.c
+++ b/sound/soc/soc-pcm.c
@@ -1993,17 +1993,63 @@ static int dpcm_fe_dai_hw_params(struct snd_pcm_substream *substream,
return ret;
}
+struct dpcm_be_list {
+ unsigned int num;
+ struct snd_soc_pcm_runtime *be[];
+};
+
+static int dpcm_create_be_list(struct snd_soc_pcm_runtime *fe, int stream,
+ struct dpcm_be_list **be_list)
+{
+ struct snd_soc_dpcm *dpcm;
+ struct dpcm_be_list *be;
+ int size = 0;
+ int ret = 0;
+ unsigned long flags;
+
+ spin_lock_irqsave(&fe->card->dpcm_lock, flags);
+
+ for_each_dpcm_be(fe, stream, dpcm)
+ size++;
+
+ be = kzalloc(struct_size(be, be, size), GFP_ATOMIC);
+ if (!be) {
+ ret = -ENOMEM;
+ } else {
+ unsigned int i = 0;
+
+ for_each_dpcm_be(fe, stream, dpcm)
+ be->be[i++] = dpcm->be;
+
+ *be_list = be;
+ }
+
+ spin_unlock_irqrestore(&fe->card->dpcm_lock, flags);
+
+ return ret;
+}
+
+static void dpcm_free_be_list(struct dpcm_be_list *be_list)
+{
+ kfree(be_list);
+}
+
int dpcm_be_dai_trigger(struct snd_soc_pcm_runtime *fe, int stream,
int cmd)
{
struct snd_soc_pcm_runtime *be;
- struct snd_soc_dpcm *dpcm;
+ struct dpcm_be_list *be_list;
int ret = 0;
+ int i;
- for_each_dpcm_be(fe, stream, dpcm) {
+ ret = dpcm_create_be_list(fe, stream, &be_list);
+ if (ret < 0)
+ return ret;
+
+ for(i = 0; i < be_list->num; i++) {
struct snd_pcm_substream *be_substream;
- be = dpcm->be;
+ be = be_list->be[i];
be_substream = snd_soc_dpcm_get_substream(be, stream);
/* is this op for this BE ? */
@@ -2092,6 +2138,7 @@ int dpcm_be_dai_trigger(struct snd_soc_pcm_runtime *fe, int stream,
if (ret < 0)
dev_err(fe->dev, "ASoC: %s() failed at %s (%d)\n",
__func__, be->dai_link->name, ret);
+ dpcm_free_be_list(be_list);
return ret;
}
EXPORT_SYMBOL_GPL(dpcm_be_dai_trigger);
base-commit: 4ac6d90867a4de2e12117e755dbd76e08d88697f
--
2.21.0
^ permalink raw reply related [flat|nested] 5+ messages in thread
end of thread, other threads:[~2021-09-30 3:49 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
[not found] <CGME20210929054921epcas2p2fbe35a6262405e064aac3bd92b22b1aa@epcas2p2.samsung.com>
2021-09-29 5:49 ` [PATCH] ASoC: dpcm: fix race condition to dpcm links in dpcm_be_dai_trigger Gyeongtaek Lee
2021-09-29 14:11 ` Pierre-Louis Bossart
2021-09-29 21:01 ` Pierre-Louis Bossart
2021-09-30 3:48 ` Gyeongtaek Lee
[not found] <CGME20210907012440epcas2p3cab33295dff61e84ae422457fbc795f6@epcas2p3.samsung.com>
2021-09-07 1:24 ` Gyeongtaek Lee
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).