alsa-devel.alsa-project.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] ASoC: dpcm: fix race condition to dpcm links in dpcm_be_dai_trigger
       [not found] <CGME20210929054921epcas2p2fbe35a6262405e064aac3bd92b22b1aa@epcas2p2.samsung.com>
@ 2021-09-29  5:49 ` Gyeongtaek Lee
  2021-09-29 14:11   ` Pierre-Louis Bossart
  0 siblings, 1 reply; 5+ messages in thread
From: Gyeongtaek Lee @ 2021-09-29  5:49 UTC (permalink / raw)
  To: 'Takashi Iwai'
  Cc: alsa-devel, kimty, lgirdwood, senius.park, donggyun.ko, hmseo,
	seungbin.lee, s47.kang, pilsun.jang

If routing change and underrun stop is run at the same time,
data abort can be occurred by the following sequence.

CPU0: Processing underrun 	CPU1: Processing routing change
dpcm_be_dai_trigger():		dpcm_be_disconnect():

for_each_dpcm_be(fe, stream, dpcm) {

				spin_lock_irqsave(&fe->card->dpcm_lock, flags);
				list_del(&dpcm->list_be);
				list_del(&dpcm->list_fe);
				spin_unlock_irqrestore(&fe->card->dpcm_lock, flags);
				kfree(dpcm);

struct snd_soc_pcm_runtime *be = dpcm->be; <-- Accessing freed memory

To prevent this situation, dpcm_lock is needed during accessing
the lists for dpcm links.

Signed-off-by: Gyeongtaek Lee <gt82.lee@samsung.com>
Cc: stable@vger.kernel.org
---
 sound/soc/soc-pcm.c | 53 ++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 50 insertions(+), 3 deletions(-)

diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c
index 48f71bb81a2f..df2cd4c0dabe 100644
--- a/sound/soc/soc-pcm.c
+++ b/sound/soc/soc-pcm.c
@@ -1993,17 +1993,63 @@ static int dpcm_fe_dai_hw_params(struct snd_pcm_substream *substream,
 	return ret;
 }
 
+struct dpcm_be_list {
+	unsigned int num;
+	struct snd_soc_pcm_runtime *be[];
+};
+
+static int dpcm_create_be_list(struct snd_soc_pcm_runtime *fe, int stream,
+		struct dpcm_be_list **be_list)
+{
+	struct snd_soc_dpcm *dpcm;
+	struct dpcm_be_list *be;
+	int size = 0;
+	int ret = 0;
+	unsigned long flags;
+
+	spin_lock_irqsave(&fe->card->dpcm_lock, flags);
+
+	for_each_dpcm_be(fe, stream, dpcm)
+		size++;
+
+	be = kzalloc(struct_size(be, be, size), GFP_ATOMIC);
+	if (!be) {
+		ret = -ENOMEM;
+	} else {
+		unsigned int i = 0;
+
+		for_each_dpcm_be(fe, stream, dpcm)
+			be->be[i++] = dpcm->be;
+
+		*be_list = be;
+	}
+
+	spin_unlock_irqrestore(&fe->card->dpcm_lock, flags);
+
+	return ret;
+}
+
+static void dpcm_free_be_list(struct dpcm_be_list *be_list)
+{
+	kfree(be_list);
+}
+
 int dpcm_be_dai_trigger(struct snd_soc_pcm_runtime *fe, int stream,
 			       int cmd)
 {
 	struct snd_soc_pcm_runtime *be;
-	struct snd_soc_dpcm *dpcm;
+	struct dpcm_be_list *be_list;
 	int ret = 0;
+	int i;
 
-	for_each_dpcm_be(fe, stream, dpcm) {
+	ret = dpcm_create_be_list(fe, stream, &be_list);
+	if (ret < 0)
+		return ret;
+
+	for(i = 0; i < be_list->num; i++) {
 		struct snd_pcm_substream *be_substream;
 
-		be = dpcm->be;
+		be = be_list->be[i];
 		be_substream = snd_soc_dpcm_get_substream(be, stream);
 
 		/* is this op for this BE ? */
@@ -2092,6 +2138,7 @@ int dpcm_be_dai_trigger(struct snd_soc_pcm_runtime *fe, int stream,
 	if (ret < 0)
 		dev_err(fe->dev, "ASoC: %s() failed at %s (%d)\n",
 			__func__, be->dai_link->name, ret);
+	dpcm_free_be_list(be_list);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(dpcm_be_dai_trigger);

base-commit: 4ac6d90867a4de2e12117e755dbd76e08d88697f
-- 
2.21.0



^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH] ASoC: dpcm: fix race condition to dpcm links in dpcm_be_dai_trigger
  2021-09-29  5:49 ` [PATCH] ASoC: dpcm: fix race condition to dpcm links in dpcm_be_dai_trigger Gyeongtaek Lee
@ 2021-09-29 14:11   ` Pierre-Louis Bossart
  2021-09-29 21:01     ` Pierre-Louis Bossart
  0 siblings, 1 reply; 5+ messages in thread
From: Pierre-Louis Bossart @ 2021-09-29 14:11 UTC (permalink / raw)
  To: Gyeongtaek Lee, 'Takashi Iwai'
  Cc: alsa-devel, kimty, lgirdwood, senius.park, donggyun.ko, hmseo,
	seungbin.lee, s47.kang, pilsun.jang



On 9/29/21 12:49 AM, Gyeongtaek Lee wrote:
> If routing change and underrun stop is run at the same time,
> data abort can be occurred by the following sequence.
> 
> CPU0: Processing underrun 	CPU1: Processing routing change
> dpcm_be_dai_trigger():		dpcm_be_disconnect():
> 
> for_each_dpcm_be(fe, stream, dpcm) {
> 
> 				spin_lock_irqsave(&fe->card->dpcm_lock, flags);
> 				list_del(&dpcm->list_be);
> 				list_del(&dpcm->list_fe);
> 				spin_unlock_irqrestore(&fe->card->dpcm_lock, flags);
> 				kfree(dpcm);
> 
> struct snd_soc_pcm_runtime *be = dpcm->be; <-- Accessing freed memory
> 
> To prevent this situation, dpcm_lock is needed during accessing
> the lists for dpcm links.

Isn't there still a possible inconsistency here introduced by the
duplication of the BE list?

You protect the list creation, but before you use it in
dpcm_be_dai_trigger(), there's a time window where the function could be
pre-empted and a disconnect event might have happened. As a result you
could trigger a BE that's no longer connected.

What you identified as a race is likely valid, but how to fix it isn't
clear to me - the DPCM code isn't self-explanatory at all with its use
in various places of the dpcm_lock spinlock, the pcm mutex, the card mutex.

Ideally we would need to find a way to prevent changes in connections
while we are doing the triggers, but triggers can take a bit of time if
they involve any sort of communication over a bus. I really wonder if
this dpcm_lock should be a mutex and if the model for DPCM really
involves interrupt contexts as the irqsave/irqrestore mentions hint at.

> Signed-off-by: Gyeongtaek Lee <gt82.lee@samsung.com>
> Cc: stable@vger.kernel.org
> ---
>  sound/soc/soc-pcm.c | 53 ++++++++++++++++++++++++++++++++++++++++++---
>  1 file changed, 50 insertions(+), 3 deletions(-)
> 
> diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c
> index 48f71bb81a2f..df2cd4c0dabe 100644
> --- a/sound/soc/soc-pcm.c
> +++ b/sound/soc/soc-pcm.c
> @@ -1993,17 +1993,63 @@ static int dpcm_fe_dai_hw_params(struct snd_pcm_substream *substream,
>  	return ret;
>  }
>  
> +struct dpcm_be_list {
> +	unsigned int num;
> +	struct snd_soc_pcm_runtime *be[];
> +};
> +
> +static int dpcm_create_be_list(struct snd_soc_pcm_runtime *fe, int stream,
> +		struct dpcm_be_list **be_list)
> +{
> +	struct snd_soc_dpcm *dpcm;
> +	struct dpcm_be_list *be;
> +	int size = 0;
> +	int ret = 0;
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(&fe->card->dpcm_lock, flags);
> +
> +	for_each_dpcm_be(fe, stream, dpcm)
> +		size++;
> +
> +	be = kzalloc(struct_size(be, be, size), GFP_ATOMIC);
> +	if (!be) {
> +		ret = -ENOMEM;
> +	} else {
> +		unsigned int i = 0;
> +
> +		for_each_dpcm_be(fe, stream, dpcm)
> +			be->be[i++] = dpcm->be;
> +
> +		*be_list = be;
> +	}
> +
> +	spin_unlock_irqrestore(&fe->card->dpcm_lock, flags);
> +
> +	return ret;
> +}
> +
> +static void dpcm_free_be_list(struct dpcm_be_list *be_list)
> +{
> +	kfree(be_list);
> +}
> +
>  int dpcm_be_dai_trigger(struct snd_soc_pcm_runtime *fe, int stream,
>  			       int cmd)
>  {
>  	struct snd_soc_pcm_runtime *be;
> -	struct snd_soc_dpcm *dpcm;
> +	struct dpcm_be_list *be_list;
>  	int ret = 0;
> +	int i;
>  
> -	for_each_dpcm_be(fe, stream, dpcm) {
> +	ret = dpcm_create_be_list(fe, stream, &be_list);
> +	if (ret < 0)
> +		return ret;
> +
> +	for(i = 0; i < be_list->num; i++) {
>  		struct snd_pcm_substream *be_substream;
>  
> -		be = dpcm->be;
> +		be = be_list->be[i];
>  		be_substream = snd_soc_dpcm_get_substream(be, stream);
>  
>  		/* is this op for this BE ? */
> @@ -2092,6 +2138,7 @@ int dpcm_be_dai_trigger(struct snd_soc_pcm_runtime *fe, int stream,
>  	if (ret < 0)
>  		dev_err(fe->dev, "ASoC: %s() failed at %s (%d)\n",
>  			__func__, be->dai_link->name, ret);
> +	dpcm_free_be_list(be_list);
>  	return ret;
>  }
>  EXPORT_SYMBOL_GPL(dpcm_be_dai_trigger);
> 
> base-commit: 4ac6d90867a4de2e12117e755dbd76e08d88697f
> 

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] ASoC: dpcm: fix race condition to dpcm links in dpcm_be_dai_trigger
  2021-09-29 14:11   ` Pierre-Louis Bossart
@ 2021-09-29 21:01     ` Pierre-Louis Bossart
  2021-09-30  3:48       ` Gyeongtaek Lee
  0 siblings, 1 reply; 5+ messages in thread
From: Pierre-Louis Bossart @ 2021-09-29 21:01 UTC (permalink / raw)
  To: Gyeongtaek Lee, 'Takashi Iwai'
  Cc: alsa-devel, kimty, lgirdwood, senius.park, donggyun.ko, hmseo,
	seungbin.lee, s47.kang, pilsun.jang


>> If routing change and underrun stop is run at the same time,
>> data abort can be occurred by the following sequence.
>>
>> CPU0: Processing underrun 	CPU1: Processing routing change
>> dpcm_be_dai_trigger():		dpcm_be_disconnect():
>>
>> for_each_dpcm_be(fe, stream, dpcm) {
>>
>> 				spin_lock_irqsave(&fe->card->dpcm_lock, flags);
>> 				list_del(&dpcm->list_be);
>> 				list_del(&dpcm->list_fe);
>> 				spin_unlock_irqrestore(&fe->card->dpcm_lock, flags);
>> 				kfree(dpcm);
>>
>> struct snd_soc_pcm_runtime *be = dpcm->be; <-- Accessing freed memory
>>
>> To prevent this situation, dpcm_lock is needed during accessing
>> the lists for dpcm links.
> 
> Isn't there still a possible inconsistency here introduced by the
> duplication of the BE list?
> 
> You protect the list creation, but before you use it in
> dpcm_be_dai_trigger(), there's a time window where the function could be
> pre-empted and a disconnect event might have happened. As a result you
> could trigger a BE that's no longer connected.
> 
> What you identified as a race is likely valid, but how to fix it isn't
> clear to me - the DPCM code isn't self-explanatory at all with its use
> in various places of the dpcm_lock spinlock, the pcm mutex, the card mutex.
> 
> Ideally we would need to find a way to prevent changes in connections
> while we are doing the triggers, but triggers can take a bit of time if
> they involve any sort of communication over a bus. I really wonder if
> this dpcm_lock should be a mutex and if the model for DPCM really
> involves interrupt contexts as the irqsave/irqrestore mentions hint at.

To follow-up on this, I started experimenting with a replacement of the
'dpcm_lock' spinlock with a 'dpcm_mutex', see
https://github.com/thesofproject/linux/pull/3186

If we combine both of our results, the 'right' solution might be to take
this mutex before every use of for_each_dpcm_be(), and unlock it at the
end of the loop, which additional changes to avoid re-taking the same
mutex in helper functions.

there's still a part in DPCM that I can't figure out, there is an
elaborate trick with an explicit comment

	/* if FE's runtime_update is already set, we're in race;
	 * process this trigger later at exit
	 */

Which looks like a missing mutex somewhere, or an overkill solution that
might never be needed.

^ permalink raw reply	[flat|nested] 5+ messages in thread

* RE: [PATCH] ASoC: dpcm: fix race condition to dpcm links in dpcm_be_dai_trigger
  2021-09-29 21:01     ` Pierre-Louis Bossart
@ 2021-09-30  3:48       ` Gyeongtaek Lee
  0 siblings, 0 replies; 5+ messages in thread
From: Gyeongtaek Lee @ 2021-09-30  3:48 UTC (permalink / raw)
  To: 'Pierre-Louis Bossart', 'Takashi Iwai'
  Cc: alsa-devel, kimty, lgirdwood, senius.park, donggyun.ko, hmseo,
	seungbin.lee, s47.kang, pilsun.jang

>>> If routing change and underrun stop is run at the same time,
>>> data abort can be occurred by the following sequence.
>>>
>>> CPU0: Processing underrun 	CPU1: Processing routing change
>>> dpcm_be_dai_trigger():		dpcm_be_disconnect():
>>>
>>> for_each_dpcm_be(fe, stream, dpcm) {
>>>
>>> 				spin_lock_irqsave(&fe->card->dpcm_lock, flags);
>>> 				list_del(&dpcm->list_be);
>>> 				list_del(&dpcm->list_fe);
>>> 				spin_unlock_irqrestore(&fe->card->dpcm_lock, flags);
>>> 				kfree(dpcm);
>>>
>>> struct snd_soc_pcm_runtime *be = dpcm->be; <-- Accessing freed memory
>>>
>>> To prevent this situation, dpcm_lock is needed during accessing
>>> the lists for dpcm links.
>> 
>> Isn't there still a possible inconsistency here introduced by the
>> duplication of the BE list?
>> 
>> You protect the list creation, but before you use it in
>> dpcm_be_dai_trigger(), there's a time window where the function could be
>> pre-empted and a disconnect event might have happened. As a result you
>> could trigger a BE that's no longer connected.
>> 
>> What you identified as a race is likely valid, but how to fix it isn't
>> clear to me - the DPCM code isn't self-explanatory at all with its use
>> in various places of the dpcm_lock spinlock, the pcm mutex, the card mutex.
>> 
>> Ideally we would need to find a way to prevent changes in connections
>> while we are doing the triggers, but triggers can take a bit of time if
>> they involve any sort of communication over a bus. I really wonder if
>> this dpcm_lock should be a mutex and if the model for DPCM really
>> involves interrupt contexts as the irqsave/irqrestore mentions hint at.
>
>To follow-up on this, I started experimenting with a replacement of the
>'dpcm_lock' spinlock with a 'dpcm_mutex', see
>https://protect2.fireeye.com/v1/url?k=bdfd74d3-e2664dcc-bdfcff9c-000babdfecba-6f3671279e770f0b&q=1&e=7fdf074e-2aa1-44f0-bd52-58f2d26c9bfb&u=https%3A%2F%2Fgithub.com%2Fthesofproject%2Flinux%2Fpull%2F3186
>
>If we combine both of our results, the 'right' solution might be to take
>this mutex before every use of for_each_dpcm_be(), and unlock it at the
>end of the loop, which additional changes to avoid re-taking the same
>mutex in helper functions.
>
>there's still a part in DPCM that I can't figure out, there is an
>elaborate trick with an explicit comment
>
>	/* if FE's runtime_update is already set, we're in race;
>	 * process this trigger later at exit
>	 */
>
>Which looks like a missing mutex somewhere, or an overkill solution that
>might never be needed.
>
You are right.
This patch can't resolve inconsistency problem completely.
I thought that even part of the problem can be resolved by this patch and
it could help some other developers and me also.
And I also thought that invalid trigger on disconnected BE DAI can be protected
by the state check in the trigger function like the below.

int dpcm_be_dai_trigger(struct snd_soc_pcm_runtime *fe, int stream,
			       int cmd)
{
	struct snd_soc_dpcm *dpcm;
	int ret = 0;

	for_each_dpcm_be(fe, stream, dpcm) {
.......
		switch (cmd) {
		case SNDRV_PCM_TRIGGER_START:
			/* Following if statement protect invalid control. */
			if ((be->dpcm[stream].state != SND_SOC_DPCM_STATE_PREPARE) &&
			    (be->dpcm[stream].state != SND_SOC_DPCM_STATE_STOP) &&
			    (be->dpcm[stream].state != SND_SOC_DPCM_STATE_PAUSED))
				continue;

			ret = dpcm_do_trigger(dpcm, be_substream, cmd);

I really appreciate that there is a project about this problem already.
But if the project needs more time to be merged into the mainline,
I think that this patch can be used until the project is merged.
If you don't mind, would you reconsider this patch one more time?

Thank you,
Gyeongtaek Lee.


^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH] ASoC: dpcm: fix race condition to dpcm links in dpcm_be_dai_trigger
       [not found] <CGME20210907012440epcas2p3cab33295dff61e84ae422457fbc795f6@epcas2p3.samsung.com>
@ 2021-09-07  1:24 ` Gyeongtaek Lee
  0 siblings, 0 replies; 5+ messages in thread
From: Gyeongtaek Lee @ 2021-09-07  1:24 UTC (permalink / raw)
  To: 'Takashi Iwai'
  Cc: alsa-devel, kimty, lgirdwood, senius.park, donggyun.ko, hmseo,
	seungbin.lee, s47.kang, pilsun.jang

If routing change and underrun stop is run at the same time,
data abort can be occurred by the following sequence.

CPU0: Processing underrun 	CPU1: Processing routing change
dpcm_be_dai_trigger():		dpcm_be_disconnect():

for_each_dpcm_be(fe, stream, dpcm) {

				spin_lock_irqsave(&fe->card->dpcm_lock, flags);
				list_del(&dpcm->list_be);
				list_del(&dpcm->list_fe);
				spin_unlock_irqrestore(&fe->card->dpcm_lock, flags);
				kfree(dpcm);

struct snd_soc_pcm_runtime *be = dpcm->be; <-- Accessing freed memory

To prevent this situation, dpcm_lock is needed during accessing
the lists for dpcm links.

Signed-off-by: Gyeongtaek Lee <gt82.lee@samsung.com>
Cc: stable@vger.kernel.org
---
 sound/soc/soc-pcm.c | 53 ++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 50 insertions(+), 3 deletions(-)

diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c
index 48f71bb81a2f..df2cd4c0dabe 100644
--- a/sound/soc/soc-pcm.c
+++ b/sound/soc/soc-pcm.c
@@ -1993,17 +1993,63 @@ static int dpcm_fe_dai_hw_params(struct snd_pcm_substream *substream,
 	return ret;
 }
 
+struct dpcm_be_list {
+	unsigned int num;
+	struct snd_soc_pcm_runtime *be[];
+};
+
+static int dpcm_create_be_list(struct snd_soc_pcm_runtime *fe, int stream,
+		struct dpcm_be_list **be_list)
+{
+	struct snd_soc_dpcm *dpcm;
+	struct dpcm_be_list *be;
+	int size = 0;
+	int ret = 0;
+	unsigned long flags;
+
+	spin_lock_irqsave(&fe->card->dpcm_lock, flags);
+
+	for_each_dpcm_be(fe, stream, dpcm)
+		size++;
+
+	be = kzalloc(struct_size(be, be, size), GFP_ATOMIC);
+	if (!be) {
+		ret = -ENOMEM;
+	} else {
+		unsigned int i = 0;
+
+		for_each_dpcm_be(fe, stream, dpcm)
+			be->be[i++] = dpcm->be;
+
+		*be_list = be;
+	}
+
+	spin_unlock_irqrestore(&fe->card->dpcm_lock, flags);
+
+	return ret;
+}
+
+static void dpcm_free_be_list(struct dpcm_be_list *be_list)
+{
+	kfree(be_list);
+}
+
 int dpcm_be_dai_trigger(struct snd_soc_pcm_runtime *fe, int stream,
 			       int cmd)
 {
 	struct snd_soc_pcm_runtime *be;
-	struct snd_soc_dpcm *dpcm;
+	struct dpcm_be_list *be_list;
 	int ret = 0;
+	int i;
 
-	for_each_dpcm_be(fe, stream, dpcm) {
+	ret = dpcm_create_be_list(fe, stream, &be_list);
+	if (ret < 0)
+		return ret;
+
+	for(i = 0; i < be_list->num; i++) {
 		struct snd_pcm_substream *be_substream;
 
-		be = dpcm->be;
+		be = be_list->be[i];
 		be_substream = snd_soc_dpcm_get_substream(be, stream);
 
 		/* is this op for this BE ? */
@@ -2092,6 +2138,7 @@ int dpcm_be_dai_trigger(struct snd_soc_pcm_runtime *fe, int stream,
 	if (ret < 0)
 		dev_err(fe->dev, "ASoC: %s() failed at %s (%d)\n",
 			__func__, be->dai_link->name, ret);
+	dpcm_free_be_list(be_list);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(dpcm_be_dai_trigger);

base-commit: 4ac6d90867a4de2e12117e755dbd76e08d88697f
-- 
2.21.0



^ permalink raw reply related	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2021-09-30  3:49 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <CGME20210929054921epcas2p2fbe35a6262405e064aac3bd92b22b1aa@epcas2p2.samsung.com>
2021-09-29  5:49 ` [PATCH] ASoC: dpcm: fix race condition to dpcm links in dpcm_be_dai_trigger Gyeongtaek Lee
2021-09-29 14:11   ` Pierre-Louis Bossart
2021-09-29 21:01     ` Pierre-Louis Bossart
2021-09-30  3:48       ` Gyeongtaek Lee
     [not found] <CGME20210907012440epcas2p3cab33295dff61e84ae422457fbc795f6@epcas2p3.samsung.com>
2021-09-07  1:24 ` Gyeongtaek Lee

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).