linux-arm-kernel.lists.infradead.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/3] amba: Properly handle device probe without IRQ domain
@ 2021-08-16  7:46 Kefeng Wang
  2021-08-16  7:46 ` [PATCH 1/3] amba: Drop unused functions about APB/AHB devices add Kefeng Wang
                   ` (3 more replies)
  0 siblings, 4 replies; 25+ messages in thread
From: Kefeng Wang @ 2021-08-16  7:46 UTC (permalink / raw)
  To: linux-kernel, Rob Herring, Frank Rowand
  Cc: devicetree, Russell King, Linus Walleij, linux-arm-kernel, Kefeng Wang

Patch 1 and 2 make some cleanup, and patch 3 use of_irq_get() instead of 
irq_of_parse_and_map() to get irq number, return -EPROBE_DEFER if the irq
domain is not yet created, amba_device_add() will properly to handle the
no IRQ domain issue via deferred probe.

Kefeng Wang (3):
  amba: Drop unused functions about APB/AHB devices add
  Revert "ARM: amba: make use of -1 IRQs warn"
  amba: Properly handle device probe without IRQ domain

 drivers/amba/bus.c       | 100 ++++++++++-----------------------------
 drivers/of/platform.c    |   6 +--
 include/linux/amba/bus.h |  18 -------
 3 files changed, 27 insertions(+), 97 deletions(-)

-- 
2.26.2


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH 1/3] amba: Drop unused functions about APB/AHB devices add
  2021-08-16  7:46 [PATCH 0/3] amba: Properly handle device probe without IRQ domain Kefeng Wang
@ 2021-08-16  7:46 ` Kefeng Wang
  2021-08-16  7:46 ` [PATCH 2/3] Revert "ARM: amba: make use of -1 IRQs warn" Kefeng Wang
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 25+ messages in thread
From: Kefeng Wang @ 2021-08-16  7:46 UTC (permalink / raw)
  To: linux-kernel, Rob Herring, Frank Rowand
  Cc: devicetree, Russell King, Linus Walleij, linux-arm-kernel, Kefeng Wang

No one use the following functions, kill them.

  amba_aphb_device_add()
  amba_apb_device_add()
  amba_apb_device_add_res()
  amba_ahb_device_add()
  amba_ahb_device_add_res()

Cc: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
 drivers/amba/bus.c       | 72 ----------------------------------------
 include/linux/amba/bus.h | 18 ----------
 2 files changed, 90 deletions(-)

diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c
index 962041148482..2f2137518be0 100644
--- a/drivers/amba/bus.c
+++ b/drivers/amba/bus.c
@@ -579,78 +579,6 @@ int amba_device_add(struct amba_device *dev, struct resource *parent)
 }
 EXPORT_SYMBOL_GPL(amba_device_add);
 
-static struct amba_device *
-amba_aphb_device_add(struct device *parent, const char *name,
-		     resource_size_t base, size_t size, int irq1, int irq2,
-		     void *pdata, unsigned int periphid, u64 dma_mask,
-		     struct resource *resbase)
-{
-	struct amba_device *dev;
-	int ret;
-
-	dev = amba_device_alloc(name, base, size);
-	if (!dev)
-		return ERR_PTR(-ENOMEM);
-
-	dev->dev.coherent_dma_mask = dma_mask;
-	dev->irq[0] = irq1;
-	dev->irq[1] = irq2;
-	dev->periphid = periphid;
-	dev->dev.platform_data = pdata;
-	dev->dev.parent = parent;
-
-	ret = amba_device_add(dev, resbase);
-	if (ret) {
-		amba_device_put(dev);
-		return ERR_PTR(ret);
-	}
-
-	return dev;
-}
-
-struct amba_device *
-amba_apb_device_add(struct device *parent, const char *name,
-		    resource_size_t base, size_t size, int irq1, int irq2,
-		    void *pdata, unsigned int periphid)
-{
-	return amba_aphb_device_add(parent, name, base, size, irq1, irq2, pdata,
-				    periphid, 0, &iomem_resource);
-}
-EXPORT_SYMBOL_GPL(amba_apb_device_add);
-
-struct amba_device *
-amba_ahb_device_add(struct device *parent, const char *name,
-		    resource_size_t base, size_t size, int irq1, int irq2,
-		    void *pdata, unsigned int periphid)
-{
-	return amba_aphb_device_add(parent, name, base, size, irq1, irq2, pdata,
-				    periphid, ~0ULL, &iomem_resource);
-}
-EXPORT_SYMBOL_GPL(amba_ahb_device_add);
-
-struct amba_device *
-amba_apb_device_add_res(struct device *parent, const char *name,
-			resource_size_t base, size_t size, int irq1,
-			int irq2, void *pdata, unsigned int periphid,
-			struct resource *resbase)
-{
-	return amba_aphb_device_add(parent, name, base, size, irq1, irq2, pdata,
-				    periphid, 0, resbase);
-}
-EXPORT_SYMBOL_GPL(amba_apb_device_add_res);
-
-struct amba_device *
-amba_ahb_device_add_res(struct device *parent, const char *name,
-			resource_size_t base, size_t size, int irq1,
-			int irq2, void *pdata, unsigned int periphid,
-			struct resource *resbase)
-{
-	return amba_aphb_device_add(parent, name, base, size, irq1, irq2, pdata,
-				    periphid, ~0ULL, resbase);
-}
-EXPORT_SYMBOL_GPL(amba_ahb_device_add_res);
-
-
 static void amba_device_initialize(struct amba_device *dev, const char *name)
 {
 	device_initialize(&dev->dev);
diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h
index c68d87b87283..edfcf7a14dcd 100644
--- a/include/linux/amba/bus.h
+++ b/include/linux/amba/bus.h
@@ -122,24 +122,6 @@ struct amba_device *amba_device_alloc(const char *, resource_size_t, size_t);
 void amba_device_put(struct amba_device *);
 int amba_device_add(struct amba_device *, struct resource *);
 int amba_device_register(struct amba_device *, struct resource *);
-struct amba_device *amba_apb_device_add(struct device *parent, const char *name,
-					resource_size_t base, size_t size,
-					int irq1, int irq2, void *pdata,
-					unsigned int periphid);
-struct amba_device *amba_ahb_device_add(struct device *parent, const char *name,
-					resource_size_t base, size_t size,
-					int irq1, int irq2, void *pdata,
-					unsigned int periphid);
-struct amba_device *
-amba_apb_device_add_res(struct device *parent, const char *name,
-			resource_size_t base, size_t size, int irq1,
-			int irq2, void *pdata, unsigned int periphid,
-			struct resource *resbase);
-struct amba_device *
-amba_ahb_device_add_res(struct device *parent, const char *name,
-			resource_size_t base, size_t size, int irq1,
-			int irq2, void *pdata, unsigned int periphid,
-			struct resource *resbase);
 void amba_device_unregister(struct amba_device *);
 struct amba_device *amba_find_device(const char *, struct device *, unsigned int, unsigned int);
 int amba_request_regions(struct amba_device *, const char *);
-- 
2.26.2


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 2/3] Revert "ARM: amba: make use of -1 IRQs warn"
  2021-08-16  7:46 [PATCH 0/3] amba: Properly handle device probe without IRQ domain Kefeng Wang
  2021-08-16  7:46 ` [PATCH 1/3] amba: Drop unused functions about APB/AHB devices add Kefeng Wang
@ 2021-08-16  7:46 ` Kefeng Wang
  2021-08-16  7:46 ` [PATCH 3/3] amba: Properly handle device probe without IRQ domain Kefeng Wang
  2021-08-17 22:27 ` [PATCH 0/3] " Rob Herring
  3 siblings, 0 replies; 25+ messages in thread
From: Kefeng Wang @ 2021-08-16  7:46 UTC (permalink / raw)
  To: linux-kernel, Rob Herring, Frank Rowand
  Cc: devicetree, Russell King, Linus Walleij, linux-arm-kernel, Kefeng Wang

After commit 77a7300abad7 ("of/irq: Get rid of NO_IRQ usage"),
no irq case has been removed, irq_of_parse_and_map() will return
0 in all cases when get error from parse and map an interrupt into
linux virq space.

amba_device_register() is only used on no-DT initialization, see
  s3c64xx_pl080_init()		arch/arm/mach-s3c/pl080.c
  ep93xx_init_devices()		arch/arm/mach-ep93xx/core.c

They won't set -1 to irq[0], so no need the warn.

This reverts commit 2eac58d5026e4ec8b17ff8b62877fea9e1d2f1b3.

Cc: Russell King <linux@armlinux.org.uk>
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
 drivers/amba/bus.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c
index 2f2137518be0..36f2f42c8014 100644
--- a/drivers/amba/bus.c
+++ b/drivers/amba/bus.c
@@ -377,9 +377,6 @@ static int amba_device_try_add(struct amba_device *dev, struct resource *parent)
 	void __iomem *tmp;
 	int i, ret;
 
-	WARN_ON(dev->irq[0] == (unsigned int)-1);
-	WARN_ON(dev->irq[1] == (unsigned int)-1);
-
 	ret = request_resource(parent, &dev->res);
 	if (ret)
 		goto err_out;
-- 
2.26.2


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 3/3] amba: Properly handle device probe without IRQ domain
  2021-08-16  7:46 [PATCH 0/3] amba: Properly handle device probe without IRQ domain Kefeng Wang
  2021-08-16  7:46 ` [PATCH 1/3] amba: Drop unused functions about APB/AHB devices add Kefeng Wang
  2021-08-16  7:46 ` [PATCH 2/3] Revert "ARM: amba: make use of -1 IRQs warn" Kefeng Wang
@ 2021-08-16  7:46 ` Kefeng Wang
  2021-08-24 20:05   ` Rob Herring
  2021-08-17 22:27 ` [PATCH 0/3] " Rob Herring
  3 siblings, 1 reply; 25+ messages in thread
From: Kefeng Wang @ 2021-08-16  7:46 UTC (permalink / raw)
  To: linux-kernel, Rob Herring, Frank Rowand
  Cc: devicetree, Russell King, Linus Walleij, linux-arm-kernel,
	Kefeng Wang, Ruizhe Lin

of_amba_device_create() uses irq_of_parse_and_map() to translate
a DT interrupt specification into a Linux virtual interrupt number.

But it doesn't properly handle the case where the interrupt controller
is not yet available, eg, when pl011 interrupt is connected to MBIGEN
interrupt controller, because the mbigen initialization is too late,
which will lead to no IRQ due to no IRQ domain found, log is shown below,
  "irq: no irq domain found for uart0 !"

use of_irq_get() to return -EPROBE_DEFER as above, and in the function
amba_device_try_add()/amba_device_add(), it will properly handle in such
case, also return 0 in other fail cases to be consistent as before.

Cc: Russell King <linux@armlinux.org.uk>
Cc: Rob Herring <robh+dt@kernel.org>
Cc: Frank Rowand <frowand.list@gmail.com>
Reported-by: Ruizhe Lin <linruizhe@huawei.com>
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
 drivers/amba/bus.c    | 27 +++++++++++++++++++++++++++
 drivers/of/platform.c |  6 +-----
 2 files changed, 28 insertions(+), 5 deletions(-)

diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c
index 36f2f42c8014..720aa6cdd402 100644
--- a/drivers/amba/bus.c
+++ b/drivers/amba/bus.c
@@ -19,6 +19,7 @@
 #include <linux/clk/clk-conf.h>
 #include <linux/platform_device.h>
 #include <linux/reset.h>
+#include <linux/of_irq.h>
 
 #include <asm/irq.h>
 
@@ -371,12 +372,38 @@ static void amba_device_release(struct device *dev)
 	kfree(d);
 }
 
+static int of_amba_device_decode_irq(struct amba_device *dev)
+{
+	struct device_node *node = dev->dev.of_node;
+	int i, irq = 0;
+
+	if (IS_ENABLED(CONFIG_OF_IRQ) && node) {
+		/* Decode the IRQs and address ranges */
+		for (i = 0; i < AMBA_NR_IRQS; i++) {
+			irq = of_irq_get(node, i);
+			if (irq < 0) {
+				if (irq == -EPROBE_DEFER)
+					return irq;
+				irq = 0;
+			}
+
+			dev->irq[i] = irq;
+		}
+	}
+
+	return 0;
+}
+
 static int amba_device_try_add(struct amba_device *dev, struct resource *parent)
 {
 	u32 size;
 	void __iomem *tmp;
 	int i, ret;
 
+	ret = of_amba_device_decode_irq(dev);
+	if (ret)
+		goto err_out;
+
 	ret = request_resource(parent, &dev->res);
 	if (ret)
 		goto err_out;
diff --git a/drivers/of/platform.c b/drivers/of/platform.c
index 74afbb7a4f5e..32d5ff8df747 100644
--- a/drivers/of/platform.c
+++ b/drivers/of/platform.c
@@ -222,7 +222,7 @@ static struct amba_device *of_amba_device_create(struct device_node *node,
 {
 	struct amba_device *dev;
 	const void *prop;
-	int i, ret;
+	int ret;
 
 	pr_debug("Creating amba device %pOF\n", node);
 
@@ -253,10 +253,6 @@ static struct amba_device *of_amba_device_create(struct device_node *node,
 	if (prop)
 		dev->periphid = of_read_ulong(prop, 1);
 
-	/* Decode the IRQs and address ranges */
-	for (i = 0; i < AMBA_NR_IRQS; i++)
-		dev->irq[i] = irq_of_parse_and_map(node, i);
-
 	ret = of_address_to_resource(node, 0, &dev->res);
 	if (ret) {
 		pr_err("amba: of_address_to_resource() failed (%d) for %pOF\n",
-- 
2.26.2


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* Re: [PATCH 0/3] amba: Properly handle device probe without IRQ domain
  2021-08-16  7:46 [PATCH 0/3] amba: Properly handle device probe without IRQ domain Kefeng Wang
                   ` (2 preceding siblings ...)
  2021-08-16  7:46 ` [PATCH 3/3] amba: Properly handle device probe without IRQ domain Kefeng Wang
@ 2021-08-17 22:27 ` Rob Herring
  2021-08-23  2:19   ` Kefeng Wang
  3 siblings, 1 reply; 25+ messages in thread
From: Rob Herring @ 2021-08-17 22:27 UTC (permalink / raw)
  To: Kefeng Wang
  Cc: linux-kernel, Frank Rowand, devicetree, Russell King,
	Linus Walleij, linux-arm-kernel

On Mon, Aug 16, 2021 at 03:46:16PM +0800, Kefeng Wang wrote:
> Patch 1 and 2 make some cleanup, and patch 3 use of_irq_get() instead of 
> irq_of_parse_and_map() to get irq number, return -EPROBE_DEFER if the irq
> domain is not yet created, amba_device_add() will properly to handle the
> no IRQ domain issue via deferred probe.
> 
> Kefeng Wang (3):
>   amba: Drop unused functions about APB/AHB devices add
>   Revert "ARM: amba: make use of -1 IRQs warn"
>   amba: Properly handle device probe without IRQ domain
> 
>  drivers/amba/bus.c       | 100 ++++++++++-----------------------------
>  drivers/of/platform.c    |   6 +--
>  include/linux/amba/bus.h |  18 -------
>  3 files changed, 27 insertions(+), 97 deletions(-)

Reviewed-by: Rob Herring <robh@kernel.org>

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 0/3] amba: Properly handle device probe without IRQ domain
  2021-08-17 22:27 ` [PATCH 0/3] " Rob Herring
@ 2021-08-23  2:19   ` Kefeng Wang
  2021-08-23  9:05     ` Russell King (Oracle)
  0 siblings, 1 reply; 25+ messages in thread
From: Kefeng Wang @ 2021-08-23  2:19 UTC (permalink / raw)
  To: Rob Herring
  Cc: linux-kernel, Frank Rowand, devicetree, Russell King,
	Linus Walleij, linux-arm-kernel


On 2021/8/18 6:27, Rob Herring wrote:
> On Mon, Aug 16, 2021 at 03:46:16PM +0800, Kefeng Wang wrote:
>> Patch 1 and 2 make some cleanup, and patch 3 use of_irq_get() instead of
>> irq_of_parse_and_map() to get irq number, return -EPROBE_DEFER if the irq
>> domain is not yet created, amba_device_add() will properly to handle the
>> no IRQ domain issue via deferred probe.
>>
>> Kefeng Wang (3):
>>    amba: Drop unused functions about APB/AHB devices add
>>    Revert "ARM: amba: make use of -1 IRQs warn"
>>    amba: Properly handle device probe without IRQ domain
>>
>>   drivers/amba/bus.c       | 100 ++++++++++-----------------------------
>>   drivers/of/platform.c    |   6 +--
>>   include/linux/amba/bus.h |  18 -------
>>   3 files changed, 27 insertions(+), 97 deletions(-)
> Reviewed-by: Rob Herring <robh@kernel.org>

Thanks Rob.

Hi Russell, should I send the patches to the ARM patch system?

> .
>

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 0/3] amba: Properly handle device probe without IRQ domain
  2021-08-23  2:19   ` Kefeng Wang
@ 2021-08-23  9:05     ` Russell King (Oracle)
  2021-08-23 10:57       ` Kefeng Wang
  0 siblings, 1 reply; 25+ messages in thread
From: Russell King (Oracle) @ 2021-08-23  9:05 UTC (permalink / raw)
  To: Kefeng Wang
  Cc: Rob Herring, linux-kernel, Frank Rowand, devicetree,
	Linus Walleij, linux-arm-kernel

On Mon, Aug 23, 2021 at 10:19:23AM +0800, Kefeng Wang wrote:
> 
> On 2021/8/18 6:27, Rob Herring wrote:
> > On Mon, Aug 16, 2021 at 03:46:16PM +0800, Kefeng Wang wrote:
> > > Patch 1 and 2 make some cleanup, and patch 3 use of_irq_get() instead of
> > > irq_of_parse_and_map() to get irq number, return -EPROBE_DEFER if the irq
> > > domain is not yet created, amba_device_add() will properly to handle the
> > > no IRQ domain issue via deferred probe.
> > > 
> > > Kefeng Wang (3):
> > >    amba: Drop unused functions about APB/AHB devices add
> > >    Revert "ARM: amba: make use of -1 IRQs warn"
> > >    amba: Properly handle device probe without IRQ domain
> > > 
> > >   drivers/amba/bus.c       | 100 ++++++++++-----------------------------
> > >   drivers/of/platform.c    |   6 +--
> > >   include/linux/amba/bus.h |  18 -------
> > >   3 files changed, 27 insertions(+), 97 deletions(-)
> > Reviewed-by: Rob Herring <robh@kernel.org>
> 
> Thanks Rob.
> 
> Hi Russell, should I send the patches to the ARM patch system?

Yes please - I'll try to squeeze it in for this cycle but it's getting
a tad late for that. Thanks.

-- 
RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
FTTP is here! 40Mbps down 10Mbps up. Decent connectivity at last!

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 0/3] amba: Properly handle device probe without IRQ domain
  2021-08-23  9:05     ` Russell King (Oracle)
@ 2021-08-23 10:57       ` Kefeng Wang
  0 siblings, 0 replies; 25+ messages in thread
From: Kefeng Wang @ 2021-08-23 10:57 UTC (permalink / raw)
  To: Russell King (Oracle)
  Cc: Rob Herring, linux-kernel, Frank Rowand, devicetree,
	Linus Walleij, linux-arm-kernel, wangkefeng.wang


On 2021/8/23 17:05, Russell King (Oracle) wrote:
> On Mon, Aug 23, 2021 at 10:19:23AM +0800, Kefeng Wang wrote:
>> On 2021/8/18 6:27, Rob Herring wrote:
>>> On Mon, Aug 16, 2021 at 03:46:16PM +0800, Kefeng Wang wrote:
>>>> Patch 1 and 2 make some cleanup, and patch 3 use of_irq_get() instead of
>>>> irq_of_parse_and_map() to get irq number, return -EPROBE_DEFER if the irq
>>>> domain is not yet created, amba_device_add() will properly to handle the
>>>> no IRQ domain issue via deferred probe.
>>>>
>>>> Kefeng Wang (3):
>>>>     amba: Drop unused functions about APB/AHB devices add
>>>>     Revert "ARM: amba: make use of -1 IRQs warn"
>>>>     amba: Properly handle device probe without IRQ domain
>>>>
>>>>    drivers/amba/bus.c       | 100 ++++++++++-----------------------------
>>>>    drivers/of/platform.c    |   6 +--
>>>>    include/linux/amba/bus.h |  18 -------
>>>>    3 files changed, 27 insertions(+), 97 deletions(-)
>>> Reviewed-by: Rob Herring <robh@kernel.org>
>> Thanks Rob.
>>
>> Hi Russell, should I send the patches to the ARM patch system?
> Yes please - I'll try to squeeze it in for this cycle but it's getting
> a tad late for that. Thanks.

Done, but the sequence of patches is reordered at ARM patch system, 
(using git send-email

and deliver patch1/2/3 in order).

BTW,  could you give me some direction the following patchset[1] too if 
you have time, I have

addressed your comments and resend, but there's been no new feedback for 
a long time.

If it is too late for this cycle, I could resend them after 5.15-rc1.

Many thanks.

[1] 
https://lore.kernel.org/linux-arm-kernel/20210610123556.171328-1-wangkefeng.wang@huawei.com/



>

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 3/3] amba: Properly handle device probe without IRQ domain
  2021-08-16  7:46 ` [PATCH 3/3] amba: Properly handle device probe without IRQ domain Kefeng Wang
@ 2021-08-24 20:05   ` Rob Herring
  2021-08-24 20:08     ` Saravana Kannan
  0 siblings, 1 reply; 25+ messages in thread
From: Rob Herring @ 2021-08-24 20:05 UTC (permalink / raw)
  To: Kefeng Wang, Saravana Kannan
  Cc: linux-kernel, Frank Rowand, devicetree, Russell King,
	Linus Walleij, linux-arm-kernel, Ruizhe Lin

+Saravana

Saravana mentioned to me there may be some issues with this one...


On Mon, Aug 16, 2021 at 2:43 AM Kefeng Wang <wangkefeng.wang@huawei.com> wrote:
>
> of_amba_device_create() uses irq_of_parse_and_map() to translate
> a DT interrupt specification into a Linux virtual interrupt number.
>
> But it doesn't properly handle the case where the interrupt controller
> is not yet available, eg, when pl011 interrupt is connected to MBIGEN
> interrupt controller, because the mbigen initialization is too late,
> which will lead to no IRQ due to no IRQ domain found, log is shown below,
>   "irq: no irq domain found for uart0 !"
>
> use of_irq_get() to return -EPROBE_DEFER as above, and in the function
> amba_device_try_add()/amba_device_add(), it will properly handle in such
> case, also return 0 in other fail cases to be consistent as before.
>
> Cc: Russell King <linux@armlinux.org.uk>
> Cc: Rob Herring <robh+dt@kernel.org>
> Cc: Frank Rowand <frowand.list@gmail.com>
> Reported-by: Ruizhe Lin <linruizhe@huawei.com>
> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
> ---
>  drivers/amba/bus.c    | 27 +++++++++++++++++++++++++++
>  drivers/of/platform.c |  6 +-----
>  2 files changed, 28 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c
> index 36f2f42c8014..720aa6cdd402 100644
> --- a/drivers/amba/bus.c
> +++ b/drivers/amba/bus.c
> @@ -19,6 +19,7 @@
>  #include <linux/clk/clk-conf.h>
>  #include <linux/platform_device.h>
>  #include <linux/reset.h>
> +#include <linux/of_irq.h>
>
>  #include <asm/irq.h>
>
> @@ -371,12 +372,38 @@ static void amba_device_release(struct device *dev)
>         kfree(d);
>  }
>
> +static int of_amba_device_decode_irq(struct amba_device *dev)
> +{
> +       struct device_node *node = dev->dev.of_node;
> +       int i, irq = 0;
> +
> +       if (IS_ENABLED(CONFIG_OF_IRQ) && node) {
> +               /* Decode the IRQs and address ranges */
> +               for (i = 0; i < AMBA_NR_IRQS; i++) {
> +                       irq = of_irq_get(node, i);
> +                       if (irq < 0) {
> +                               if (irq == -EPROBE_DEFER)
> +                                       return irq;
> +                               irq = 0;
> +                       }
> +
> +                       dev->irq[i] = irq;
> +               }
> +       }
> +
> +       return 0;
> +}
> +
>  static int amba_device_try_add(struct amba_device *dev, struct resource *parent)
>  {
>         u32 size;
>         void __iomem *tmp;
>         int i, ret;
>
> +       ret = of_amba_device_decode_irq(dev);
> +       if (ret)
> +               goto err_out;
> +
>         ret = request_resource(parent, &dev->res);
>         if (ret)
>                 goto err_out;
> diff --git a/drivers/of/platform.c b/drivers/of/platform.c
> index 74afbb7a4f5e..32d5ff8df747 100644
> --- a/drivers/of/platform.c
> +++ b/drivers/of/platform.c
> @@ -222,7 +222,7 @@ static struct amba_device *of_amba_device_create(struct device_node *node,
>  {
>         struct amba_device *dev;
>         const void *prop;
> -       int i, ret;
> +       int ret;
>
>         pr_debug("Creating amba device %pOF\n", node);
>
> @@ -253,10 +253,6 @@ static struct amba_device *of_amba_device_create(struct device_node *node,
>         if (prop)
>                 dev->periphid = of_read_ulong(prop, 1);
>
> -       /* Decode the IRQs and address ranges */
> -       for (i = 0; i < AMBA_NR_IRQS; i++)
> -               dev->irq[i] = irq_of_parse_and_map(node, i);
> -
>         ret = of_address_to_resource(node, 0, &dev->res);
>         if (ret) {
>                 pr_err("amba: of_address_to_resource() failed (%d) for %pOF\n",
> --
> 2.26.2
>

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 3/3] amba: Properly handle device probe without IRQ domain
  2021-08-24 20:05   ` Rob Herring
@ 2021-08-24 20:08     ` Saravana Kannan
  2021-08-25  4:05       ` Kefeng Wang
  0 siblings, 1 reply; 25+ messages in thread
From: Saravana Kannan @ 2021-08-24 20:08 UTC (permalink / raw)
  To: Rob Herring
  Cc: Kefeng Wang, linux-kernel, Frank Rowand, devicetree,
	Russell King, Linus Walleij, linux-arm-kernel, Ruizhe Lin

On Tue, Aug 24, 2021 at 1:05 PM Rob Herring <robh+dt@kernel.org> wrote:
>
> +Saravana
>
> Saravana mentioned to me there may be some issues with this one...
>
>
> On Mon, Aug 16, 2021 at 2:43 AM Kefeng Wang <wangkefeng.wang@huawei.com> wrote:
> >
> > of_amba_device_create() uses irq_of_parse_and_map() to translate
> > a DT interrupt specification into a Linux virtual interrupt number.
> >
> > But it doesn't properly handle the case where the interrupt controller
> > is not yet available, eg, when pl011 interrupt is connected to MBIGEN
> > interrupt controller, because the mbigen initialization is too late,
> > which will lead to no IRQ due to no IRQ domain found, log is shown below,
> >   "irq: no irq domain found for uart0 !"
> >
> > use of_irq_get() to return -EPROBE_DEFER as above, and in the function
> > amba_device_try_add()/amba_device_add(), it will properly handle in such
> > case, also return 0 in other fail cases to be consistent as before.
> >
> > Cc: Russell King <linux@armlinux.org.uk>
> > Cc: Rob Herring <robh+dt@kernel.org>
> > Cc: Frank Rowand <frowand.list@gmail.com>
> > Reported-by: Ruizhe Lin <linruizhe@huawei.com>
> > Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
> > ---
> >  drivers/amba/bus.c    | 27 +++++++++++++++++++++++++++
> >  drivers/of/platform.c |  6 +-----
> >  2 files changed, 28 insertions(+), 5 deletions(-)
> >
> > diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c
> > index 36f2f42c8014..720aa6cdd402 100644
> > --- a/drivers/amba/bus.c
> > +++ b/drivers/amba/bus.c
> > @@ -19,6 +19,7 @@
> >  #include <linux/clk/clk-conf.h>
> >  #include <linux/platform_device.h>
> >  #include <linux/reset.h>
> > +#include <linux/of_irq.h>
> >
> >  #include <asm/irq.h>
> >
> > @@ -371,12 +372,38 @@ static void amba_device_release(struct device *dev)
> >         kfree(d);
> >  }
> >
> > +static int of_amba_device_decode_irq(struct amba_device *dev)
> > +{
> > +       struct device_node *node = dev->dev.of_node;
> > +       int i, irq = 0;
> > +
> > +       if (IS_ENABLED(CONFIG_OF_IRQ) && node) {
> > +               /* Decode the IRQs and address ranges */
> > +               for (i = 0; i < AMBA_NR_IRQS; i++) {
> > +                       irq = of_irq_get(node, i);
> > +                       if (irq < 0) {
> > +                               if (irq == -EPROBE_DEFER)
> > +                                       return irq;
> > +                               irq = 0;
> > +                       }
> > +
> > +                       dev->irq[i] = irq;
> > +               }
> > +       }
> > +
> > +       return 0;
> > +}
> > +
> >  static int amba_device_try_add(struct amba_device *dev, struct resource *parent)
> >  {
> >         u32 size;
> >         void __iomem *tmp;
> >         int i, ret;
> >
> > +       ret = of_amba_device_decode_irq(dev);
> > +       if (ret)
> > +               goto err_out;
> > +

Similar to other resources the AMBA bus "gets" for the device, I think
this should be moved into amba_probe() and not here. There's no reason
to delay the addition of the device (and loading its module) because
the IRQ isn't ready yet.

-Saravana

> >         ret = request_resource(parent, &dev->res);
> >         if (ret)
> >                 goto err_out;
> > diff --git a/drivers/of/platform.c b/drivers/of/platform.c
> > index 74afbb7a4f5e..32d5ff8df747 100644
> > --- a/drivers/of/platform.c
> > +++ b/drivers/of/platform.c
> > @@ -222,7 +222,7 @@ static struct amba_device *of_amba_device_create(struct device_node *node,
> >  {
> >         struct amba_device *dev;
> >         const void *prop;
> > -       int i, ret;
> > +       int ret;
> >
> >         pr_debug("Creating amba device %pOF\n", node);
> >
> > @@ -253,10 +253,6 @@ static struct amba_device *of_amba_device_create(struct device_node *node,
> >         if (prop)
> >                 dev->periphid = of_read_ulong(prop, 1);
> >
> > -       /* Decode the IRQs and address ranges */
> > -       for (i = 0; i < AMBA_NR_IRQS; i++)
> > -               dev->irq[i] = irq_of_parse_and_map(node, i);
> > -
> >         ret = of_address_to_resource(node, 0, &dev->res);
> >         if (ret) {
> >                 pr_err("amba: of_address_to_resource() failed (%d) for %pOF\n",
> > --
> > 2.26.2
> >

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 3/3] amba: Properly handle device probe without IRQ domain
  2021-08-24 20:08     ` Saravana Kannan
@ 2021-08-25  4:05       ` Kefeng Wang
  2021-08-25  8:04         ` Saravana Kannan
  2021-08-25 12:33         ` [PATCH 3/3] amba: Properly handle device probe without IRQ domain Rob Herring
  0 siblings, 2 replies; 25+ messages in thread
From: Kefeng Wang @ 2021-08-25  4:05 UTC (permalink / raw)
  To: Saravana Kannan, Rob Herring
  Cc: linux-kernel, Frank Rowand, devicetree, Russell King,
	Linus Walleij, linux-arm-kernel, Ruizhe Lin, wangkefeng.wang


On 2021/8/25 4:08, Saravana Kannan wrote:
> On Tue, Aug 24, 2021 at 1:05 PM Rob Herring <robh+dt@kernel.org> wrote:
>> +Saravana
>>
>> Saravana mentioned to me there may be some issues with this one...
>>
>>
>> On Mon, Aug 16, 2021 at 2:43 AM Kefeng Wang <wangkefeng.wang@huawei.com> wrote:
>>> of_amba_device_create() uses irq_of_parse_and_map() to translate
>>> a DT interrupt specification into a Linux virtual interrupt number.
>>>
>>> But it doesn't properly handle the case where the interrupt controller
>>> is not yet available, eg, when pl011 interrupt is connected to MBIGEN
>>> interrupt controller, because the mbigen initialization is too late,
>>> which will lead to no IRQ due to no IRQ domain found, log is shown below,
>>>    "irq: no irq domain found for uart0 !"
>>>
>>> use of_irq_get() to return -EPROBE_DEFER as above, and in the function
>>> amba_device_try_add()/amba_device_add(), it will properly handle in such
>>> case, also return 0 in other fail cases to be consistent as before.
>>>
>>> Cc: Russell King <linux@armlinux.org.uk>
>>> Cc: Rob Herring <robh+dt@kernel.org>
>>> Cc: Frank Rowand <frowand.list@gmail.com>
>>> Reported-by: Ruizhe Lin <linruizhe@huawei.com>
>>> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
>>> ---
>>>   drivers/amba/bus.c    | 27 +++++++++++++++++++++++++++
>>>   drivers/of/platform.c |  6 +-----
>>>   2 files changed, 28 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c
>>> index 36f2f42c8014..720aa6cdd402 100644
>>> --- a/drivers/amba/bus.c
>>> +++ b/drivers/amba/bus.c
>>> @@ -19,6 +19,7 @@
>>>   #include <linux/clk/clk-conf.h>
>>>   #include <linux/platform_device.h>
>>>   #include <linux/reset.h>
>>> +#include <linux/of_irq.h>
>>>
>>>   #include <asm/irq.h>
>>>
>>> @@ -371,12 +372,38 @@ static void amba_device_release(struct device *dev)
>>>          kfree(d);
>>>   }
>>>
>>> +static int of_amba_device_decode_irq(struct amba_device *dev)
>>> +{
>>> +       struct device_node *node = dev->dev.of_node;
>>> +       int i, irq = 0;
>>> +
>>> +       if (IS_ENABLED(CONFIG_OF_IRQ) && node) {
>>> +               /* Decode the IRQs and address ranges */
>>> +               for (i = 0; i < AMBA_NR_IRQS; i++) {
>>> +                       irq = of_irq_get(node, i);
>>> +                       if (irq < 0) {
>>> +                               if (irq == -EPROBE_DEFER)
>>> +                                       return irq;
>>> +                               irq = 0;
>>> +                       }
>>> +
>>> +                       dev->irq[i] = irq;
>>> +               }
>>> +       }
>>> +
>>> +       return 0;
>>> +}
>>> +
>>>   static int amba_device_try_add(struct amba_device *dev, struct resource *parent)
>>>   {
>>>          u32 size;
>>>          void __iomem *tmp;
>>>          int i, ret;
>>>
>>> +       ret = of_amba_device_decode_irq(dev);
>>> +       if (ret)
>>> +               goto err_out;
>>> +
> Similar to other resources the AMBA bus "gets" for the device, I think
> this should be moved into amba_probe() and not here. There's no reason
> to delay the addition of the device (and loading its module) because
> the IRQ isn't ready yet.

The following code in the amba_device_try_add() will be called, it uses irq[0]
and irq[1], so I put of_amba_device_decode_irq() into amba_device_try_add().

470         if (dev->irq[0])
471                 ret = device_create_file(&dev->dev, &dev_attr_irq0);
472         if (ret == 0 && dev->irq[1])
473                 ret = device_create_file(&dev->dev, &dev_attr_irq1);
474         if (ret == 0)
475                 return ret;

of_amba_device_decode_irq() in amba_device_try_add() won't lead to issue,
only delay the device add, right?

If make it into amba_probe(), the above code should be moved too, could we
make a new patch to move both of them, or don't move them?


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 3/3] amba: Properly handle device probe without IRQ domain
  2021-08-25  4:05       ` Kefeng Wang
@ 2021-08-25  8:04         ` Saravana Kannan
  2021-08-25  8:39           ` Kefeng Wang
  2021-08-26  2:45           ` Kefeng Wang
  2021-08-25 12:33         ` [PATCH 3/3] amba: Properly handle device probe without IRQ domain Rob Herring
  1 sibling, 2 replies; 25+ messages in thread
From: Saravana Kannan @ 2021-08-25  8:04 UTC (permalink / raw)
  To: Kefeng Wang
  Cc: Rob Herring, linux-kernel, Frank Rowand, devicetree,
	Russell King, Linus Walleij, linux-arm-kernel, Ruizhe Lin

On Tue, Aug 24, 2021 at 9:05 PM Kefeng Wang <wangkefeng.wang@huawei.com> wrote:
>
>
> On 2021/8/25 4:08, Saravana Kannan wrote:
> > On Tue, Aug 24, 2021 at 1:05 PM Rob Herring <robh+dt@kernel.org> wrote:
> >> +Saravana
> >>
> >> Saravana mentioned to me there may be some issues with this one...
> >>
> >>
> >> On Mon, Aug 16, 2021 at 2:43 AM Kefeng Wang <wangkefeng.wang@huawei.com> wrote:
> >>> of_amba_device_create() uses irq_of_parse_and_map() to translate
> >>> a DT interrupt specification into a Linux virtual interrupt number.
> >>>
> >>> But it doesn't properly handle the case where the interrupt controller
> >>> is not yet available, eg, when pl011 interrupt is connected to MBIGEN
> >>> interrupt controller, because the mbigen initialization is too late,
> >>> which will lead to no IRQ due to no IRQ domain found, log is shown below,
> >>>    "irq: no irq domain found for uart0 !"
> >>>
> >>> use of_irq_get() to return -EPROBE_DEFER as above, and in the function
> >>> amba_device_try_add()/amba_device_add(), it will properly handle in such
> >>> case, also return 0 in other fail cases to be consistent as before.
> >>>
> >>> Cc: Russell King <linux@armlinux.org.uk>
> >>> Cc: Rob Herring <robh+dt@kernel.org>
> >>> Cc: Frank Rowand <frowand.list@gmail.com>
> >>> Reported-by: Ruizhe Lin <linruizhe@huawei.com>
> >>> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
> >>> ---
> >>>   drivers/amba/bus.c    | 27 +++++++++++++++++++++++++++
> >>>   drivers/of/platform.c |  6 +-----
> >>>   2 files changed, 28 insertions(+), 5 deletions(-)
> >>>
> >>> diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c
> >>> index 36f2f42c8014..720aa6cdd402 100644
> >>> --- a/drivers/amba/bus.c
> >>> +++ b/drivers/amba/bus.c
> >>> @@ -19,6 +19,7 @@
> >>>   #include <linux/clk/clk-conf.h>
> >>>   #include <linux/platform_device.h>
> >>>   #include <linux/reset.h>
> >>> +#include <linux/of_irq.h>
> >>>
> >>>   #include <asm/irq.h>
> >>>
> >>> @@ -371,12 +372,38 @@ static void amba_device_release(struct device *dev)
> >>>          kfree(d);
> >>>   }
> >>>
> >>> +static int of_amba_device_decode_irq(struct amba_device *dev)
> >>> +{
> >>> +       struct device_node *node = dev->dev.of_node;
> >>> +       int i, irq = 0;
> >>> +
> >>> +       if (IS_ENABLED(CONFIG_OF_IRQ) && node) {
> >>> +               /* Decode the IRQs and address ranges */
> >>> +               for (i = 0; i < AMBA_NR_IRQS; i++) {
> >>> +                       irq = of_irq_get(node, i);
> >>> +                       if (irq < 0) {
> >>> +                               if (irq == -EPROBE_DEFER)
> >>> +                                       return irq;
> >>> +                               irq = 0;
> >>> +                       }
> >>> +
> >>> +                       dev->irq[i] = irq;
> >>> +               }
> >>> +       }
> >>> +
> >>> +       return 0;
> >>> +}
> >>> +
> >>>   static int amba_device_try_add(struct amba_device *dev, struct resource *parent)
> >>>   {
> >>>          u32 size;
> >>>          void __iomem *tmp;
> >>>          int i, ret;
> >>>
> >>> +       ret = of_amba_device_decode_irq(dev);
> >>> +       if (ret)
> >>> +               goto err_out;
> >>> +
> > Similar to other resources the AMBA bus "gets" for the device, I think
> > this should be moved into amba_probe() and not here. There's no reason
> > to delay the addition of the device (and loading its module) because
> > the IRQ isn't ready yet.
>
> The following code in the amba_device_try_add() will be called, it uses irq[0]
> and irq[1], so I put of_amba_device_decode_irq() into amba_device_try_add().
>
> 470         if (dev->irq[0])
> 471                 ret = device_create_file(&dev->dev, &dev_attr_irq0);
> 472         if (ret == 0 && dev->irq[1])
> 473                 ret = device_create_file(&dev->dev, &dev_attr_irq1);
> 474         if (ret == 0)
> 475                 return ret;
>
> of_amba_device_decode_irq() in amba_device_try_add() won't lead to issue,
> only delay the device add, right?

But delaying the device add is the issue. For example, adding a device
could trigger the loading of the corresponding module using uevents.
But now this change would delay that step. That can have other
unintended consequences -- slowing down boot, what if the driver was
working fine without the IRQ, etc.

> If make it into amba_probe(), the above code should be moved too, could we
> make a new patch to move both of them, or don't move them?

I'd say move them both. If Russell hasn't already picked this up, then
I'd say redo your Patch 3/3.

Btw, I've been working on [1] cleaning up the one-off deferred probe
solution that we have for amba devices. That causes a bunch of other
headaches. Your patch 3/3 takes us further in the wrong direction by
adding more reasons for delaying the addition of the device.

-Saravana

[1] - https://lore.kernel.org/lkml/CAGETcx8b228nDUho3cX9AAQ-pXOfZTMv8cj2vhdx9yc_pk8q+A@mail.gmail.com/

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 3/3] amba: Properly handle device probe without IRQ domain
  2021-08-25  8:04         ` Saravana Kannan
@ 2021-08-25  8:39           ` Kefeng Wang
  2021-08-26  2:45           ` Kefeng Wang
  1 sibling, 0 replies; 25+ messages in thread
From: Kefeng Wang @ 2021-08-25  8:39 UTC (permalink / raw)
  To: Saravana Kannan
  Cc: Rob Herring, linux-kernel, Frank Rowand, devicetree,
	Russell King, Linus Walleij, linux-arm-kernel, Ruizhe Lin


On 2021/8/25 16:04, Saravana Kannan wrote:
> On Tue, Aug 24, 2021 at 9:05 PM Kefeng Wang <wangkefeng.wang@huawei.com> wrote:
>>
>> On 2021/8/25 4:08, Saravana Kannan wrote:
>>> On Tue, Aug 24, 2021 at 1:05 PM Rob Herring <robh+dt@kernel.org> wrote:
>>>> +Saravana
>>>>
>>>> Saravana mentioned to me there may be some issues with this one...
>>>>
>>>>
>>>> On Mon, Aug 16, 2021 at 2:43 AM Kefeng Wang <wangkefeng.wang@huawei.com> wrote:
>>>>> of_amba_device_create() uses irq_of_parse_and_map() to translate
>>>>> a DT interrupt specification into a Linux virtual interrupt number.
>>>>>
>>>>> But it doesn't properly handle the case where the interrupt controller
>>>>> is not yet available, eg, when pl011 interrupt is connected to MBIGEN
>>>>> interrupt controller, because the mbigen initialization is too late,
>>>>> which will lead to no IRQ due to no IRQ domain found, log is shown below,
>>>>>     "irq: no irq domain found for uart0 !"
>>>>>
>>>>> use of_irq_get() to return -EPROBE_DEFER as above, and in the function
>>>>> amba_device_try_add()/amba_device_add(), it will properly handle in such
>>>>> case, also return 0 in other fail cases to be consistent as before.
>>>>>
>>>>> Cc: Russell King <linux@armlinux.org.uk>
>>>>> Cc: Rob Herring <robh+dt@kernel.org>
>>>>> Cc: Frank Rowand <frowand.list@gmail.com>
>>>>> Reported-by: Ruizhe Lin <linruizhe@huawei.com>
>>>>> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
>>>>> ---
>>>>>    drivers/amba/bus.c    | 27 +++++++++++++++++++++++++++
>>>>>    drivers/of/platform.c |  6 +-----
>>>>>    2 files changed, 28 insertions(+), 5 deletions(-)
>>>>>
>>>>> diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c
>>>>> index 36f2f42c8014..720aa6cdd402 100644
>>>>> --- a/drivers/amba/bus.c
>>>>> +++ b/drivers/amba/bus.c
>>>>> @@ -19,6 +19,7 @@
>>>>>    #include <linux/clk/clk-conf.h>
>>>>>    #include <linux/platform_device.h>
>>>>>    #include <linux/reset.h>
>>>>> +#include <linux/of_irq.h>
>>>>>
>>>>>    #include <asm/irq.h>
>>>>>
>>>>> @@ -371,12 +372,38 @@ static void amba_device_release(struct device *dev)
>>>>>           kfree(d);
>>>>>    }
>>>>>
>>>>> +static int of_amba_device_decode_irq(struct amba_device *dev)
>>>>> +{
>>>>> +       struct device_node *node = dev->dev.of_node;
>>>>> +       int i, irq = 0;
>>>>> +
>>>>> +       if (IS_ENABLED(CONFIG_OF_IRQ) && node) {
>>>>> +               /* Decode the IRQs and address ranges */
>>>>> +               for (i = 0; i < AMBA_NR_IRQS; i++) {
>>>>> +                       irq = of_irq_get(node, i);
>>>>> +                       if (irq < 0) {
>>>>> +                               if (irq == -EPROBE_DEFER)
>>>>> +                                       return irq;
>>>>> +                               irq = 0;
>>>>> +                       }
>>>>> +
>>>>> +                       dev->irq[i] = irq;
>>>>> +               }
>>>>> +       }
>>>>> +
>>>>> +       return 0;
>>>>> +}
>>>>> +
>>>>>    static int amba_device_try_add(struct amba_device *dev, struct resource *parent)
>>>>>    {
>>>>>           u32 size;
>>>>>           void __iomem *tmp;
>>>>>           int i, ret;
>>>>>
>>>>> +       ret = of_amba_device_decode_irq(dev);
>>>>> +       if (ret)
>>>>> +               goto err_out;
>>>>> +
>>> Similar to other resources the AMBA bus "gets" for the device, I think
>>> this should be moved into amba_probe() and not here. There's no reason
>>> to delay the addition of the device (and loading its module) because
>>> the IRQ isn't ready yet.
>> The following code in the amba_device_try_add() will be called, it uses irq[0]
>> and irq[1], so I put of_amba_device_decode_irq() into amba_device_try_add().
>>
>> 470         if (dev->irq[0])
>> 471                 ret = device_create_file(&dev->dev, &dev_attr_irq0);
>> 472         if (ret == 0 && dev->irq[1])
>> 473                 ret = device_create_file(&dev->dev, &dev_attr_irq1);
>> 474         if (ret == 0)
>> 475                 return ret;
>>
>> of_amba_device_decode_irq() in amba_device_try_add() won't lead to issue,
>> only delay the device add, right?
> But delaying the device add is the issue. For example, adding a device
> could trigger the loading of the corresponding module using uevents.
> But now this change would delay that step. That can have other
> unintended consequences -- slowing down boot, what if the driver was
> working fine without the IRQ, etc.
>
>> If make it into amba_probe(), the above code should be moved too, could we
>> make a new patch to move both of them, or don't move them?
> I'd say move them both. If Russell hasn't already picked this up, then
> I'd say redo your Patch 3/3.


Sure,I will update it and resend.

>
> Btw, I've been working on [1] cleaning up the one-off deferred probe
> solution that we have for amba devices. That causes a bunch of other
> headaches. Your patch 3/3 takes us further in the wrong direction by
> adding more reasons for delaying the addition of the device.
Thanks for your explanation.
> -Saravana
>
> [1] - https://lore.kernel.org/lkml/CAGETcx8b228nDUho3cX9AAQ-pXOfZTMv8cj2vhdx9yc_pk8q+A@mail.gmail.com/
> .
>

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 3/3] amba: Properly handle device probe without IRQ domain
  2021-08-25  4:05       ` Kefeng Wang
  2021-08-25  8:04         ` Saravana Kannan
@ 2021-08-25 12:33         ` Rob Herring
  2021-08-25 14:41           ` Kefeng Wang
  1 sibling, 1 reply; 25+ messages in thread
From: Rob Herring @ 2021-08-25 12:33 UTC (permalink / raw)
  To: Kefeng Wang
  Cc: Saravana Kannan, linux-kernel, Frank Rowand, devicetree,
	Russell King, Linus Walleij, linux-arm-kernel, Ruizhe Lin

On Tue, Aug 24, 2021 at 11:05 PM Kefeng Wang <wangkefeng.wang@huawei.com> wrote:
>
>
> On 2021/8/25 4:08, Saravana Kannan wrote:
> > On Tue, Aug 24, 2021 at 1:05 PM Rob Herring <robh+dt@kernel.org> wrote:
> >> +Saravana
> >>
> >> Saravana mentioned to me there may be some issues with this one...
> >>
> >>
> >> On Mon, Aug 16, 2021 at 2:43 AM Kefeng Wang <wangkefeng.wang@huawei.com> wrote:
> >>> of_amba_device_create() uses irq_of_parse_and_map() to translate
> >>> a DT interrupt specification into a Linux virtual interrupt number.
> >>>
> >>> But it doesn't properly handle the case where the interrupt controller
> >>> is not yet available, eg, when pl011 interrupt is connected to MBIGEN
> >>> interrupt controller, because the mbigen initialization is too late,
> >>> which will lead to no IRQ due to no IRQ domain found, log is shown below,
> >>>    "irq: no irq domain found for uart0 !"
> >>>
> >>> use of_irq_get() to return -EPROBE_DEFER as above, and in the function
> >>> amba_device_try_add()/amba_device_add(), it will properly handle in such
> >>> case, also return 0 in other fail cases to be consistent as before.
> >>>
> >>> Cc: Russell King <linux@armlinux.org.uk>
> >>> Cc: Rob Herring <robh+dt@kernel.org>
> >>> Cc: Frank Rowand <frowand.list@gmail.com>
> >>> Reported-by: Ruizhe Lin <linruizhe@huawei.com>
> >>> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
> >>> ---
> >>>   drivers/amba/bus.c    | 27 +++++++++++++++++++++++++++
> >>>   drivers/of/platform.c |  6 +-----
> >>>   2 files changed, 28 insertions(+), 5 deletions(-)
> >>>
> >>> diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c
> >>> index 36f2f42c8014..720aa6cdd402 100644
> >>> --- a/drivers/amba/bus.c
> >>> +++ b/drivers/amba/bus.c
> >>> @@ -19,6 +19,7 @@
> >>>   #include <linux/clk/clk-conf.h>
> >>>   #include <linux/platform_device.h>
> >>>   #include <linux/reset.h>
> >>> +#include <linux/of_irq.h>
> >>>
> >>>   #include <asm/irq.h>
> >>>
> >>> @@ -371,12 +372,38 @@ static void amba_device_release(struct device *dev)
> >>>          kfree(d);
> >>>   }
> >>>
> >>> +static int of_amba_device_decode_irq(struct amba_device *dev)
> >>> +{
> >>> +       struct device_node *node = dev->dev.of_node;
> >>> +       int i, irq = 0;
> >>> +
> >>> +       if (IS_ENABLED(CONFIG_OF_IRQ) && node) {
> >>> +               /* Decode the IRQs and address ranges */
> >>> +               for (i = 0; i < AMBA_NR_IRQS; i++) {
> >>> +                       irq = of_irq_get(node, i);
> >>> +                       if (irq < 0) {
> >>> +                               if (irq == -EPROBE_DEFER)
> >>> +                                       return irq;
> >>> +                               irq = 0;
> >>> +                       }
> >>> +
> >>> +                       dev->irq[i] = irq;
> >>> +               }
> >>> +       }
> >>> +
> >>> +       return 0;
> >>> +}
> >>> +
> >>>   static int amba_device_try_add(struct amba_device *dev, struct resource *parent)
> >>>   {
> >>>          u32 size;
> >>>          void __iomem *tmp;
> >>>          int i, ret;
> >>>
> >>> +       ret = of_amba_device_decode_irq(dev);
> >>> +       if (ret)
> >>> +               goto err_out;
> >>> +
> > Similar to other resources the AMBA bus "gets" for the device, I think
> > this should be moved into amba_probe() and not here. There's no reason
> > to delay the addition of the device (and loading its module) because
> > the IRQ isn't ready yet.
>
> The following code in the amba_device_try_add() will be called, it uses irq[0]
> and irq[1], so I put of_amba_device_decode_irq() into amba_device_try_add().
>
> 470         if (dev->irq[0])
> 471                 ret = device_create_file(&dev->dev, &dev_attr_irq0);
> 472         if (ret == 0 && dev->irq[1])
> 473                 ret = device_create_file(&dev->dev, &dev_attr_irq1);
> 474         if (ret == 0)
> 475                 return ret;

I wonder if we could just remove these. Why does userspace need them
in the first place? It's only an ABI if someone notices. Looking at
the history, AMBA bus was added in 2003 with just 'irq' and then
changed (ABI break) in 2004 to 'irq0' and 'irq1'.

Rob

[1] https://git.kernel.org/pub/scm/linux/kernel/git/tglx/history.git/log/arch/arm/common/amba.c

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 3/3] amba: Properly handle device probe without IRQ domain
  2021-08-25 12:33         ` [PATCH 3/3] amba: Properly handle device probe without IRQ domain Rob Herring
@ 2021-08-25 14:41           ` Kefeng Wang
  0 siblings, 0 replies; 25+ messages in thread
From: Kefeng Wang @ 2021-08-25 14:41 UTC (permalink / raw)
  To: Rob Herring
  Cc: Saravana Kannan, linux-kernel, Frank Rowand, devicetree,
	Russell King, Linus Walleij, linux-arm-kernel, Ruizhe Lin


On 2021/8/25 20:33, Rob Herring wrote:
> On Tue, Aug 24, 2021 at 11:05 PM Kefeng Wang <wangkefeng.wang@huawei.com> wrote:

...

>>> Similar to other resources the AMBA bus "gets" for the device, I think
>>> this should be moved into amba_probe() and not here. There's no reason
>>> to delay the addition of the device (and loading its module) because
>>> the IRQ isn't ready yet.
>> The following code in the amba_device_try_add() will be called, it uses irq[0]
>> and irq[1], so I put of_amba_device_decode_irq() into amba_device_try_add().
>>
>> 470         if (dev->irq[0])
>> 471                 ret = device_create_file(&dev->dev, &dev_attr_irq0);
>> 472         if (ret == 0 && dev->irq[1])
>> 473                 ret = device_create_file(&dev->dev, &dev_attr_irq1);
>> 474         if (ret == 0)
>> 475                 return ret;
> I wonder if we could just remove these. Why does userspace need them
> in the first place? It's only an ABI if someone notices. Looking at
> the history, AMBA bus was added in 2003 with just 'irq' and then
> changed (ABI break) in 2004 to 'irq0' and 'irq1'.
>
> Rob

Ok, I will kill all irq parts,

diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c
index 962041148482..c08e8b30e02c 100644
--- a/drivers/amba/bus.c
+++ b/drivers/amba/bus.c
@@ -20,8 +20,6 @@
  #include <linux/platform_device.h>
  #include <linux/reset.h>

-#include <asm/irq.h>
-
  #define to_amba_driver(d)      container_of(d, struct amba_driver, drv)

  /* called on periphid match and class 0x9 coresight device. */
@@ -135,8 +133,6 @@ static ssize_t name##_show(struct device 
*_dev,                             \
  static DEVICE_ATTR_RO(name)

  amba_attr_func(id, "%08x\n", dev->periphid);
-amba_attr_func(irq0, "%u\n", dev->irq[0]);
-amba_attr_func(irq1, "%u\n", dev->irq[1]);
  amba_attr_func(resource, "\t%016llx\t%016llx\t%016lx\n",
          (unsigned long long)dev->res.start, (unsigned long 
long)dev->res.end,
          dev->res.flags);
@@ -467,10 +463,6 @@ static int amba_device_try_add(struct amba_device 
*dev, struct resource *parent)
         if (ret)
                 goto err_release;

-       if (dev->irq[0])
-               ret = device_create_file(&dev->dev, &dev_attr_irq0);
-       if (ret == 0 && dev->irq[1])
-               ret = device_create_file(&dev->dev, &dev_attr_irq1);

and do some cleanup about error handling in the next version.

>
> [1] https://git.kernel.org/pub/scm/linux/kernel/git/tglx/history.git/log/arch/arm/common/amba.c
> .
>

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* Re: [PATCH 3/3] amba: Properly handle device probe without IRQ domain
  2021-08-25  8:04         ` Saravana Kannan
  2021-08-25  8:39           ` Kefeng Wang
@ 2021-08-26  2:45           ` Kefeng Wang
  2021-08-26  4:45             ` Saravana Kannan
  1 sibling, 1 reply; 25+ messages in thread
From: Kefeng Wang @ 2021-08-26  2:45 UTC (permalink / raw)
  To: Saravana Kannan
  Cc: Rob Herring, linux-kernel, Frank Rowand, devicetree,
	Russell King, Linus Walleij, linux-arm-kernel, Ruizhe Lin


On 2021/8/25 16:04, Saravana Kannan wrote:
> On Tue, Aug 24, 2021 at 9:05 PM Kefeng Wang <wangkefeng.wang@huawei.com> wrote:
>>
>> On 2021/8/25 4:08, Saravana Kannan wrote:
>>> On Tue, Aug 24, 2021 at 1:05 PM Rob Herring <robh+dt@kernel.org> wrote:
>>>> +Saravana
>>>>
>>>> Saravana mentioned to me there may be some issues with this one...
>>>>
>>>>
>>>> On Mon, Aug 16, 2021 at 2:43 AM Kefeng Wang <wangkefeng.wang@huawei.com> wrote:
>>>>> of_amba_device_create() uses irq_of_parse_and_map() to translate
>>>>> a DT interrupt specification into a Linux virtual interrupt number.
>>>>>
>>>>> But it doesn't properly handle the case where the interrupt controller
>>>>> is not yet available, eg, when pl011 interrupt is connected to MBIGEN
>>>>> interrupt controller, because the mbigen initialization is too late,
>>>>> which will lead to no IRQ due to no IRQ domain found, log is shown below,
>>>>>     "irq: no irq domain found for uart0 !"
>>>>>
>>>>> use of_irq_get() to return -EPROBE_DEFER as above, and in the function
>>>>> amba_device_try_add()/amba_device_add(), it will properly handle in such
>>>>> case, also return 0 in other fail cases to be consistent as before.
>>>>>
>>>>> Cc: Russell King <linux@armlinux.org.uk>
>>>>> Cc: Rob Herring <robh+dt@kernel.org>
>>>>> Cc: Frank Rowand <frowand.list@gmail.com>
>>>>> Reported-by: Ruizhe Lin <linruizhe@huawei.com>
>>>>> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
>>>>> ---
>>>>>    drivers/amba/bus.c    | 27 +++++++++++++++++++++++++++
>>>>>    drivers/of/platform.c |  6 +-----
>>>>>    2 files changed, 28 insertions(+), 5 deletions(-)
>>>>>
>>>>> diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c
>>>>> index 36f2f42c8014..720aa6cdd402 100644
>>>>> --- a/drivers/amba/bus.c
>>>>> +++ b/drivers/amba/bus.c
>>>>> @@ -19,6 +19,7 @@
>>>>>    #include <linux/clk/clk-conf.h>
>>>>>    #include <linux/platform_device.h>
>>>>>    #include <linux/reset.h>
>>>>> +#include <linux/of_irq.h>
>>>>>
>>>>>    #include <asm/irq.h>
>>>>>
>>>>> @@ -371,12 +372,38 @@ static void amba_device_release(struct device *dev)
>>>>>           kfree(d);
>>>>>    }
>>>>>
>>>>> +static int of_amba_device_decode_irq(struct amba_device *dev)
>>>>> +{
>>>>> +       struct device_node *node = dev->dev.of_node;
>>>>> +       int i, irq = 0;
>>>>> +
>>>>> +       if (IS_ENABLED(CONFIG_OF_IRQ) && node) {
>>>>> +               /* Decode the IRQs and address ranges */
>>>>> +               for (i = 0; i < AMBA_NR_IRQS; i++) {
>>>>> +                       irq = of_irq_get(node, i);
>>>>> +                       if (irq < 0) {
>>>>> +                               if (irq == -EPROBE_DEFER)
>>>>> +                                       return irq;
>>>>> +                               irq = 0;
>>>>> +                       }
>>>>> +
>>>>> +                       dev->irq[i] = irq;
>>>>> +               }
>>>>> +       }
>>>>> +
>>>>> +       return 0;
>>>>> +}
>>>>> +
>>>>>    static int amba_device_try_add(struct amba_device *dev, struct resource *parent)
>>>>>    {
>>>>>           u32 size;
>>>>>           void __iomem *tmp;
>>>>>           int i, ret;
>>>>>
>>>>> +       ret = of_amba_device_decode_irq(dev);
>>>>> +       if (ret)
>>>>> +               goto err_out;
>>>>> +
>>> Similar to other resources the AMBA bus "gets" for the device, I think
>>> this should be moved into amba_probe() and not here. There's no reason
>>> to delay the addition of the device (and loading its module) because
>>> the IRQ isn't ready yet.
>> The following code in the amba_device_try_add() will be called, it uses irq[0]
>> and irq[1], so I put of_amba_device_decode_irq() into amba_device_try_add().
>>
>> 470         if (dev->irq[0])
>> 471                 ret = device_create_file(&dev->dev, &dev_attr_irq0);
>> 472         if (ret == 0 && dev->irq[1])
>> 473                 ret = device_create_file(&dev->dev, &dev_attr_irq1);
>> 474         if (ret == 0)
>> 475                 return ret;
>>
>> of_amba_device_decode_irq() in amba_device_try_add() won't lead to issue,
>> only delay the device add, right?
> But delaying the device add is the issue. For example, adding a device
> could trigger the loading of the corresponding module using uevents.
> But now this change would delay that step. That can have other
> unintended consequences -- slowing down boot, what if the driver was
> working fine without the IRQ, etc.
>
>> If make it into amba_probe(), the above code should be moved too, could we
>> make a new patch to move both of them, or don't move them?
> I'd say move them both. If Russell hasn't already picked this up, then
> I'd say redo your Patch 3/3.
I will resend with put it into amba_probe.
>
> Btw, I've been working on [1] cleaning up the one-off deferred probe
> solution that we have for amba devices. That causes a bunch of other
> headaches. Your patch 3/3 takes us further in the wrong direction by
> adding more reasons for delaying the addition of the device.

Got it,  and I could resend all combine your patch(due to context conflict

when changing same function) if you no object.


>
> -Saravana
>
> [1] - https://lore.kernel.org/lkml/CAGETcx8b228nDUho3cX9AAQ-pXOfZTMv8cj2vhdx9yc_pk8q+A@mail.gmail.com/
> .
>

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 3/3] amba: Properly handle device probe without IRQ domain
  2021-08-26  2:45           ` Kefeng Wang
@ 2021-08-26  4:45             ` Saravana Kannan
  2021-08-26  6:22               ` Kefeng Wang
       [not found]               ` <df8e7756-8b0d-d7de-a9ff-3f6eb0ffa8a5@huawei.com>
  0 siblings, 2 replies; 25+ messages in thread
From: Saravana Kannan @ 2021-08-26  4:45 UTC (permalink / raw)
  To: Kefeng Wang
  Cc: Rob Herring, linux-kernel, Frank Rowand, devicetree,
	Russell King, Linus Walleij, linux-arm-kernel, Ruizhe Lin

On Wed, Aug 25, 2021 at 7:45 PM Kefeng Wang <wangkefeng.wang@huawei.com> wrote:
>
>
> On 2021/8/25 16:04, Saravana Kannan wrote:
> > On Tue, Aug 24, 2021 at 9:05 PM Kefeng Wang <wangkefeng.wang@huawei.com> wrote:
> >>
> >> On 2021/8/25 4:08, Saravana Kannan wrote:
> >>> On Tue, Aug 24, 2021 at 1:05 PM Rob Herring <robh+dt@kernel.org> wrote:
> >>>> +Saravana
> >>>>
> >>>> Saravana mentioned to me there may be some issues with this one...
> >>>>
> >>>>
> >>>> On Mon, Aug 16, 2021 at 2:43 AM Kefeng Wang <wangkefeng.wang@huawei.com> wrote:
> >>>>> of_amba_device_create() uses irq_of_parse_and_map() to translate
> >>>>> a DT interrupt specification into a Linux virtual interrupt number.
> >>>>>
> >>>>> But it doesn't properly handle the case where the interrupt controller
> >>>>> is not yet available, eg, when pl011 interrupt is connected to MBIGEN
> >>>>> interrupt controller, because the mbigen initialization is too late,
> >>>>> which will lead to no IRQ due to no IRQ domain found, log is shown below,
> >>>>>     "irq: no irq domain found for uart0 !"
> >>>>>
> >>>>> use of_irq_get() to return -EPROBE_DEFER as above, and in the function
> >>>>> amba_device_try_add()/amba_device_add(), it will properly handle in such
> >>>>> case, also return 0 in other fail cases to be consistent as before.
> >>>>>
> >>>>> Cc: Russell King <linux@armlinux.org.uk>
> >>>>> Cc: Rob Herring <robh+dt@kernel.org>
> >>>>> Cc: Frank Rowand <frowand.list@gmail.com>
> >>>>> Reported-by: Ruizhe Lin <linruizhe@huawei.com>
> >>>>> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
> >>>>> ---
> >>>>>    drivers/amba/bus.c    | 27 +++++++++++++++++++++++++++
> >>>>>    drivers/of/platform.c |  6 +-----
> >>>>>    2 files changed, 28 insertions(+), 5 deletions(-)
> >>>>>
> >>>>> diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c
> >>>>> index 36f2f42c8014..720aa6cdd402 100644
> >>>>> --- a/drivers/amba/bus.c
> >>>>> +++ b/drivers/amba/bus.c
> >>>>> @@ -19,6 +19,7 @@
> >>>>>    #include <linux/clk/clk-conf.h>
> >>>>>    #include <linux/platform_device.h>
> >>>>>    #include <linux/reset.h>
> >>>>> +#include <linux/of_irq.h>
> >>>>>
> >>>>>    #include <asm/irq.h>
> >>>>>
> >>>>> @@ -371,12 +372,38 @@ static void amba_device_release(struct device *dev)
> >>>>>           kfree(d);
> >>>>>    }
> >>>>>
> >>>>> +static int of_amba_device_decode_irq(struct amba_device *dev)
> >>>>> +{
> >>>>> +       struct device_node *node = dev->dev.of_node;
> >>>>> +       int i, irq = 0;
> >>>>> +
> >>>>> +       if (IS_ENABLED(CONFIG_OF_IRQ) && node) {
> >>>>> +               /* Decode the IRQs and address ranges */
> >>>>> +               for (i = 0; i < AMBA_NR_IRQS; i++) {
> >>>>> +                       irq = of_irq_get(node, i);
> >>>>> +                       if (irq < 0) {
> >>>>> +                               if (irq == -EPROBE_DEFER)
> >>>>> +                                       return irq;
> >>>>> +                               irq = 0;
> >>>>> +                       }
> >>>>> +
> >>>>> +                       dev->irq[i] = irq;
> >>>>> +               }
> >>>>> +       }
> >>>>> +
> >>>>> +       return 0;
> >>>>> +}
> >>>>> +
> >>>>>    static int amba_device_try_add(struct amba_device *dev, struct resource *parent)
> >>>>>    {
> >>>>>           u32 size;
> >>>>>           void __iomem *tmp;
> >>>>>           int i, ret;
> >>>>>
> >>>>> +       ret = of_amba_device_decode_irq(dev);
> >>>>> +       if (ret)
> >>>>> +               goto err_out;
> >>>>> +
> >>> Similar to other resources the AMBA bus "gets" for the device, I think
> >>> this should be moved into amba_probe() and not here. There's no reason
> >>> to delay the addition of the device (and loading its module) because
> >>> the IRQ isn't ready yet.
> >> The following code in the amba_device_try_add() will be called, it uses irq[0]
> >> and irq[1], so I put of_amba_device_decode_irq() into amba_device_try_add().
> >>
> >> 470         if (dev->irq[0])
> >> 471                 ret = device_create_file(&dev->dev, &dev_attr_irq0);
> >> 472         if (ret == 0 && dev->irq[1])
> >> 473                 ret = device_create_file(&dev->dev, &dev_attr_irq1);
> >> 474         if (ret == 0)
> >> 475                 return ret;
> >>
> >> of_amba_device_decode_irq() in amba_device_try_add() won't lead to issue,
> >> only delay the device add, right?
> > But delaying the device add is the issue. For example, adding a device
> > could trigger the loading of the corresponding module using uevents.
> > But now this change would delay that step. That can have other
> > unintended consequences -- slowing down boot, what if the driver was
> > working fine without the IRQ, etc.
> >
> >> If make it into amba_probe(), the above code should be moved too, could we
> >> make a new patch to move both of them, or don't move them?
> > I'd say move them both. If Russell hasn't already picked this up, then
> > I'd say redo your Patch 3/3.
> I will resend with put it into amba_probe.
> >
> > Btw, I've been working on [1] cleaning up the one-off deferred probe
> > solution that we have for amba devices. That causes a bunch of other
> > headaches. Your patch 3/3 takes us further in the wrong direction by
> > adding more reasons for delaying the addition of the device.
>
> Got it,  and I could resend all combine your patch(due to context conflict
>
> when changing same function) if you no object.

If you want to resolve the conflict with my patch and resend it while
keeping me as the author, I would definitely appreciate it.

-Saravana
>
>
> >
> > -Saravana
> >
> > [1] - https://lore.kernel.org/lkml/CAGETcx8b228nDUho3cX9AAQ-pXOfZTMv8cj2vhdx9yc_pk8q+A@mail.gmail.com/
> > .
> >

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 3/3] amba: Properly handle device probe without IRQ domain
  2021-08-26  4:45             ` Saravana Kannan
@ 2021-08-26  6:22               ` Kefeng Wang
       [not found]               ` <df8e7756-8b0d-d7de-a9ff-3f6eb0ffa8a5@huawei.com>
  1 sibling, 0 replies; 25+ messages in thread
From: Kefeng Wang @ 2021-08-26  6:22 UTC (permalink / raw)
  To: Saravana Kannan
  Cc: Rob Herring, linux-kernel, Frank Rowand, devicetree,
	Russell King, Linus Walleij, linux-arm-kernel, Ruizhe Lin


On 2021/8/26 12:45, Saravana Kannan wrote:
> On Wed, Aug 25, 2021 at 7:45 PM Kefeng Wang <wangkefeng.wang@huawei.com> wrote:
>>
>> On 2021/8/25 16:04, Saravana Kannan wrote:
>>> On Tue, Aug 24, 2021 at 9:05 PM Kefeng Wang <wangkefeng.wang@huawei.com> wrote:
>>>> On 2021/8/25 4:08, Saravana Kannan wrote:
>>>>> On Tue, Aug 24, 2021 at 1:05 PM Rob Herring <robh+dt@kernel.org> wrote:
>>>>>> +Saravana
>>>>>>
>>>>>> Saravana mentioned to me there may be some issues with this one...
>>>>>>
>>>>>>
>>>>>> On Mon, Aug 16, 2021 at 2:43 AM Kefeng Wang <wangkefeng.wang@huawei.com> wrote:
>>>>>>> of_amba_device_create() uses irq_of_parse_and_map() to translate
>>>>>>> a DT interrupt specification into a Linux virtual interrupt number.
>>>>>>>
>>>>>>> But it doesn't properly handle the case where the interrupt controller
>>>>>>> is not yet available, eg, when pl011 interrupt is connected to MBIGEN
>>>>>>> interrupt controller, because the mbigen initialization is too late,
>>>>>>> which will lead to no IRQ due to no IRQ domain found, log is shown below,
>>>>>>>      "irq: no irq domain found for uart0 !"
>>>>>>>
>>>>>>> use of_irq_get() to return -EPROBE_DEFER as above, and in the function
>>>>>>> amba_device_try_add()/amba_device_add(), it will properly handle in such
>>>>>>> case, also return 0 in other fail cases to be consistent as before.
>>>>>>>
>>>>>>> Cc: Russell King <linux@armlinux.org.uk>
>>>>>>> Cc: Rob Herring <robh+dt@kernel.org>
>>>>>>> Cc: Frank Rowand <frowand.list@gmail.com>
>>>>>>> Reported-by: Ruizhe Lin <linruizhe@huawei.com>
>>>>>>> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
>>>>>>> ---
...
>>>>> Similar to other resources the AMBA bus "gets" for the device, I think
>>>>> this should be moved into amba_probe() and not here. There's no reason
>>>>> to delay the addition of the device (and loading its module) because
>>>>> the IRQ isn't ready yet.
>>>> The following code in the amba_device_try_add() will be called, it uses irq[0]
>>>> and irq[1], so I put of_amba_device_decode_irq() into amba_device_try_add().
>>>>
>>>> 470         if (dev->irq[0])
>>>> 471                 ret = device_create_file(&dev->dev, &dev_attr_irq0);
>>>> 472         if (ret == 0 && dev->irq[1])
>>>> 473                 ret = device_create_file(&dev->dev, &dev_attr_irq1);
>>>> 474         if (ret == 0)
>>>> 475                 return ret;
>>>>
>>>> of_amba_device_decode_irq() in amba_device_try_add() won't lead to issue,
>>>> only delay the device add, right?
>>> But delaying the device add is the issue. For example, adding a device
>>> could trigger the loading of the corresponding module using uevents.
>>> But now this change would delay that step. That can have other
>>> unintended consequences -- slowing down boot, what if the driver was
>>> working fine without the IRQ, etc.
>>>
>>>> If make it into amba_probe(), the above code should be moved too, could we
>>>> make a new patch to move both of them, or don't move them?
>>> I'd say move them both. If Russell hasn't already picked this up, then
>>> I'd say redo your Patch 3/3.
>> I will resend with put it into amba_probe.
>>> Btw, I've been working on [1] cleaning up the one-off deferred probe
>>> solution that we have for amba devices. That causes a bunch of other
>>> headaches. Your patch 3/3 takes us further in the wrong direction by
>>> adding more reasons for delaying the addition of the device.
>> Got it,  and I could resend all combine your patch(due to context conflict
>>
>> when changing same function) if you no object.
> If you want to resolve the conflict with my patch and resend it while
> keeping me as the author, I would definitely appreciate it.
Yes, I will keep it, and rebase my patch based on it.
>
> -Saravana
>>
>>> -Saravana
>>>
>>> [1] - https://lore.kernel.org/lkml/CAGETcx8b228nDUho3cX9AAQ-pXOfZTMv8cj2vhdx9yc_pk8q+A@mail.gmail.com/
>>> .
>>>
> .
>

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [BUG] amba: Remove deferred device addition
       [not found]               ` <df8e7756-8b0d-d7de-a9ff-3f6eb0ffa8a5@huawei.com>
@ 2021-08-27  0:04                 ` Saravana Kannan
       [not found]                   ` <85b28900-5f42-b997-2ded-0b952bc2a03e@huawei.com>
  0 siblings, 1 reply; 25+ messages in thread
From: Saravana Kannan @ 2021-08-27  0:04 UTC (permalink / raw)
  To: Kefeng Wang
  Cc: Rob Herring, linux-kernel, Frank Rowand, devicetree,
	Russell King, Linus Walleij, linux-arm-kernel

On Thu, Aug 26, 2021 at 1:22 AM Kefeng Wang <wangkefeng.wang@huawei.com> wrote:
>
>
> >>> Btw, I've been working on [1] cleaning up the one-off deferred probe
> >>> solution that we have for amba devices. That causes a bunch of other
> >>> headaches. Your patch 3/3 takes us further in the wrong direction by
> >>> adding more reasons for delaying the addition of the device.
>
> Hi Saravana, I try the link[1], but with it, there is a crash when boot
> (qemu-system-arm -M vexpress-a15),

Hi,

It's hard to make sense of the logs. Looks like two different threads
might be printing to the log at the same time? Can you please enable
the config that prints the thread ID (forgot what it's called) and
collect this again? With what I could tell the crash seems to be
happening somewhere in platform_match(), but that's not related to
this patch at all?

-Saravana

>
> without it, boot successfully.
>
> [    2.246057] aaci-pl041 1c040000.aaci: ARM AC'97 Interface PL041 rev0
> at 0x1c040000, irq 36
> [    2.246357] aaci-pl041 1c040000.aaci: FIFO 512 entries
> [    2.248617] NET: Registered PF_PACKET protocol family
> [    2.250481] 9pnet: Installing 9P2000 support
> [    2.251474] Registering SWP/SWPB emulation handler
> [    2.284374] 1c090000.serial: ttyAMA0 at MMIO 0x1c090000 (irq = 41,
> base_baud = 0) is a PL011 rev1
> [    2.287797] printk: console [ttyAMA0] enabled
> [    2.287797] printk: console [ttyAMA0] enabled
> [    2.288262] input: AT Raw Set 2 keyboard as
> /devices/platform/bus@8000000/bus@8000000:motherboard-bus/bus@8000000:motherboard-bus:iofpga-bus@300000000/1c060000.kmi/serio0/input/input0
> [    2.288262] input: AT Raw Set 2 keyboard as
> /devices/platform/bus@8000000/bus@8000000:motherboard-bus/bus@8000000:motherboard-bus:iofpga-bus@300000000/1c060000.kmi/serio0/input/input0
> [    2.288755] printk: bootconsole [earlycon0] disabled
> [    2.288755] printk: bootconsole [earlycon0] disabled
> [    2.294507] 1c0a0000.serial: ttyAMA1 at MMIO 0x1c0a0000 (irq = 42,
> base_baud = 0) is a PL011 rev1
> [    2.296950] 1c0b0000.serial: ttyAMA2 at MMIO 0x1c0b0000 (irq = 43,
> base_baud = 0) is a PL011 rev1
> [    2.298636] 1c0c0000.serial: ttyAMA3 at MMIO 0x1c0c0000 (irq = 44,
> base_baud = 0) is a PL011 rev1
> [    2.300496] 8<--- cut here ---
> [    2.300775] ------------[ cut here ]------------
> [    2.301260] Unable to handle kernel NULL pointer dereference at
> virtual address 00000000
> [    2.300928] WARNING: CPU: 1 PID: 27 at
> /home/wkf/work/hulk/lib/refcount.c:25 refcount_warn_saturate+0x108/0x174
> [    2.301700] pgd = (ptrval)
> [    2.302002] refcount_t: addition on 0; use-after-free.
> [    2.302184] [00000000] *pgd=00000000
> [    2.302363] Modules linked in:
> [    2.302384]
> [    2.302753]
> [    2.303501] CPU: 1 PID: 27 Comm: kworker/u4:1 Not tainted 5.14.0-rc7+
> #193
> [    2.303990] Hardware name: ARM-Versatile Express
> [    2.304198] Internal error: Oops: 5 [#1] SMP ARM
> [    2.304537] Workqueue: events_unbound deferred_probe_work_func
> [    2.304829] Modules linked in:
> [    2.304865]
> [    2.305133]
> [    2.305401] Backtrace:
> [    2.305562] CPU: 0 PID: 41 Comm: kworker/0:2 Not tainted 5.14.0-rc7+ #193
> [    2.305614] Hardware name: ARM-Versatile Express
> [    2.305576]
> [    2.305781] [<c010c780>] (dump_backtrace) from [<c010cacc>]
> (show_stack+0x20/0x24)
> [    2.306266] Workqueue: events_long serio_handle_event
> [    2.306732]  r7:00000009 r6:00000000 r5:c0b1efb8 r4:600b0093
> [    2.306889] PC is at strcmp+0x18/0x44
> [    2.307115] [<c010caac>] (show_stack) from [<c091eba4>]
> (dump_stack_lvl+0x48/0x54)
> [    2.307263] LR is at platform_match+0xb8/0xcc
> [    2.307498] [<c091eb5c>] (dump_stack_lvl) from [<c091ebc8>]
> (dump_stack+0x18/0x1c)
> [    2.307739] pc : [<c0560aec>]    lr : [<c064626c>]    psr: 60000013
> [    2.307988]  r5:c0b4ef98 r4:c165ddc4
> [    2.308317] sp : c1675d70  ip : c1675d80  fp : c1675d7c
> [    2.308433] [<c091ebb0>] (dump_stack) from [<c01289c4>]
> (__warn+0x110/0x114)
> [    2.308743] r10: 00000000  r9 : 00000000  r8 : 00000001
> [    2.308961] [<c01288b4>] (__warn) from [<c0128a4c>]
> (warn_slowpath_fmt+0x84/0xc0)
> [    2.309252] r7 : c0d04d08  r6 : c13aed18  r5 : c1090fc0  r4 : c13aed18
> [    2.309547]  r9:00000009 r8:c0504f10 r7:00000019 r6:c0b4ef98
> r5:c0b4efbc r4:c0d04d08
> [    2.309975] [<c01289cc>] (warn_slowpath_fmt) from [<c0504f10>]
> (refcount_warn_saturate+0x108/0x174)
> [    2.310531] r3 : c0a5e1c0  r2 : 00000002  r1 : c0b82860  r0 : 00000000
> [    2.311263] Flags: nZCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM
> Segment none
> [    2.311298]  r9:600b0013 r8:00000000 r7:00000000 r6:c1787eb8
> r5:c165de3c r4:c1702b38
> [    2.311899] Control: 10c5387d  Table: 8000406a  DAC: 00000051
> [    2.312117] [<c0504e08>] (refcount_warn_saturate) from [<c055657c>]
> (klist_next+0x134/0x138)
> [    2.312755] Register r0 information:
> [    2.312964] [<c0556448>] (klist_next) from [<c06411dc>]
> (bus_for_each_drv+0x74/0xc8)
> [    2.313345]  NULL pointer
> [    2.313690] Register r1 information:
> [    2.313736]  r9:00000000 r8:00000001 r7:c0d04d08 r6:c064373c
> r5:c165de6c r4:00000000
> [    2.313933]  non-slab/vmalloc memory
> [    2.314172] [<c0641168>] (bus_for_each_drv) from [<c0642f38>]
> (__device_attach+0xf0/0x15c)
> [    2.315042]  r7:c10846b8 r6:c13ae444 r5:c13ae400 r4:c0d04d08
> [    2.315060] Register r2 information:
> [    2.315243] [<c0642e48>] (__device_attach) from [<c064390c>]
> (device_initial_probe+0x1c/0x20)
> [    2.315276]  non-paged memory
> [    2.315569]  r8:00000000 r7:c10846b8 r6:c13ae400 r5:c107d690 r4:c13ae400
> [    2.315593] [<c06438f0>] (device_initial_probe) from [<c0642080>]
> (bus_probe_device+0x94/0x9c)
> [    2.316192] [<c0641fec>] (bus_probe_device) from [<c064259c>]
> (deferred_probe_work_func+0x8c/0xb8)
> [    2.316939]  r7:c10846b8 r6:c10846a4 r5:c10846a4 r4:c13ae400
> [    2.317573] [<c0642510>] (deferred_probe_work_func) from [<c01475a4>]
> (process_one_work+0x238/0x594)
> [    2.318513]  r9:00000000 r8:00000000 r7:c1225b00 r6:c1206200
> r5:c16b6f80 r4:c10846d4
> [    2.318931] Register r3 information: non-slab/vmalloc memory
> [    2.319218] [<c014736c>] (process_one_work) from [<c0147bc4>]
> (worker_thread+0x2c4/0x5f4)
> [    2.320001]  r10:c0d03d00 r9:00000088 r8:ffffe000 r7:c1206218
> r6:c16b6f94 r5:c1206200
> [    2.320280] Register r4 information:
> [    2.320614]  r4:c16b6f80
> [    2.320942] [<c0147900>] (worker_thread) from [<c014feb4>]
> (kthread+0x178/0x194)
> [    2.321111]  slab kmalloc-1k
> [    2.321403]  r10:c165c000 r9:c1313e74 r8:00000000 r7:c16b6f80
> r6:c0147900 r5:c16b5580
> [    2.321391]  start c13aec00 pointer offset 280
> [    2.321993]  r4:c13d4980
> [    2.322006]  size 1024
> [    2.322176] [<c014fd3c>] (kthread) from [<c0100150>]
> (ret_from_fork+0x14/0x24)
> [    2.323165] Exception stack(0xc165dfb0 to 0xc165dff8)
> [    2.323187]
> [    2.323371] Register r5 information: non-slab/vmalloc memory
> [    2.323535] Register r6 information: slab kmalloc-1k start c13aec00
> pointer offset 280 size 1024
> [    2.324594] Register r7 information:
> [    2.324597] dfa0:                                     00000000
> 00000000 00000000 00000000
> [    2.325507]  non-slab/vmalloc memory
> [    2.325830] Register r8 information: non-paged memory
> [    2.326267] Register r9 information:
> [    2.326274] dfc0: 00000000 00000000 00000000 00000000 00000000
> 00000000 00000000 00000000
> [    2.326453]  NULL pointer
> [    2.326942] Register r10 information: NULL pointer
> [    2.327159] dfe0: 00000000 00000000 00000000 00000000 00000013 00000000
> [    2.327258] Register r11 information: non-slab/vmalloc memory
> [    2.327904] Register r12 information:
> [    2.327928]  r10:00000000 r9:00000000 r8:00000000 r7:00000000
> r6:00000000 r5:c014fd3c
> [    2.327937]  non-slab/vmalloc memory
> [    2.328057] Process kworker/0:2 (pid: 41, stack limit = 0x(ptrval))
> [    2.328204] Stack: (0xc1675d70 to 0xc1676000)
> [    2.328479]  r4:c16b5580
> [    2.329851] ---[ end trace f293b13f591ee203 ]---
> [    2.330027] 5d60:                                     c1675d9c
> c1675d80 c064626c c0560ae0
> [    2.331070] 5d80: c1090fc0 c1675df4 c13aed18 c0d04d08 c1675dbc
> c1675da0 c0643778 c06461c0
> [    2.331120] 8<--- cut here ---
> [    2.331739] Unhandled fault: page domain fault (0x01b) at 0x00000010
> [    2.331915] 5da0: 00000000 c1675df4 c064373c c0d04d08 c1675dec
> c1675dc0 c06411d0 c0643748
> [    2.332379] pgd = (ptrval)
> [    2.332408] 5dc0: c1675dec c16bf06c c1787eb8 76076098 c0d04d08
> c13aed18 c13aed5c c13aa800
> [    2.332889] [00000010] *pgd=00000000
> [    2.332951] 5de0: c1675e24 c1675df0 c0642f38 c0641174 c1675e44
> c13aed18 00000001 76076098
> [    2.333535] 5e00: 00000000 c13aed18 c108ea84 c13aed18 c13aa800
> c10843a0 c1675e34 c1675e28
> [    2.334201] 5e20: c064390c c0642e54 c1675e54 c1675e38 c0642080
> c06438fc c13aed18 00000000
> [    2.334845] 5e40: c0d04d08 c13aa800 c1675eb4 c1675e58 c063fa1c
> c0641ff8 c13a5180 c1706380
> [    2.335464] 5e60: eee2e0c0 c1201180 c071c008 c11ea558 c0b7f260
> c0b7f28c c1675eb4 c1675e88
> [    2.336088] 5e80: c02e6368 76076098 00000001 c1706f4c c13aec00
> c108ea54 c1706f40 c11ea558
> [    2.336758] 5ea0: c0b7f260 c0b7f28c c1675ef4 c1675eb8 c071c0e4
> c063f620 c0923300 c0158300
> [    2.337399] 5ec0: 00000000 c13aed18 c1675ef4 c108ea70 c1702e80
> effc4400 effc7700 00000000
> [    2.338025] 5ee0: 00000000 c10b2580 c1675f34 c1675ef8 c01475a4
> c071bf38 c13a5100 ffffe000
> [    2.338663] 5f00: c1675f1c c1675f10 c0149250 c1702e80 effc4400
> c1702e94 effc4418 ffffe000
> [    2.339296] 5f20: 00000008 c0d03d00 c1675f74 c1675f38 c014795c
> c0147378 c130fe74 c0b0c2c4
> [    2.340102] 5f40: c10b1d2a effc4400 c1675f74 c1706000 c1706880
> c0147900 c1702e80 00000000
> [    2.340736] 5f60: c130fe74 c1674000 c1675fac c1675f78 c014feb4
> c014790c c1706024 c1706024
> [    2.341315] 5f80: c1675fac c1706880 c014fd3c 00000000 00000000
> 00000000 00000000 00000000
> [    2.341852] 5fa0: 00000000 c1675fb0 c0100150 c014fd48 00000000
> 00000000 00000000 00000000
> [    2.342407] 5fc0: 00000000 00000000 00000000 00000000 00000000
> 00000000 00000000 00000000
> [    2.342957] 5fe0: 00000000 00000000 00000000 00000000 00000013
> 00000000 00000000 00000000
> [    2.343361] Backtrace:
> [    2.343573] [<c0560ad4>] (strcmp) from [<c064626c>]
> (platform_match+0xb8/0xcc)
> [    2.343954] [<c06461b4>] (platform_match) from [<c0643778>]
> (__device_attach_driver+0x3c/0xc4)
> [    2.344369]  r7:c0d04d08 r6:c13aed18 r5:c1675df4 r4:c1090fc0
> [    2.344615] [<c064373c>] (__device_attach_driver) from [<c06411d0>]
> (bus_for_each_drv+0x68/0xc8)
> [    2.345015]  r7:c0d04d08 r6:c064373c r5:c1675df4 r4:00000000
> [    2.345283] [<c0641168>] (bus_for_each_drv) from [<c0642f38>]
> (__device_attach+0xf0/0x15c)
> [    2.345654]  r7:c13aa800 r6:c13aed5c r5:c13aed18 r4:c0d04d08
> [    2.345903] [<c0642e48>] (__device_attach) from [<c064390c>]
> (device_initial_probe+0x1c/0x20)
> [    2.346325]  r8:c10843a0 r7:c13aa800 r6:c13aed18 r5:c108ea84 r4:c13aed18
> [    2.346635] [<c06438f0>] (device_initial_probe) from [<c0642080>]
> (bus_probe_device+0x94/0x9c)
> [    2.347038] [<c0641fec>] (bus_probe_device) from [<c063fa1c>]
> (device_add+0x408/0x8b8)
> [    2.347419]  r7:c13aa800 r6:c0d04d08 r5:00000000 r4:c13aed18
> [    2.347695] [<c063f614>] (device_add) from [<c071c0e4>]
> (serio_handle_event+0x1b8/0x234)
> [    2.348094]  r10:c0b7f28c r9:c0b7f260 r8:c11ea558 r7:c1706f40
> r6:c108ea54 r5:c13aec00
> [    2.348453]  r4:c1706f4c
> [    2.348604] [<c071bf2c>] (serio_handle_event) from [<c01475a4>]
> (process_one_work+0x238/0x594)
> [    2.348968]  r10:c10b2580 r9:00000000 r8:00000000 r7:effc7700
> r6:effc4400 r5:c1702e80
> [    2.349315]  r4:c108ea70
> [    2.349468] [<c014736c>] (process_one_work) from [<c014795c>]
> (worker_thread+0x5c/0x5f4)
> [    2.349875]  r10:c0d03d00 r9:00000008 r8:ffffe000 r7:effc4418
> r6:c1702e94 r5:effc4400
> [    2.350169]  r4:c1702e80
> [    2.350315] [<c0147900>] (worker_thread) from [<c014feb4>]
> (kthread+0x178/0x194)
> [    2.350687]  r10:c1674000 r9:c130fe74 r8:00000000 r7:c1702e80
> r6:c0147900 r5:c1706880
> [    2.351038]  r4:c1706000
> [    2.351182] [<c014fd3c>] (kthread) from [<c0100150>]
> (ret_from_fork+0x14/0x24)
> [    2.351500] Exception stack(0xc1675fb0 to 0xc1675ff8)
> [    2.351855] 5fa0:                                     00000000
> 00000000 00000000 00000000
> [    2.352415] 5fc0: 00000000 00000000 00000000 00000000 00000000
> 00000000 00000000 00000000
> [    2.352923] 5fe0: 00000000 00000000 00000000 00000000 00000013 00000000
> [    2.353283]  r10:00000000 r9:00000000 r8:00000000 r7:00000000
> r6:00000000 r5:c014fd3c
> [    2.353618]  r4:c1706880
> [    2.354146] Code: e24cb004 ea000001 e3530000 0a000006 (e4d03001)
> [    2.354860] Internal error: : 1b [#2] SMP ARM
> [    2.355172] Modules linked in:
> [    2.355254] ---[ end trace f293b13f591ee204 ]---
> [    2.355650] CPU: 1 PID: 27 Comm: kworker/u4:1 Tainted: G      D
> W         5.14.0-rc7+ #193
> [    2.355888] Kernel panic - not syncing: Fatal exception
> [    2.355990] Hardware name: ARM-Versatile Express
> [    2.356735] Workqueue: events_unbound deferred_probe_work_func
> [    2.357217] PC is at klist_put+0x20/0xa4
> [    2.357537] LR is at klist_iter_exit+0x24/0x30
> [    2.357872] pc : [<c0556280>]    lr : [<c0556340>]    psr: a00b0013
> [    2.358299] sp : c165de00  ip : c165de20  fp : c165de1c
> [    2.358655] r10: c10b2580  r9 : 00000000  r8 : 00000001
> [    2.359009] r7 : c1702b38  r6 : 00000000  r5 : c165de6c  r4 : 00000000
> [    2.359440] r3 : 00000000  r2 : 76076098  r1 : 00000000  r0 : 00000000
> [    2.359893] Flags: NzCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM
> Segment none
> [    2.360368] Control: 10c5387d  Table: 8177806a  DAC: 00000051
> [    2.360759] Register r0 information: NULL pointer
> [    2.361126] Register r1 information: NULL pointer
> [    2.361477] Register r2 information: non-paged memory
> [    2.361835] Register r3 information: NULL pointer
> [    2.362162] Register r4 information: NULL pointer
> [    2.362486] Register r5 information: non-slab/vmalloc memory
> [    2.362887] Register r6 information: NULL pointer
> [    2.363226] Register r7 information: slab kmalloc-128 start c1702b00
> pointer offset 56 size 128
> [    2.363919] Register r8 information: non-paged memory
> [    2.364299] Register r9 information: NULL pointer
> [    2.364668] Register r10 information: non-slab/vmalloc memory
> [    2.365089] Register r11 information: non-slab/vmalloc memory
> [    2.365466] Register r12 information: non-slab/vmalloc memory
> [    2.365872] Process kworker/u4:1 (pid: 27, stack limit = 0x(ptrval))
> [    2.366297] Stack: (0xc165de00 to 0xc165e000)
> [    2.366769] de00: c165de3c c165de6c c064373c c0d04d08 c165de34
> c165de20 c0556340 c055626c
> [    2.367478] de20: 00000000 c165de6c c165de64 c165de38 c0641208
> c0556328 c165de64 c136996c
> [    2.368192] de40: c1702b38 76076098 c0d04d08 c13ae400 c13ae444
> c10846b8 c165de9c c165de68
> [    2.368934] de60: c0642f38 c0641174 c063c4c4 c13ae400 00000001
> 76076098 00000000 c13ae400
> [    2.369654] de80: c107d690 c13ae400 c10846b8 00000000 c165deac
> c165dea0 c064390c c0642e54
> [    2.370367] dea0: c165decc c165deb0 c0642080 c06438fc c13ae400
> c10846a4 c10846a4 c10846b8
> [    2.371046] dec0: c165def4 c165ded0 c064259c c0641ff8 c10846d4
> c16b6f80 c1206200 c1225b00
> [    2.371774] dee0: 00000000 00000000 c165df34 c165def8 c01475a4
> c064251c c13906c0 ffffe000
> [    2.372470] df00: c165df1c c165df10 c0149250 c16b6f80 c1206200
> c16b6f94 c1206218 ffffe000
> [    2.373189] df20: 00000088 c0d03d00 c165df74 c165df38 c0147bc4
> c0147378 c1313e74 c0b0c2c4
> [    2.373925] df40: c10b1d2a c1206200 c165df74 c13d4980 c16b5580
> c0147900 c16b6f80 00000000
> [    2.374628] df60: c1313e74 c165c000 c165dfac c165df78 c014feb4
> c014790c c13d49a4 c13d49a4
> [    2.375345] df80: c165dfac c16b5580 c014fd3c 00000000 00000000
> 00000000 00000000 00000000
> [    2.376080] dfa0: 00000000 c165dfb0 c0100150 c014fd48 00000000
> 00000000 00000000 00000000
> [    2.376807] dfc0: 00000000 00000000 00000000 00000000 00000000
> 00000000 00000000 00000000
> [    2.377534] dfe0: 00000000 00000000 00000000 00000000 00000013
> 00000000 00000000 00000000
> [    2.378080] Backtrace:
> [    2.378297] [<c0556260>] (klist_put) from [<c0556340>]
> (klist_iter_exit+0x24/0x30)
> [    2.378819]  r7:c0d04d08 r6:c064373c r5:c165de6c r4:c165de3c
> [    2.379199] [<c055631c>] (klist_iter_exit) from [<c0641208>]
> (bus_for_each_drv+0xa0/0xc8)
> [    2.379701]  r5:c165de6c r4:00000000
> [    2.379932] [<c0641168>] (bus_for_each_drv) from [<c0642f38>]
> (__device_attach+0xf0/0x15c)
> [    2.380482]  r7:c10846b8 r6:c13ae444 r5:c13ae400 r4:c0d04d08
> [    2.380871] [<c0642e48>] (__device_attach) from [<c064390c>]
> (device_initial_probe+0x1c/0x20)
> [    2.381453]  r8:00000000 r7:c10846b8 r6:c13ae400 r5:c107d690 r4:c13ae400
> [    2.381851] [<c06438f0>] (device_initial_probe) from [<c0642080>]
> (bus_probe_device+0x94/0x9c)
> [    2.382364] [<c0641fec>] (bus_probe_device) from [<c064259c>]
> (deferred_probe_work_func+0x8c/0xb8)
> [    2.382936]  r7:c10846b8 r6:c10846a4 r5:c10846a4 r4:c13ae400
> [    2.383266] [<c0642510>] (deferred_probe_work_func) from [<c01475a4>]
> (process_one_work+0x238/0x594)
> [    2.383885]  r9:00000000 r8:00000000 r7:c1225b00 r6:c1206200
> r5:c16b6f80 r4:c10846d4
> [    2.384344] [<c014736c>] (process_one_work) from [<c0147bc4>]
> (worker_thread+0x2c4/0x5f4)
> [    2.384893]  r10:c0d03d00 r9:00000088 r8:ffffe000 r7:c1206218
> r6:c16b6f94 r5:c1206200
> [    2.385341]  r4:c16b6f80
> [    2.385536] [<c0147900>] (worker_thread) from [<c014feb4>]
> (kthread+0x178/0x194)
> [    2.386030]  r10:c165c000 r9:c1313e74 r8:00000000 r7:c16b6f80
> r6:c0147900 r5:c16b5580
> [    2.386540]  r4:c13d4980
> [    2.386742] [<c014fd3c>] (kthread) from [<c0100150>]
> (ret_from_fork+0x14/0x24)
> [    2.387211] Exception stack(0xc165dfb0 to 0xc165dff8)
> [    2.387653] dfa0:                                     00000000
> 00000000 00000000 00000000
> [    2.388374] dfc0: 00000000 00000000 00000000 00000000 00000000
> 00000000 00000000 00000000
> [    2.389071] dfe0: 00000000 00000000 00000000 00000000 00000013 00000000
> [    2.389546]  r10:00000000 r9:00000000 r8:00000000 r7:00000000
> r6:00000000 r5:c014fd3c
> [    2.390037]  r4:c16b5580
> [    2.390376] Code: e1a07000 e1a06001 e3c44001 e1a00004 (e5945010)
> [    2.390836] ---[ end trace f293b13f591ee205 ]---
> [    2.391580] ---[ end Kernel panic - not syncing: Fatal exception ]---
>
>
> >>> -Saravana
> >>>
> >>> [1] - https://lore.kernel.org/lkml/CAGETcx8b228nDUho3cX9AAQ-pXOfZTMv8cj2vhdx9yc_pk8q+A@mail.gmail.com/
> >>> .
> >>>
> > .
> >

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [BUG] amba: Remove deferred device addition
       [not found]                   ` <85b28900-5f42-b997-2ded-0b952bc2a03e@huawei.com>
@ 2021-08-27 19:09                     ` Saravana Kannan
  2021-08-28  1:09                       ` Kefeng Wang
  0 siblings, 1 reply; 25+ messages in thread
From: Saravana Kannan @ 2021-08-27 19:09 UTC (permalink / raw)
  To: Kefeng Wang
  Cc: Rob Herring, linux-kernel, Frank Rowand, devicetree,
	Russell King, Linus Walleij, linux-arm-kernel, Dmitry Torokhov,
	linux-input

On Fri, Aug 27, 2021 at 7:38 AM Kefeng Wang <wangkefeng.wang@huawei.com> wrote:
>
>
> On 2021/8/27 8:04, Saravana Kannan wrote:
> > On Thu, Aug 26, 2021 at 1:22 AM Kefeng Wang <wangkefeng.wang@huawei.com> wrote:
> >>
> >>>>> Btw, I've been working on [1] cleaning up the one-off deferred probe
> >>>>> solution that we have for amba devices. That causes a bunch of other
> >>>>> headaches. Your patch 3/3 takes us further in the wrong direction by
> >>>>> adding more reasons for delaying the addition of the device.
> >> Hi Saravana, I try the link[1], but with it, there is a crash when boot
> >> (qemu-system-arm -M vexpress-a15),

I'm assuming it's this one?
arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts

> > Hi,
> >
> > It's hard to make sense of the logs. Looks like two different threads
> > might be printing to the log at the same time? Can you please enable
> > the config that prints the thread ID (forgot what it's called) and
> > collect this again? With what I could tell the crash seems to be
> > happening somewhere in platform_match(), but that's not related to
> > this patch at all?
>
> Can you reproduce it? it is very likely related(without your patch, the
> boot is fine),

Sorry, I haven't ever setup qemu and booted vexpress. Thanks for your help.

> the NULL ptr is about serio, it is registed from amba driver.
>
> ambakmi_driver_init
>
>   -- amba_kmi_probe
>
>     -- __serio_register_port

Thanks for the pointer. I took a look at the logs and the code. It's
very strange. As you can see from the backtrace, platform_match() is
being called for the device_add() from serio_handle_event(). But the
device that gets added there is on the serio_bus which obviously
should be using the serio_bus_match.

>
> +Dmitry and input maillist, is there some known issue about serio ?
>
> I add some debug, the full log is attached.
>
> [    2.958355][   T41] input: AT Raw Set 2 keyboard as
> /devices/platform/bus@8000000/bus@8000000:motherboard-bus/bus@8000000:motherboard-bus:iofpga-bus@300000000/1c060000.kmi/serio0/input/input0
> [    2.977441][   T41] serio serio1: pdev c1e05508, pdev->name (null),
> drv c1090fc0, drv->name vexpress-reset

Based on the logs you added, it's pretty clear we are getting to
platform_match(). It's also strange that the drv->name is
vexpress-reset

> [    2.977928][   T41] 8<--- cut here ---
> [    2.978162][   T41] Unhandled fault: page domain fault (0x01b) at
> 0x00000000
> [    2.978494][   T41] pgd = (ptrval)
> [    2.978819][   T41] [00000000] *pgd=00000000
> [    2.979881][   T41] Internal error: : 1b [#1] SMP ARM
> [    2.980385][   T41] Modules linked in:
> [    2.980810][   T41] CPU: 0 PID: 41 Comm: kworker/0:2 Not tainted
> 5.14.0-rc7+ #213
> [    2.981229][   T41] Hardware name: ARM-Versatile Express
> [    2.981780][   T41] Workqueue: events_long serio_handle_event
> [    2.982737][   T41] PC is at strcmp+0x18/0x44
> [    2.983030][   T41] LR is at platform_match+0xdc/0xf0
> [    2.983283][   T41] pc : [<c0560bcc>]    lr : [<c0646358>]    psr:
> 600b0013
> [    2.983572][   T41] sp : c1675d68  ip : c1675d78  fp : c1675d74
> [    2.983832][   T41] r10: 00000000  r9 : 00000000  r8 : 00000001
> [    2.984095][   T41] r7 : c1e05518  r6 : c1675df4  r5 : c1e05518  r4 :
> c1090fc0
> [    2.984395][   T41] r3 : c0a5e180  r2 : 6bede3db  r1 : c0b82a04  r0 :
> 00000000
> [    2.984906][   T41] Flags: nZCv  IRQs on  FIQs on  Mode SVC_32 ISA
> ARM  Segment none

---- 8< ---- cleaning up a bunch of register dumps

> [    3.003113][   T41] Backtrace:
> [    3.003451][   T41] [<c0560bb4>] (strcmp) from [<c0646358>] (platform_match+0xdc/0xf0)
> [    3.003963][   T41] [<c064627c>] (platform_match) from [<c06437d4>] (__device_attach_driver+0x3c/0xf4)
> [    3.004769][   T41] [<c0643798>] (__device_attach_driver) from [<c0641180>] (bus_for_each_drv+0x68/0xc8)
> [    3.005481][   T41] [<c0641118>] (bus_for_each_drv) from [<c0642f40>] (__device_attach+0xf0/0x16c)
> [    3.006152][   T41] [<c0642e50>] (__device_attach) from [<c06439d4>] (device_initial_probe+0x1c/0x20)
> [    3.006853][   T41] [<c06439b8>] (device_initial_probe) from [<c0642030>] (bus_probe_device+0x94/0x9c)
> [    3.007259][   T41] [<c0641f9c>] (bus_probe_device) from [<c063f9cc>] (device_add+0x408/0x8b8)
> [    3.007900][   T41] [<c063f5c4>] (device_add) from [<c071c1cc>] (serio_handle_event+0x1b8/0x234)
> [    3.008824][   T41] [<c071c014>] (serio_handle_event) from [<c01475a4>] (process_one_work+0x238/0x594)
> [    3.009737][   T41] [<c014736c>] (process_one_work) from [<c014795c>] (worker_thread+0x5c/0x5f4)
> [    3.010638][   T41] [<c0147900>] (worker_thread) from [<c014feb4>] (kthread+0x178/0x194)
> [    3.011496][   T41] [<c014fd3c>] (kthread) from [<c0100150>] (ret_from_fork+0x14/0x24)
> [    3.011860][   T41] Exception stack(0xc1675fb0 to 0xc1675ff8)

But the platform_match() is happening for the device_add() from
serio_event_handle() that's adding a device to the serio_bus and it
should be using serio_bus_match().

I haven't reached any conclusion yet, but my current thought process
is that it's either:
1. My patch is somehow causing list corruption. But I don't directly
touch any list in my change (other than deleting a list entirely), so
it's not clear how that would be happening.
2. Without my patch, these AMBA device's probe would be delayed at
least until 5 seconds or possibly later. I'm wondering if my patch is
catching some bad timing assumptions in other code.

You might be able to test out theory (2) by DEFERRED_DEVICE_TIMEOUT to
a much smaller number. Say 500ms or 100ms. If it doesn't crash, it
doesn't mean it's not (2), but if it does, then we know for sure it's
(2).

I'll continue debugging further.

-Saravana

>
> diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c
> index 836d6d23bba3..883a58c658c2 100644
> --- a/drivers/amba/bus.c
> +++ b/drivers/amba/bus.c
> @@ -237,6 +237,7 @@ static int amba_match(struct device *dev, struct
> device_driver *drv)
>
>          if (!pcdev->periphid) {
>                  int ret = amba_read_periphid(pcdev);
> +               dev_info(dev, "%s, amba_read_periphid ret = %d\n",
> __func__, ret);
>
>                  if (ret)
>                          return ret;
> @@ -522,6 +523,7 @@ int amba_device_add(struct amba_device *dev, struct
> resource *parent)
>          /* If primecell ID isn't hard-coded, figure it out */
>          if (!dev->periphid) {
>                  ret = amba_read_periphid(dev);
> +               dev_info(&dev->dev, "%s, amba_read_periphid ret = %d\n",
> __func__, ret);
>                  if (ret && ret != -EPROBE_DEFER)
>                          goto err_release;
>                  /*
> diff --git a/drivers/base/platform.c b/drivers/base/platform.c
> index 8640578f45e9..f7c1933c56b5 100644
> --- a/drivers/base/platform.c
> +++ b/drivers/base/platform.c
> @@ -1360,6 +1360,7 @@ static int platform_match(struct device *dev,
> struct device_driver *drv)
>          struct platform_device *pdev = to_platform_device(dev);
>          struct platform_driver *pdrv = to_platform_driver(drv);
>
> +       dev_info(dev, "pdev %px, pdev->name %s, drv %px, drv->name %s",
> pdev, pdev->name, drv, drv->name);
>          /* When driver_override is set, only bind to the matching driver */
>          if (pdev->driver_override)
>                  return !strcmp(pdev->driver_override, drv->name);
>
>
> > [1] - https://lore.kernel.org/lkml/CAGETcx8b228nDUho3cX9AAQ-pXOfZTMv8cj2vhdx9yc_pk8q+A@mail.gmail.com/
> > .
> >
> >>> .
> >>>
> > .
> >

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [BUG] amba: Remove deferred device addition
  2021-08-27 19:09                     ` Saravana Kannan
@ 2021-08-28  1:09                       ` Kefeng Wang
  2021-09-09  3:30                         ` Saravana Kannan
  0 siblings, 1 reply; 25+ messages in thread
From: Kefeng Wang @ 2021-08-28  1:09 UTC (permalink / raw)
  To: Saravana Kannan
  Cc: Rob Herring, linux-kernel, Frank Rowand, devicetree,
	Russell King, Linus Walleij, linux-arm-kernel, Dmitry Torokhov,
	linux-input


On 2021/8/28 3:09, Saravana Kannan wrote:
> On Fri, Aug 27, 2021 at 7:38 AM Kefeng Wang <wangkefeng.wang@huawei.com> wrote:
>>
>> On 2021/8/27 8:04, Saravana Kannan wrote:
>>> On Thu, Aug 26, 2021 at 1:22 AM Kefeng Wang <wangkefeng.wang@huawei.com> wrote:
>>>>>>> Btw, I've been working on [1] cleaning up the one-off deferred probe
>>>>>>> solution that we have for amba devices. That causes a bunch of other
>>>>>>> headaches. Your patch 3/3 takes us further in the wrong direction by
>>>>>>> adding more reasons for delaying the addition of the device.
>>>> Hi Saravana, I try the link[1], but with it, there is a crash when boot
>>>> (qemu-system-arm -M vexpress-a15),
> I'm assuming it's this one?
> arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts

I use arch/arm/boot/dts/vexpress-v2p-ca15-tc1.dts.

qemu-system-arm -M vexpress-a15 -dtb vexpress-v2p-ca15-tc1.dtb -cpu 
cortex-a15 -smp 2 -m size=3G -kernel zImage -rtc base=localtime -initrd 
initrd-arm32 -append 'console=ttyAMA0 cma=0 kfence.sample_interval=0 
earlyprintk debug ' -device virtio-net-device,netdev=net8 -netdev 
type=tap,id=net8,script=/etc/qemu-ifup,downscript=/etc/qemu-ifdown 
-nographic

>
>>> Hi,
>>>
>>> It's hard to make sense of the logs. Looks like two different threads
>>> might be printing to the log at the same time? Can you please enable
>>> the config that prints the thread ID (forgot what it's called) and
>>> collect this again? With what I could tell the crash seems to be
>>> happening somewhere in platform_match(), but that's not related to
>>> this patch at all?
>> Can you reproduce it? it is very likely related(without your patch, the
>> boot is fine),
> Sorry, I haven't ever setup qemu and booted vexpress. Thanks for your help.
>
>> the NULL ptr is about serio, it is registed from amba driver.
>>
>> ambakmi_driver_init
>>
>>    -- amba_kmi_probe
>>
>>      -- __serio_register_port
> Thanks for the pointer. I took a look at the logs and the code. It's
> very strange. As you can see from the backtrace, platform_match() is
> being called for the device_add() from serio_handle_event(). But the
> device that gets added there is on the serio_bus which obviously
> should be using the serio_bus_match.
Yes, I am confused too.
>
>> +Dmitry and input maillist, is there some known issue about serio ?
>>
>> I add some debug, the full log is attached.
>>
>> [    2.958355][   T41] input: AT Raw Set 2 keyboard as
>> /devices/platform/bus@8000000/bus@8000000:motherboard-bus/bus@8000000:motherboard-bus:iofpga-bus@300000000/1c060000.kmi/serio0/input/input0
>> [    2.977441][   T41] serio serio1: pdev c1e05508, pdev->name (null),
>> drv c1090fc0, drv->name vexpress-reset
> Based on the logs you added, it's pretty clear we are getting to
> platform_match(). It's also strange that the drv->name is
> vexpress-reset
...
>
>> [    3.003113][   T41] Backtrace:
>> [    3.003451][   T41] [<c0560bb4>] (strcmp) from [<c0646358>] (platform_match+0xdc/0xf0)
>> [    3.003963][   T41] [<c064627c>] (platform_match) from [<c06437d4>] (__device_attach_driver+0x3c/0xf4)
>> [    3.004769][   T41] [<c0643798>] (__device_attach_driver) from [<c0641180>] (bus_for_each_drv+0x68/0xc8)
>> [    3.005481][   T41] [<c0641118>] (bus_for_each_drv) from [<c0642f40>] (__device_attach+0xf0/0x16c)
>> [    3.006152][   T41] [<c0642e50>] (__device_attach) from [<c06439d4>] (device_initial_probe+0x1c/0x20)
>> [    3.006853][   T41] [<c06439b8>] (device_initial_probe) from [<c0642030>] (bus_probe_device+0x94/0x9c)
>> [    3.007259][   T41] [<c0641f9c>] (bus_probe_device) from [<c063f9cc>] (device_add+0x408/0x8b8)
>> [    3.007900][   T41] [<c063f5c4>] (device_add) from [<c071c1cc>] (serio_handle_event+0x1b8/0x234)
>> [    3.008824][   T41] [<c071c014>] (serio_handle_event) from [<c01475a4>] (process_one_work+0x238/0x594)
>> [    3.009737][   T41] [<c014736c>] (process_one_work) from [<c014795c>] (worker_thread+0x5c/0x5f4)
>> [    3.010638][   T41] [<c0147900>] (worker_thread) from [<c014feb4>] (kthread+0x178/0x194)
>> [    3.011496][   T41] [<c014fd3c>] (kthread) from [<c0100150>] (ret_from_fork+0x14/0x24)
>> [    3.011860][   T41] Exception stack(0xc1675fb0 to 0xc1675ff8)
> But the platform_match() is happening for the device_add() from
> serio_event_handle() that's adding a device to the serio_bus and it
> should be using serio_bus_match().
>
> I haven't reached any conclusion yet, but my current thought process
> is that it's either:
> 1. My patch is somehow causing list corruption. But I don't directly
> touch any list in my change (other than deleting a list entirely), so
> it's not clear how that would be happening.

Maybe some concurrent driver load?

> 2. Without my patch, these AMBA device's probe would be delayed at
> least until 5 seconds or possibly later. I'm wondering if my patch is
> catching some bad timing assumptions in other code.

After Rob's patch, It will retry soon.

commit 039599c92d3b2e73689e8b6e519d653fd4770abb
Author: Rob Herring <robh@kernel.org>
Date:   Wed Apr 29 15:58:12 2020 -0500

     amba: Retry adding deferred devices at late_initcall

     If amba bus devices defer when adding, the amba bus code simply retries
     adding the devices every 5 seconds. This doesn't work well as it
     completely unsynchronized with starting the init process which can
     happen in less than 5 secs. Add a retry during late_initcall. If the
     amba devices are added, then deferred probe takes over. If the
     dependencies have not probed at this point, then there's no improvement
     over previous behavior. To completely solve this, we'd need to retry
     after every successful probe as deferred probe does.

     The list_empty() check now happens outside the mutex, but the mutex
     wasn't necessary in the first place.

     This needed to use deferred probe instead of fragile initcall ordering
     on 32-bit VExpress systems where the apb_pclk has a number of probe
     dependencies (vexpress-sysregs, vexpress-config).


>
> You might be able to test out theory (2) by DEFERRED_DEVICE_TIMEOUT to
> a much smaller number. Say 500ms or 100ms. If it doesn't crash, it
> doesn't mean it's not (2), but if it does, then we know for sure it's
> (2).
ok, I will try this one, but due to above patch, it may not work.

>
> I'll continue debugging further.
>
> -Saravana

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [BUG] amba: Remove deferred device addition
  2021-08-28  1:09                       ` Kefeng Wang
@ 2021-09-09  3:30                         ` Saravana Kannan
  2021-09-10  7:59                           ` Kefeng Wang
  0 siblings, 1 reply; 25+ messages in thread
From: Saravana Kannan @ 2021-09-09  3:30 UTC (permalink / raw)
  To: Kefeng Wang
  Cc: Rob Herring, linux-kernel, Frank Rowand, devicetree,
	Russell King, Linus Walleij, linux-arm-kernel, Dmitry Torokhov,
	linux-input

On Fri, Aug 27, 2021 at 6:09 PM Kefeng Wang <wangkefeng.wang@huawei.com> wrote:
>
>
> On 2021/8/28 3:09, Saravana Kannan wrote:
> > On Fri, Aug 27, 2021 at 7:38 AM Kefeng Wang <wangkefeng.wang@huawei.com> wrote:
> >>
> >> On 2021/8/27 8:04, Saravana Kannan wrote:
> >>> On Thu, Aug 26, 2021 at 1:22 AM Kefeng Wang <wangkefeng.wang@huawei.com> wrote:
> >>>>>>> Btw, I've been working on [1] cleaning up the one-off deferred probe
> >>>>>>> solution that we have for amba devices. That causes a bunch of other
> >>>>>>> headaches. Your patch 3/3 takes us further in the wrong direction by
> >>>>>>> adding more reasons for delaying the addition of the device.
> >>>> Hi Saravana, I try the link[1], but with it, there is a crash when boot
> >>>> (qemu-system-arm -M vexpress-a15),
> > I'm assuming it's this one?
> > arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts
>
> I use arch/arm/boot/dts/vexpress-v2p-ca15-tc1.dts.
>
> qemu-system-arm -M vexpress-a15 -dtb vexpress-v2p-ca15-tc1.dtb -cpu
> cortex-a15 -smp 2 -m size=3G -kernel zImage -rtc base=localtime -initrd
> initrd-arm32 -append 'console=ttyAMA0 cma=0 kfence.sample_interval=0
> earlyprintk debug ' -device virtio-net-device,netdev=net8 -netdev
> type=tap,id=net8,script=/etc/qemu-ifup,downscript=/etc/qemu-ifdown
> -nographic
>
> >
> >>> Hi,
> >>>
> >>> It's hard to make sense of the logs. Looks like two different threads
> >>> might be printing to the log at the same time? Can you please enable
> >>> the config that prints the thread ID (forgot what it's called) and
> >>> collect this again? With what I could tell the crash seems to be
> >>> happening somewhere in platform_match(), but that's not related to
> >>> this patch at all?
> >> Can you reproduce it? it is very likely related(without your patch, the
> >> boot is fine),
> > Sorry, I haven't ever setup qemu and booted vexpress. Thanks for your help.
> >
> >> the NULL ptr is about serio, it is registed from amba driver.
> >>
> >> ambakmi_driver_init
> >>
> >>    -- amba_kmi_probe
> >>
> >>      -- __serio_register_port
> > Thanks for the pointer. I took a look at the logs and the code. It's
> > very strange. As you can see from the backtrace, platform_match() is
> > being called for the device_add() from serio_handle_event(). But the
> > device that gets added there is on the serio_bus which obviously
> > should be using the serio_bus_match.
> Yes, I am confused too.
> >
> >> +Dmitry and input maillist, is there some known issue about serio ?
> >>
> >> I add some debug, the full log is attached.
> >>
> >> [    2.958355][   T41] input: AT Raw Set 2 keyboard as
> >> /devices/platform/bus@8000000/bus@8000000:motherboard-bus/bus@8000000:motherboard-bus:iofpga-bus@300000000/1c060000.kmi/serio0/input/input0
> >> [    2.977441][   T41] serio serio1: pdev c1e05508, pdev->name (null),
> >> drv c1090fc0, drv->name vexpress-reset
> > Based on the logs you added, it's pretty clear we are getting to
> > platform_match(). It's also strange that the drv->name is
> > vexpress-reset
> ...
> >
> >> [    3.003113][   T41] Backtrace:
> >> [    3.003451][   T41] [<c0560bb4>] (strcmp) from [<c0646358>] (platform_match+0xdc/0xf0)
> >> [    3.003963][   T41] [<c064627c>] (platform_match) from [<c06437d4>] (__device_attach_driver+0x3c/0xf4)
> >> [    3.004769][   T41] [<c0643798>] (__device_attach_driver) from [<c0641180>] (bus_for_each_drv+0x68/0xc8)
> >> [    3.005481][   T41] [<c0641118>] (bus_for_each_drv) from [<c0642f40>] (__device_attach+0xf0/0x16c)
> >> [    3.006152][   T41] [<c0642e50>] (__device_attach) from [<c06439d4>] (device_initial_probe+0x1c/0x20)
> >> [    3.006853][   T41] [<c06439b8>] (device_initial_probe) from [<c0642030>] (bus_probe_device+0x94/0x9c)
> >> [    3.007259][   T41] [<c0641f9c>] (bus_probe_device) from [<c063f9cc>] (device_add+0x408/0x8b8)
> >> [    3.007900][   T41] [<c063f5c4>] (device_add) from [<c071c1cc>] (serio_handle_event+0x1b8/0x234)
> >> [    3.008824][   T41] [<c071c014>] (serio_handle_event) from [<c01475a4>] (process_one_work+0x238/0x594)
> >> [    3.009737][   T41] [<c014736c>] (process_one_work) from [<c014795c>] (worker_thread+0x5c/0x5f4)
> >> [    3.010638][   T41] [<c0147900>] (worker_thread) from [<c014feb4>] (kthread+0x178/0x194)
> >> [    3.011496][   T41] [<c014fd3c>] (kthread) from [<c0100150>] (ret_from_fork+0x14/0x24)
> >> [    3.011860][   T41] Exception stack(0xc1675fb0 to 0xc1675ff8)
> > But the platform_match() is happening for the device_add() from
> > serio_event_handle() that's adding a device to the serio_bus and it
> > should be using serio_bus_match().
> >
> > I haven't reached any conclusion yet, but my current thought process
> > is that it's either:
> > 1. My patch is somehow causing list corruption. But I don't directly
> > touch any list in my change (other than deleting a list entirely), so
> > it's not clear how that would be happening.
>
> Maybe some concurrent driver load?
>
> > 2. Without my patch, these AMBA device's probe would be delayed at
> > least until 5 seconds or possibly later. I'm wondering if my patch is
> > catching some bad timing assumptions in other code.
>
> After Rob's patch, It will retry soon.
>
> commit 039599c92d3b2e73689e8b6e519d653fd4770abb
> Author: Rob Herring <robh@kernel.org>
> Date:   Wed Apr 29 15:58:12 2020 -0500
>
>      amba: Retry adding deferred devices at late_initcall
>
>      If amba bus devices defer when adding, the amba bus code simply retries
>      adding the devices every 5 seconds. This doesn't work well as it
>      completely unsynchronized with starting the init process which can
>      happen in less than 5 secs. Add a retry during late_initcall. If the
>      amba devices are added, then deferred probe takes over. If the
>      dependencies have not probed at this point, then there's no improvement
>      over previous behavior. To completely solve this, we'd need to retry
>      after every successful probe as deferred probe does.
>
>      The list_empty() check now happens outside the mutex, but the mutex
>      wasn't necessary in the first place.
>
>      This needed to use deferred probe instead of fragile initcall ordering
>      on 32-bit VExpress systems where the apb_pclk has a number of probe
>      dependencies (vexpress-sysregs, vexpress-config).
>
>
> >
> > You might be able to test out theory (2) by DEFERRED_DEVICE_TIMEOUT to
> > a much smaller number. Say 500ms or 100ms. If it doesn't crash, it
> > doesn't mean it's not (2), but if it does, then we know for sure it's
> > (2).
> ok, I will try this one, but due to above patch, it may not work.

Were you able to find anything more?

-Saravana

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [BUG] amba: Remove deferred device addition
  2021-09-09  3:30                         ` Saravana Kannan
@ 2021-09-10  7:59                           ` Kefeng Wang
  2022-07-05 19:25                             ` Saravana Kannan
  0 siblings, 1 reply; 25+ messages in thread
From: Kefeng Wang @ 2021-09-10  7:59 UTC (permalink / raw)
  To: Saravana Kannan
  Cc: Rob Herring, linux-kernel, Frank Rowand, devicetree,
	Russell King, Linus Walleij, linux-arm-kernel, Dmitry Torokhov,
	linux-input


On 2021/9/9 11:30, Saravana Kannan wrote:
> On Fri, Aug 27, 2021 at 6:09 PM Kefeng Wang <wangkefeng.wang@huawei.com> wrote:
>>
>> On 2021/8/28 3:09, Saravana Kannan wrote:
>>> On Fri, Aug 27, 2021 at 7:38 AM Kefeng Wang <wangkefeng.wang@huawei.com> wrote:
>>>> On 2021/8/27 8:04, Saravana Kannan wrote:
>>>>> On Thu, Aug 26, 2021 at 1:22 AM Kefeng Wang <wangkefeng.wang@huawei.com> wrote:
>>>>>>>>> Btw, I've been working on [1] cleaning up the one-off deferred probe
>>>>>>>>> solution that we have for amba devices. That causes a bunch of other
>>>>>>>>> headaches. Your patch 3/3 takes us further in the wrong direction by
>>>>>>>>> adding more reasons for delaying the addition of the device.
>>>>>> Hi Saravana, I try the link[1], but with it, there is a crash when boot
>>>>>> (qemu-system-arm -M vexpress-a15),
>>> I'm assuming it's this one?
>>> arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts
>> I use arch/arm/boot/dts/vexpress-v2p-ca15-tc1.dts.
>>
>> qemu-system-arm -M vexpress-a15 -dtb vexpress-v2p-ca15-tc1.dtb -cpu
>> cortex-a15 -smp 2 -m size=3G -kernel zImage -rtc base=localtime -initrd
>> initrd-arm32 -append 'console=ttyAMA0 cma=0 kfence.sample_interval=0
>> earlyprintk debug ' -device virtio-net-device,netdev=net8 -netdev
>> type=tap,id=net8,script=/etc/qemu-ifup,downscript=/etc/qemu-ifdown
>> -nographic
>>
>>>>> Hi,
>>>>>
>>>>> It's hard to make sense of the logs. Looks like two different threads
>>>>> might be printing to the log at the same time? Can you please enable
>>>>> the config that prints the thread ID (forgot what it's called) and
>>>>> collect this again? With what I could tell the crash seems to be
>>>>> happening somewhere in platform_match(), but that's not related to
>>>>> this patch at all?
>>>> Can you reproduce it? it is very likely related(without your patch, the
>>>> boot is fine),
>>> Sorry, I haven't ever setup qemu and booted vexpress. Thanks for your help.
>>>
>>>> the NULL ptr is about serio, it is registed from amba driver.
>>>>
>>>> ambakmi_driver_init
>>>>
>>>>     -- amba_kmi_probe
>>>>
>>>>       -- __serio_register_port
>>> Thanks for the pointer. I took a look at the logs and the code. It's
>>> very strange. As you can see from the backtrace, platform_match() is
>>> being called for the device_add() from serio_handle_event(). But the
>>> device that gets added there is on the serio_bus which obviously
>>> should be using the serio_bus_match.
>> Yes, I am confused too.
>>>> +Dmitry and input maillist, is there some known issue about serio ?
>>>>
>>>> I add some debug, the full log is attached.
>>>>
>>>> [    2.958355][   T41] input: AT Raw Set 2 keyboard as
>>>> /devices/platform/bus@8000000/bus@8000000:motherboard-bus/bus@8000000:motherboard-bus:iofpga-bus@300000000/1c060000.kmi/serio0/input/input0
>>>> [    2.977441][   T41] serio serio1: pdev c1e05508, pdev->name (null),
>>>> drv c1090fc0, drv->name vexpress-reset
>>> Based on the logs you added, it's pretty clear we are getting to
>>> platform_match(). It's also strange that the drv->name is
>>> vexpress-reset
>> ...
>>>> [    3.003113][   T41] Backtrace:
>>>> [    3.003451][   T41] [<c0560bb4>] (strcmp) from [<c0646358>] (platform_match+0xdc/0xf0)
>>>> [    3.003963][   T41] [<c064627c>] (platform_match) from [<c06437d4>] (__device_attach_driver+0x3c/0xf4)
>>>> [    3.004769][   T41] [<c0643798>] (__device_attach_driver) from [<c0641180>] (bus_for_each_drv+0x68/0xc8)
>>>> [    3.005481][   T41] [<c0641118>] (bus_for_each_drv) from [<c0642f40>] (__device_attach+0xf0/0x16c)
>>>> [    3.006152][   T41] [<c0642e50>] (__device_attach) from [<c06439d4>] (device_initial_probe+0x1c/0x20)
>>>> [    3.006853][   T41] [<c06439b8>] (device_initial_probe) from [<c0642030>] (bus_probe_device+0x94/0x9c)
>>>> [    3.007259][   T41] [<c0641f9c>] (bus_probe_device) from [<c063f9cc>] (device_add+0x408/0x8b8)
>>>> [    3.007900][   T41] [<c063f5c4>] (device_add) from [<c071c1cc>] (serio_handle_event+0x1b8/0x234)
>>>> [    3.008824][   T41] [<c071c014>] (serio_handle_event) from [<c01475a4>] (process_one_work+0x238/0x594)
>>>> [    3.009737][   T41] [<c014736c>] (process_one_work) from [<c014795c>] (worker_thread+0x5c/0x5f4)
>>>> [    3.010638][   T41] [<c0147900>] (worker_thread) from [<c014feb4>] (kthread+0x178/0x194)
>>>> [    3.011496][   T41] [<c014fd3c>] (kthread) from [<c0100150>] (ret_from_fork+0x14/0x24)
>>>> [    3.011860][   T41] Exception stack(0xc1675fb0 to 0xc1675ff8)
>>> But the platform_match() is happening for the device_add() from
>>> serio_event_handle() that's adding a device to the serio_bus and it
>>> should be using serio_bus_match().
>>>
>>> I haven't reached any conclusion yet, but my current thought process
>>> is that it's either:
>>> 1. My patch is somehow causing list corruption. But I don't directly
>>> touch any list in my change (other than deleting a list entirely), so
>>> it's not clear how that would be happening.
>> Maybe some concurrent driver load?
>>
>>> 2. Without my patch, these AMBA device's probe would be delayed at
>>> least until 5 seconds or possibly later. I'm wondering if my patch is
>>> catching some bad timing assumptions in other code.
>> After Rob's patch, It will retry soon.
>>
>> commit 039599c92d3b2e73689e8b6e519d653fd4770abb
>> Author: Rob Herring <robh@kernel.org>
>> Date:   Wed Apr 29 15:58:12 2020 -0500
>>
>>       amba: Retry adding deferred devices at late_initcall
>>
>>       If amba bus devices defer when adding, the amba bus code simply retries
>>       adding the devices every 5 seconds. This doesn't work well as it
>>       completely unsynchronized with starting the init process which can
>>       happen in less than 5 secs. Add a retry during late_initcall. If the
>>       amba devices are added, then deferred probe takes over. If the
>>       dependencies have not probed at this point, then there's no improvement
>>       over previous behavior. To completely solve this, we'd need to retry
>>       after every successful probe as deferred probe does.
>>
>>       The list_empty() check now happens outside the mutex, but the mutex
>>       wasn't necessary in the first place.
>>
>>       This needed to use deferred probe instead of fragile initcall ordering
>>       on 32-bit VExpress systems where the apb_pclk has a number of probe
>>       dependencies (vexpress-sysregs, vexpress-config).
>>
>>
>>> You might be able to test out theory (2) by DEFERRED_DEVICE_TIMEOUT to
>>> a much smaller number. Say 500ms or 100ms. If it doesn't crash, it
>>> doesn't mean it's not (2), but if it does, then we know for sure it's
>>> (2).
>> ok, I will try this one, but due to above patch, it may not work.
> Were you able to find anything more?
I can't find any clue, and have no time to check this for now, is there 
any news from your side?
>
> -Saravana
> .
>

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [BUG] amba: Remove deferred device addition
  2021-09-10  7:59                           ` Kefeng Wang
@ 2022-07-05 19:25                             ` Saravana Kannan
  2022-08-27 10:26                               ` Leizhen (ThunderTown)
  0 siblings, 1 reply; 25+ messages in thread
From: Saravana Kannan @ 2022-07-05 19:25 UTC (permalink / raw)
  To: Kefeng Wang
  Cc: Rob Herring, linux-kernel, Frank Rowand, devicetree,
	Russell King, Linus Walleij, linux-arm-kernel, Dmitry Torokhov,
	linux-input

On Fri, Sep 10, 2021 at 12:59 AM Kefeng Wang <wangkefeng.wang@huawei.com> wrote:
>
>
> On 2021/9/9 11:30, Saravana Kannan wrote:
> > On Fri, Aug 27, 2021 at 6:09 PM Kefeng Wang <wangkefeng.wang@huawei.com> wrote:
> >>
> >> On 2021/8/28 3:09, Saravana Kannan wrote:
> >>> On Fri, Aug 27, 2021 at 7:38 AM Kefeng Wang <wangkefeng.wang@huawei.com> wrote:
> >>>> On 2021/8/27 8:04, Saravana Kannan wrote:
> >>>>> On Thu, Aug 26, 2021 at 1:22 AM Kefeng Wang <wangkefeng.wang@huawei.com> wrote:
> >>>>>>>>> Btw, I've been working on [1] cleaning up the one-off deferred probe
> >>>>>>>>> solution that we have for amba devices. That causes a bunch of other
> >>>>>>>>> headaches. Your patch 3/3 takes us further in the wrong direction by
> >>>>>>>>> adding more reasons for delaying the addition of the device.
> >>>>>> Hi Saravana, I try the link[1], but with it, there is a crash when boot
> >>>>>> (qemu-system-arm -M vexpress-a15),
> >>> I'm assuming it's this one?
> >>> arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts
> >> I use arch/arm/boot/dts/vexpress-v2p-ca15-tc1.dts.
> >>
> >> qemu-system-arm -M vexpress-a15 -dtb vexpress-v2p-ca15-tc1.dtb -cpu
> >> cortex-a15 -smp 2 -m size=3G -kernel zImage -rtc base=localtime -initrd
> >> initrd-arm32 -append 'console=ttyAMA0 cma=0 kfence.sample_interval=0
> >> earlyprintk debug ' -device virtio-net-device,netdev=net8 -netdev
> >> type=tap,id=net8,script=/etc/qemu-ifup,downscript=/etc/qemu-ifdown
> >> -nographic
> >>
> >>>>> Hi,
> >>>>>
> >>>>> It's hard to make sense of the logs. Looks like two different threads
> >>>>> might be printing to the log at the same time? Can you please enable
> >>>>> the config that prints the thread ID (forgot what it's called) and
> >>>>> collect this again? With what I could tell the crash seems to be
> >>>>> happening somewhere in platform_match(), but that's not related to
> >>>>> this patch at all?
> >>>> Can you reproduce it? it is very likely related(without your patch, the
> >>>> boot is fine),
> >>> Sorry, I haven't ever setup qemu and booted vexpress. Thanks for your help.
> >>>
> >>>> the NULL ptr is about serio, it is registed from amba driver.
> >>>>
> >>>> ambakmi_driver_init
> >>>>
> >>>>     -- amba_kmi_probe
> >>>>
> >>>>       -- __serio_register_port
> >>> Thanks for the pointer. I took a look at the logs and the code. It's
> >>> very strange. As you can see from the backtrace, platform_match() is
> >>> being called for the device_add() from serio_handle_event(). But the
> >>> device that gets added there is on the serio_bus which obviously
> >>> should be using the serio_bus_match.
> >> Yes, I am confused too.
> >>>> +Dmitry and input maillist, is there some known issue about serio ?
> >>>>
> >>>> I add some debug, the full log is attached.
> >>>>
> >>>> [    2.958355][   T41] input: AT Raw Set 2 keyboard as
> >>>> /devices/platform/bus@8000000/bus@8000000:motherboard-bus/bus@8000000:motherboard-bus:iofpga-bus@300000000/1c060000.kmi/serio0/input/input0
> >>>> [    2.977441][   T41] serio serio1: pdev c1e05508, pdev->name (null),
> >>>> drv c1090fc0, drv->name vexpress-reset
> >>> Based on the logs you added, it's pretty clear we are getting to
> >>> platform_match(). It's also strange that the drv->name is
> >>> vexpress-reset
> >> ...
> >>>> [    3.003113][   T41] Backtrace:
> >>>> [    3.003451][   T41] [<c0560bb4>] (strcmp) from [<c0646358>] (platform_match+0xdc/0xf0)
> >>>> [    3.003963][   T41] [<c064627c>] (platform_match) from [<c06437d4>] (__device_attach_driver+0x3c/0xf4)
> >>>> [    3.004769][   T41] [<c0643798>] (__device_attach_driver) from [<c0641180>] (bus_for_each_drv+0x68/0xc8)
> >>>> [    3.005481][   T41] [<c0641118>] (bus_for_each_drv) from [<c0642f40>] (__device_attach+0xf0/0x16c)
> >>>> [    3.006152][   T41] [<c0642e50>] (__device_attach) from [<c06439d4>] (device_initial_probe+0x1c/0x20)
> >>>> [    3.006853][   T41] [<c06439b8>] (device_initial_probe) from [<c0642030>] (bus_probe_device+0x94/0x9c)
> >>>> [    3.007259][   T41] [<c0641f9c>] (bus_probe_device) from [<c063f9cc>] (device_add+0x408/0x8b8)
> >>>> [    3.007900][   T41] [<c063f5c4>] (device_add) from [<c071c1cc>] (serio_handle_event+0x1b8/0x234)
> >>>> [    3.008824][   T41] [<c071c014>] (serio_handle_event) from [<c01475a4>] (process_one_work+0x238/0x594)
> >>>> [    3.009737][   T41] [<c014736c>] (process_one_work) from [<c014795c>] (worker_thread+0x5c/0x5f4)
> >>>> [    3.010638][   T41] [<c0147900>] (worker_thread) from [<c014feb4>] (kthread+0x178/0x194)
> >>>> [    3.011496][   T41] [<c014fd3c>] (kthread) from [<c0100150>] (ret_from_fork+0x14/0x24)
> >>>> [    3.011860][   T41] Exception stack(0xc1675fb0 to 0xc1675ff8)
> >>> But the platform_match() is happening for the device_add() from
> >>> serio_event_handle() that's adding a device to the serio_bus and it
> >>> should be using serio_bus_match().
> >>>
> >>> I haven't reached any conclusion yet, but my current thought process
> >>> is that it's either:
> >>> 1. My patch is somehow causing list corruption. But I don't directly
> >>> touch any list in my change (other than deleting a list entirely), so
> >>> it's not clear how that would be happening.
> >> Maybe some concurrent driver load?
> >>
> >>> 2. Without my patch, these AMBA device's probe would be delayed at
> >>> least until 5 seconds or possibly later. I'm wondering if my patch is
> >>> catching some bad timing assumptions in other code.
> >> After Rob's patch, It will retry soon.
> >>
> >> commit 039599c92d3b2e73689e8b6e519d653fd4770abb
> >> Author: Rob Herring <robh@kernel.org>
> >> Date:   Wed Apr 29 15:58:12 2020 -0500
> >>
> >>       amba: Retry adding deferred devices at late_initcall
> >>
> >>       If amba bus devices defer when adding, the amba bus code simply retries
> >>       adding the devices every 5 seconds. This doesn't work well as it
> >>       completely unsynchronized with starting the init process which can
> >>       happen in less than 5 secs. Add a retry during late_initcall. If the
> >>       amba devices are added, then deferred probe takes over. If the
> >>       dependencies have not probed at this point, then there's no improvement
> >>       over previous behavior. To completely solve this, we'd need to retry
> >>       after every successful probe as deferred probe does.
> >>
> >>       The list_empty() check now happens outside the mutex, but the mutex
> >>       wasn't necessary in the first place.
> >>
> >>       This needed to use deferred probe instead of fragile initcall ordering
> >>       on 32-bit VExpress systems where the apb_pclk has a number of probe
> >>       dependencies (vexpress-sysregs, vexpress-config).
> >>
> >>
> >>> You might be able to test out theory (2) by DEFERRED_DEVICE_TIMEOUT to
> >>> a much smaller number. Say 500ms or 100ms. If it doesn't crash, it
> >>> doesn't mean it's not (2), but if it does, then we know for sure it's
> >>> (2).
> >> ok, I will try this one, but due to above patch, it may not work.
> > Were you able to find anything more?
> I can't find any clue, and have no time to check this for now, is there
> any news from your side?

To close out this thread, the issue was due to a UAF bug in driver
core that was fixed by:
https://lore.kernel.org/all/20220513112444.45112-1-schspa@gmail.com/

With that fix, there wouldn't have been a crash, but amba driver
registration would have failed (because match returned
non-EPROBE_DEFER error).

-Saravana

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [BUG] amba: Remove deferred device addition
  2022-07-05 19:25                             ` Saravana Kannan
@ 2022-08-27 10:26                               ` Leizhen (ThunderTown)
  0 siblings, 0 replies; 25+ messages in thread
From: Leizhen (ThunderTown) @ 2022-08-27 10:26 UTC (permalink / raw)
  To: Saravana Kannan, Kefeng Wang
  Cc: Rob Herring, linux-kernel, Frank Rowand, devicetree,
	Russell King, Linus Walleij, linux-arm-kernel, Dmitry Torokhov,
	linux-input



On 2022/7/6 3:25, Saravana Kannan wrote:
> On Fri, Sep 10, 2021 at 12:59 AM Kefeng Wang <wangkefeng.wang@huawei.com> wrote:
>>
>>
>> On 2021/9/9 11:30, Saravana Kannan wrote:
>>> On Fri, Aug 27, 2021 at 6:09 PM Kefeng Wang <wangkefeng.wang@huawei.com> wrote:
>>>>
>>>> On 2021/8/28 3:09, Saravana Kannan wrote:
>>>>> On Fri, Aug 27, 2021 at 7:38 AM Kefeng Wang <wangkefeng.wang@huawei.com> wrote:
>>>>>> On 2021/8/27 8:04, Saravana Kannan wrote:
>>>>>>> On Thu, Aug 26, 2021 at 1:22 AM Kefeng Wang <wangkefeng.wang@huawei.com> wrote:
>>>>>>>>>>> Btw, I've been working on [1] cleaning up the one-off deferred probe
>>>>>>>>>>> solution that we have for amba devices. That causes a bunch of other
>>>>>>>>>>> headaches. Your patch 3/3 takes us further in the wrong direction by
>>>>>>>>>>> adding more reasons for delaying the addition of the device.
>>>>>>>> Hi Saravana, I try the link[1], but with it, there is a crash when boot
>>>>>>>> (qemu-system-arm -M vexpress-a15),
>>>>> I'm assuming it's this one?
>>>>> arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts
>>>> I use arch/arm/boot/dts/vexpress-v2p-ca15-tc1.dts.
>>>>
>>>> qemu-system-arm -M vexpress-a15 -dtb vexpress-v2p-ca15-tc1.dtb -cpu
>>>> cortex-a15 -smp 2 -m size=3G -kernel zImage -rtc base=localtime -initrd
>>>> initrd-arm32 -append 'console=ttyAMA0 cma=0 kfence.sample_interval=0
>>>> earlyprintk debug ' -device virtio-net-device,netdev=net8 -netdev
>>>> type=tap,id=net8,script=/etc/qemu-ifup,downscript=/etc/qemu-ifdown
>>>> -nographic
>>>>
>>>>>>> Hi,
>>>>>>>
>>>>>>> It's hard to make sense of the logs. Looks like two different threads
>>>>>>> might be printing to the log at the same time? Can you please enable
>>>>>>> the config that prints the thread ID (forgot what it's called) and
>>>>>>> collect this again? With what I could tell the crash seems to be
>>>>>>> happening somewhere in platform_match(), but that's not related to
>>>>>>> this patch at all?
>>>>>> Can you reproduce it? it is very likely related(without your patch, the
>>>>>> boot is fine),
>>>>> Sorry, I haven't ever setup qemu and booted vexpress. Thanks for your help.
>>>>>
>>>>>> the NULL ptr is about serio, it is registed from amba driver.
>>>>>>
>>>>>> ambakmi_driver_init
>>>>>>
>>>>>>     -- amba_kmi_probe
>>>>>>
>>>>>>       -- __serio_register_port
>>>>> Thanks for the pointer. I took a look at the logs and the code. It's
>>>>> very strange. As you can see from the backtrace, platform_match() is
>>>>> being called for the device_add() from serio_handle_event(). But the
>>>>> device that gets added there is on the serio_bus which obviously
>>>>> should be using the serio_bus_match.
>>>> Yes, I am confused too.
>>>>>> +Dmitry and input maillist, is there some known issue about serio ?
>>>>>>
>>>>>> I add some debug, the full log is attached.
>>>>>>
>>>>>> [    2.958355][   T41] input: AT Raw Set 2 keyboard as
>>>>>> /devices/platform/bus@8000000/bus@8000000:motherboard-bus/bus@8000000:motherboard-bus:iofpga-bus@300000000/1c060000.kmi/serio0/input/input0
>>>>>> [    2.977441][   T41] serio serio1: pdev c1e05508, pdev->name (null),
>>>>>> drv c1090fc0, drv->name vexpress-reset
>>>>> Based on the logs you added, it's pretty clear we are getting to
>>>>> platform_match(). It's also strange that the drv->name is
>>>>> vexpress-reset
>>>> ...
>>>>>> [    3.003113][   T41] Backtrace:
>>>>>> [    3.003451][   T41] [<c0560bb4>] (strcmp) from [<c0646358>] (platform_match+0xdc/0xf0)
>>>>>> [    3.003963][   T41] [<c064627c>] (platform_match) from [<c06437d4>] (__device_attach_driver+0x3c/0xf4)
>>>>>> [    3.004769][   T41] [<c0643798>] (__device_attach_driver) from [<c0641180>] (bus_for_each_drv+0x68/0xc8)
>>>>>> [    3.005481][   T41] [<c0641118>] (bus_for_each_drv) from [<c0642f40>] (__device_attach+0xf0/0x16c)
>>>>>> [    3.006152][   T41] [<c0642e50>] (__device_attach) from [<c06439d4>] (device_initial_probe+0x1c/0x20)
>>>>>> [    3.006853][   T41] [<c06439b8>] (device_initial_probe) from [<c0642030>] (bus_probe_device+0x94/0x9c)
>>>>>> [    3.007259][   T41] [<c0641f9c>] (bus_probe_device) from [<c063f9cc>] (device_add+0x408/0x8b8)
>>>>>> [    3.007900][   T41] [<c063f5c4>] (device_add) from [<c071c1cc>] (serio_handle_event+0x1b8/0x234)
>>>>>> [    3.008824][   T41] [<c071c014>] (serio_handle_event) from [<c01475a4>] (process_one_work+0x238/0x594)
>>>>>> [    3.009737][   T41] [<c014736c>] (process_one_work) from [<c014795c>] (worker_thread+0x5c/0x5f4)
>>>>>> [    3.010638][   T41] [<c0147900>] (worker_thread) from [<c014feb4>] (kthread+0x178/0x194)
>>>>>> [    3.011496][   T41] [<c014fd3c>] (kthread) from [<c0100150>] (ret_from_fork+0x14/0x24)
>>>>>> [    3.011860][   T41] Exception stack(0xc1675fb0 to 0xc1675ff8)
>>>>> But the platform_match() is happening for the device_add() from
>>>>> serio_event_handle() that's adding a device to the serio_bus and it
>>>>> should be using serio_bus_match().
>>>>>
>>>>> I haven't reached any conclusion yet, but my current thought process
>>>>> is that it's either:
>>>>> 1. My patch is somehow causing list corruption. But I don't directly
>>>>> touch any list in my change (other than deleting a list entirely), so
>>>>> it's not clear how that would be happening.
>>>> Maybe some concurrent driver load?
>>>>
>>>>> 2. Without my patch, these AMBA device's probe would be delayed at
>>>>> least until 5 seconds or possibly later. I'm wondering if my patch is
>>>>> catching some bad timing assumptions in other code.
>>>> After Rob's patch, It will retry soon.
>>>>
>>>> commit 039599c92d3b2e73689e8b6e519d653fd4770abb
>>>> Author: Rob Herring <robh@kernel.org>
>>>> Date:   Wed Apr 29 15:58:12 2020 -0500
>>>>
>>>>       amba: Retry adding deferred devices at late_initcall
>>>>
>>>>       If amba bus devices defer when adding, the amba bus code simply retries
>>>>       adding the devices every 5 seconds. This doesn't work well as it
>>>>       completely unsynchronized with starting the init process which can
>>>>       happen in less than 5 secs. Add a retry during late_initcall. If the
>>>>       amba devices are added, then deferred probe takes over. If the
>>>>       dependencies have not probed at this point, then there's no improvement
>>>>       over previous behavior. To completely solve this, we'd need to retry
>>>>       after every successful probe as deferred probe does.
>>>>
>>>>       The list_empty() check now happens outside the mutex, but the mutex
>>>>       wasn't necessary in the first place.
>>>>
>>>>       This needed to use deferred probe instead of fragile initcall ordering
>>>>       on 32-bit VExpress systems where the apb_pclk has a number of probe
>>>>       dependencies (vexpress-sysregs, vexpress-config).
>>>>
>>>>
>>>>> You might be able to test out theory (2) by DEFERRED_DEVICE_TIMEOUT to
>>>>> a much smaller number. Say 500ms or 100ms. If it doesn't crash, it
>>>>> doesn't mean it's not (2), but if it does, then we know for sure it's
>>>>> (2).
>>>> ok, I will try this one, but due to above patch, it may not work.
>>> Were you able to find anything more?
>> I can't find any clue, and have no time to check this for now, is there
>> any news from your side?

Hi, Saravana and Kefeng:
  I've spent the whole afternoon trying to figure this out, and the fix
patch has been cc you two.

> 
> To close out this thread, the issue was due to a UAF bug in driver
> core that was fixed by:
> https://lore.kernel.org/all/20220513112444.45112-1-schspa@gmail.com/
> 
> With that fix, there wouldn't have been a crash, but amba driver
> registration would have failed (because match returned
> non-EPROBE_DEFER error).
> 
> -Saravana
> 
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
> 

-- 
Regards,
  Zhen Lei

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 25+ messages in thread

end of thread, other threads:[~2022-08-27 10:27 UTC | newest]

Thread overview: 25+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-08-16  7:46 [PATCH 0/3] amba: Properly handle device probe without IRQ domain Kefeng Wang
2021-08-16  7:46 ` [PATCH 1/3] amba: Drop unused functions about APB/AHB devices add Kefeng Wang
2021-08-16  7:46 ` [PATCH 2/3] Revert "ARM: amba: make use of -1 IRQs warn" Kefeng Wang
2021-08-16  7:46 ` [PATCH 3/3] amba: Properly handle device probe without IRQ domain Kefeng Wang
2021-08-24 20:05   ` Rob Herring
2021-08-24 20:08     ` Saravana Kannan
2021-08-25  4:05       ` Kefeng Wang
2021-08-25  8:04         ` Saravana Kannan
2021-08-25  8:39           ` Kefeng Wang
2021-08-26  2:45           ` Kefeng Wang
2021-08-26  4:45             ` Saravana Kannan
2021-08-26  6:22               ` Kefeng Wang
     [not found]               ` <df8e7756-8b0d-d7de-a9ff-3f6eb0ffa8a5@huawei.com>
2021-08-27  0:04                 ` [BUG] amba: Remove deferred device addition Saravana Kannan
     [not found]                   ` <85b28900-5f42-b997-2ded-0b952bc2a03e@huawei.com>
2021-08-27 19:09                     ` Saravana Kannan
2021-08-28  1:09                       ` Kefeng Wang
2021-09-09  3:30                         ` Saravana Kannan
2021-09-10  7:59                           ` Kefeng Wang
2022-07-05 19:25                             ` Saravana Kannan
2022-08-27 10:26                               ` Leizhen (ThunderTown)
2021-08-25 12:33         ` [PATCH 3/3] amba: Properly handle device probe without IRQ domain Rob Herring
2021-08-25 14:41           ` Kefeng Wang
2021-08-17 22:27 ` [PATCH 0/3] " Rob Herring
2021-08-23  2:19   ` Kefeng Wang
2021-08-23  9:05     ` Russell King (Oracle)
2021-08-23 10:57       ` Kefeng Wang

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).